* [aarch64] Update reg-costs to differentiate between memmove costs
@ 2022-03-16 14:56 Andre Vieira (lists)
2022-03-16 17:10 ` Richard Sandiford
0 siblings, 1 reply; 2+ messages in thread
From: Andre Vieira (lists) @ 2022-03-16 14:56 UTC (permalink / raw)
To: gcc-patches; +Cc: Kyrylo Tkachov, Richard Sandiford
[-- Attachment #1: Type: text/plain, Size: 830 bytes --]
This patch introduces a struct to differentiate between different
memmove costs to enable more accurate modelling of memory operations.
These have been modelled for
-mcpu/-mtune=neoverse-v1/neoverse-n1/neoverse-n2/neoverse-512tvb; for
all other tunings, all entries are equal to the old single memmove cost
to ensure the behaviour remains the same.
2022-03-16 Tamar Christina <tamar.christina@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
gcc/ChangeLog:
* config/aarch64/aarch64-protos.h (struct cpu_memmov_cost): New
struct.
(struct tune_params): Change type of memmov_cost to use
cpu_memmov_cost.
* config/aarch64/aarch64.cc (aarch64_memory_move_cost): Update
all tunings to use new cpu_memmov_cost struct.
[-- Attachment #2: update_reg_costs.patch --]
[-- Type: text/plain, Size: 13773 bytes --]
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index f2fde35c6eb4989af8736db8fad004171c160282..5190eb8b96ea9af809a28470905b8b85ee720b09 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -508,6 +508,18 @@ struct cpu_prefetch_tune
const int default_opt_level;
};
+/* Model the costs for loads/stores for reload so that it can do more
+ accurate spill heuristics. */
+struct cpu_memmov_cost
+{
+ int load_int;
+ int store_int;
+ int load_fp;
+ int store_fp;
+ int load_pred;
+ int store_pred;
+};
+
struct tune_params
{
const struct cpu_cost_table *insn_extra_cost;
@@ -520,7 +532,8 @@ struct tune_params
or SVE_NOT_IMPLEMENTED if not applicable. Only used for tuning
decisions, does not disable VLA vectorization. */
unsigned int sve_width;
- int memmov_cost;
+ /* Structure used by reload to cost spills. */
+ struct cpu_memmov_cost memmov_cost;
int issue_rate;
unsigned int fusible_ops;
const char *function_align;
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9a94f3a30b0f1acc3c9b8a0e3d703e60780d0cbc..3fc5e0bd3d3f39f99b0c8ffb9357603bc0998515 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1291,7 +1291,13 @@ static const struct tune_params generic_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"16:12", /* function_align. */
@@ -1320,7 +1326,13 @@ static const struct tune_params cortexa35_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
1, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
@@ -1347,7 +1359,13 @@ static const struct tune_params cortexa53_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
@@ -1374,7 +1392,13 @@ static const struct tune_params cortexa57_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
@@ -1401,7 +1425,13 @@ static const struct tune_params cortexa72_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
@@ -1428,7 +1458,13 @@ static const struct tune_params cortexa73_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost. */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate. */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
@@ -1457,7 +1493,13 @@ static const struct tune_params exynosm1_tunings =
&generic_branch_cost,
&exynosm1_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
"4", /* function_align. */
@@ -1483,7 +1525,13 @@ static const struct tune_params thunderxt88_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */
@@ -1509,7 +1557,13 @@ static const struct tune_params thunderx_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */
@@ -1536,7 +1590,13 @@ static const struct tune_params tsv110_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
| AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
@@ -1563,7 +1623,13 @@ static const struct tune_params xgene1_tunings =
&generic_branch_cost,
&xgene1_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
"16", /* function_align. */
@@ -1589,7 +1655,13 @@ static const struct tune_params emag_tunings =
&generic_branch_cost,
&xgene1_approx_modes,
SVE_NOT_IMPLEMENTED,
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
"16", /* function_align. */
@@ -1615,7 +1687,13 @@ static const struct tune_params qdf24xx_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
@@ -1644,7 +1722,13 @@ static const struct tune_params saphira_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
@@ -1671,7 +1755,13 @@ static const struct tune_params thunderx2t99_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost. */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate. */
(AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
@@ -1698,7 +1788,13 @@ static const struct tune_params thunderx3t110_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost. */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
6, /* issue_rate. */
(AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
@@ -1725,7 +1821,13 @@ static const struct tune_params neoversen1_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 5, /* load_fp. */
+ 2, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -1751,7 +1853,13 @@ static const struct tune_params ampere1_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_AES_AESMC |
AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_MOVK_MOVK |
@@ -1926,7 +2034,13 @@ static const struct tune_params neoversev1_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_256, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -2057,7 +2171,13 @@ static const struct tune_params neoverse512tvb_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_128 | SVE_256, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -2085,7 +2205,13 @@ static const struct tune_params neoversen2_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_128, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -2111,7 +2237,13 @@ static const struct tune_params a64fx_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_512, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
7, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32", /* function_align. */
@@ -14501,12 +14633,41 @@ aarch64_register_move_cost (machine_mode mode,
return regmove_cost->FP2FP;
}
+/* Implements TARGET_MEMORY_MOVE_COST. */
static int
-aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
- reg_class_t rclass ATTRIBUTE_UNUSED,
- bool in ATTRIBUTE_UNUSED)
+aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
{
- return aarch64_tune_params.memmov_cost;
+ enum reg_class rclass = (enum reg_class) rclass_i;
+ switch (rclass)
+ {
+ case PR_LO_REGS:
+ case PR_HI_REGS:
+ case PR_REGS:
+ return in ? aarch64_tune_params.memmov_cost.load_pred
+ : aarch64_tune_params.memmov_cost.store_pred;
+ case POINTER_AND_FP_REGS:
+ case ALL_REGS:
+ {
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ return in ? aarch64_tune_params.memmov_cost.load_pred
+ : aarch64_tune_params.memmov_cost.store_pred;
+
+ if (VECTOR_MODE_P (mode) || FLOAT_MODE_P (mode))
+ return in ? aarch64_tune_params.memmov_cost.load_fp
+ : aarch64_tune_params.memmov_cost.store_fp;
+
+ return in ? aarch64_tune_params.memmov_cost.load_int
+ : aarch64_tune_params.memmov_cost.store_int;
+ }
+ case FP_LO8_REGS:
+ case FP_LO_REGS:
+ case FP_REGS:
+ return in ? aarch64_tune_params.memmov_cost.load_fp
+ : aarch64_tune_params.memmov_cost.store_fp;
+ default:
+ return in ? aarch64_tune_params.memmov_cost.load_int
+ : aarch64_tune_params.memmov_cost.store_int;
+ }
}
/* Implement TARGET_INIT_BUILTINS. */
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [aarch64] Update reg-costs to differentiate between memmove costs
2022-03-16 14:56 [aarch64] Update reg-costs to differentiate between memmove costs Andre Vieira (lists)
@ 2022-03-16 17:10 ` Richard Sandiford
0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2022-03-16 17:10 UTC (permalink / raw)
To: Andre Vieira (lists); +Cc: gcc-patches, Kyrylo Tkachov
"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes:
> This patch introduces a struct to differentiate between different
> memmove costs to enable a better modeling of memory operations. These
> have been modelled for
> -mcpu/-mtune=neoverse-v1/neoverse-n1/neoverse-n2/neoverse-512tvb, for
> all other tunings all entries are equal to the old single memmove cost
> to ensure the behaviour remains the same.
Thanks for doing this. Having the same cost for loads and stores
has been a long-standing wart.
> 2022-03-16 Tamar Christina <tamar.christina@arm.com>
> Andre Vieira <andre.simoesdiasvieira@arm.com>
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-protos.h (struct cpu_memmov_cost): New
> struct.
> (struct tune_params): Change type of memmov_cost to use
> cpu_memmov_cost.
> * config/aarch64/aarch64.cc (aarch64_memory_move_cost): Update
> all tunings
> to use new cpu_memmov_cost struct.
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index f2fde35c6eb4989af8736db8fad004171c160282..5190eb8b96ea9af809a28470905b8b85ee720b09 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -508,6 +508,18 @@ struct cpu_prefetch_tune
> const int default_opt_level;
> };
>
> +/* Model the costs for loads/stores for reload so that it can do more
I'd say s/reload/the register allocators/ here, since the costs affect
decisions made by IRA too.
> + accurate spill heuristics. */
> +struct cpu_memmov_cost
> +{
> + int load_int;
> + int store_int;
> + int load_fp;
> + int store_fp;
> + int load_pred;
> + int store_pred;
> +};
> +
> struct tune_params
> {
> const struct cpu_cost_table *insn_extra_cost;
> […]
> @@ -14501,12 +14633,41 @@ aarch64_register_move_cost (machine_mode mode,
> return regmove_cost->FP2FP;
> }
>
> +/* Implements TARGET_MEMORY_MOVE_COST. */
> static int
> -aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
> - reg_class_t rclass ATTRIBUTE_UNUSED,
> - bool in ATTRIBUTE_UNUSED)
> +aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
> {
> - return aarch64_tune_params.memmov_cost;
> + enum reg_class rclass = (enum reg_class) rclass_i;
> + switch (rclass)
> + {
> + case PR_LO_REGS:
> + case PR_HI_REGS:
> + case PR_REGS:
> + return in ? aarch64_tune_params.memmov_cost.load_pred
> + : aarch64_tune_params.memmov_cost.store_pred;
> + case POINTER_AND_FP_REGS:
> + case ALL_REGS:
> + {
> + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
> + return in ? aarch64_tune_params.memmov_cost.load_pred
> + : aarch64_tune_params.memmov_cost.store_pred;
> +
> + if (VECTOR_MODE_P (mode) || FLOAT_MODE_P (mode))
> + return in ? aarch64_tune_params.memmov_cost.load_fp
> + : aarch64_tune_params.memmov_cost.store_fp;
> +
> + return in ? aarch64_tune_params.memmov_cost.load_int
> + : aarch64_tune_params.memmov_cost.store_int;
> + }
> + case FP_LO8_REGS:
> + case FP_LO_REGS:
> + case FP_REGS:
> + return in ? aarch64_tune_params.memmov_cost.load_fp
> + : aarch64_tune_params.memmov_cost.store_fp;
> + default:
> + return in ? aarch64_tune_params.memmov_cost.load_int
> + : aarch64_tune_params.memmov_cost.store_int;
> + }
> }
It would be good to avoid listing individual subclasses if possible,
since it's easy for the list to get out of date if more subclasses
are added.
An alternative would be:
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
? reg_classes_intersect_p (rclass, PR_REGS)
: reg_class_subset_p (rclass, PR_REGS))
return (in
? aarch64_tune_params.memmov_cost.load_pred
: aarch64_tune_params.memmov_cost.store_pred);
if (VECTOR_MODE_P (mode) || FLOAT_MODE_P (mode)
? reg_classes_intersect_p (rclass, FP_REGS)
: reg_class_subset_p (rclass, FP_REGS))
return (in
? aarch64_tune_params.memmov_cost.load_fp
: aarch64_tune_params.memmov_cost.store_fp);
return (in
? aarch64_tune_params.memmov_cost.load_int
: aarch64_tune_params.memmov_cost.store_int);
OK with that change, if it works.
Thanks,
Richard
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-03-16 17:10 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-16 14:56 [aarch64] Update reg-costs to differentiate between memmove costs Andre Vieira (lists)
2022-03-16 17:10 ` Richard Sandiford
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).