* [PATCH] [AARCH64] Improve vector generation cost model
@ 2019-03-15 1:47 apinski
2019-05-02 16:10 ` Andrew Pinski
2019-12-13 10:32 ` Kyrill Tkachov
0 siblings, 2 replies; 4+ messages in thread
From: apinski @ 2019-03-15 1:47 UTC (permalink / raw)
To: gcc-patches; +Cc: Andrew Pinski
From: Andrew Pinski <apinski@marvell.com>
Hi,
On OcteonTX2, ld1r and ld1 (with a single lane) are split
into two different micro-ops unlike most other targets.
This adds three extra costs to the cost table:
ld1_dup: used for "ld1r {v0.4s}, [x0]"
merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
ld1_merge: used for "ld1 {v0.4s}[0], [x0]"
OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
ChangeLog:
* config/arm/aarch-common-protos.h (vector_cost_table):
Add merge_dup, ld1_merge, and ld1_dup.
* config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
Update for the new fields.
(thunderx_extra_costs): Likewise.
(thunderx2t99_extra_costs): Likewise.
(tsv110_extra_costs): Likewise.
* config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
(cortexa53_extra_costs): Likewise.
(cortexa57_extra_costs): Likewise.
(exynosm1_extra_costs): Likewise.
(xgene1_extra_costs): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
Handle vec_merge of a memory.
Signed-off-by: Andrew Pinski <apinski@marvell.com>
---
gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++----
gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++
gcc/config/arm/aarch-common-protos.h | 3 +++
gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++-----
4 files changed, 61 insertions(+), 9 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 5c9442e1b89..9a7c70ba595 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* Alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* Alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b38505b0872..dc4d3d39af8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10568,6 +10568,28 @@ cost_plus:
}
break;
+ case VEC_DUPLICATE:
+ if (!speed)
+ return false;
+
+ if (GET_CODE (XEXP (x, 0)) == MEM)
+ *cost += extra_cost->vect.ld1_dup;
+ else
+ *cost += extra_cost->vect.merge_dup;
+ return true;
+
+ case VEC_MERGE:
+ if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+ {
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
+ *cost += extra_cost->vect.ld1_merge;
+ else
+ *cost += extra_cost->vect.merge_dup;
+ return true;
+ }
+ break;
+
+
case TRUNCATE:
/* Decompose <su>muldi3_highpart. */
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 11cd5145bbc..dbc1282402a 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -131,6 +131,9 @@ struct fp_cost_table
struct vector_cost_table
{
const int alu;
+ const int merge_dup;
+ const int ld1_merge;
+ const int ld1_dup;
};
struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index bc33efadc6c..a51bc668f56 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (0) /* alu. */
+ COSTS_N_INSNS (0), /* alu. */
+ COSTS_N_INSNS (0), /* dup_merge. */
+ COSTS_N_INSNS (0), /* ld1_merge. */
+ COSTS_N_INSNS (0) /* ld1_dup. */
}
};
@@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (2) /* alu. */
+ COSTS_N_INSNS (2), /* alu. */
+ COSTS_N_INSNS (2), /* dup_merge. */
+ COSTS_N_INSNS (2), /* ld1_merge. */
+ COSTS_N_INSNS (2) /* ld1_dup. */
}
};
--
2.17.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [AARCH64] Improve vector generation cost model
2019-03-15 1:47 [PATCH] [AARCH64] Improve vector generation cost model apinski
@ 2019-05-02 16:10 ` Andrew Pinski
2019-12-07 23:59 ` Andrew Pinski
2019-12-13 10:32 ` Kyrill Tkachov
1 sibling, 1 reply; 4+ messages in thread
From: Andrew Pinski @ 2019-05-02 16:10 UTC (permalink / raw)
To: apinski; +Cc: GCC Patches
On Thu, Mar 14, 2019 at 6:19 PM <apinski@marvell.com> wrote:
>
> From: Andrew Pinski <apinski@marvell.com>
>
> Hi,
> On OcteonTX2, ld1r and ld1 (with a single lane) are split
> into two different micro-ops unlike most other targets.
> This adds three extra costs to the cost table:
> ld1_dup: used for "ld1r {v0.4s}, [x0]"
> merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
> ld1_merge: used fir "ld1 {v0.4s}[0], [x0]"
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
Ping? It has been 1.5 months now.
>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/arm/aarch-common-protos.h (vector_cost_table):
> Add merge_dup, ld1_merge, and ld1_dup.
> * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
> Update for the new fields.
> (thunderx_extra_costs): Likewise.
> (thunderx2t99_extra_costs): Likewise.
> (tsv110_extra_costs): Likewise.
> * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
> (cortexa53_extra_costs): Likewise.
> (cortexa57_extra_costs): Likewise.
> (exynosm1_extra_costs): Likewise.
> (xgene1_extra_costs): Likewise.
> * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
> Hanlde vec_merge of a memory.
>
> Signed-off-by: Andrew Pinski <apinski@marvell.com>
> ---
> gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++----
> gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++
> gcc/config/arm/aarch-common-protos.h | 3 +++
> gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++-----
> 4 files changed, 61 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
> index 5c9442e1b89..9a7c70ba595 100644
> --- a/gcc/config/aarch64/aarch64-cost-tables.h
> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* alu. */
> + COSTS_N_INSNS (1), /* Alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* Alu. */
> + COSTS_N_INSNS (1), /* Alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* Alu. */
> + COSTS_N_INSNS (1), /* Alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* alu. */
> + COSTS_N_INSNS (1), /* Alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b38505b0872..dc4d3d39af8 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -10568,6 +10568,28 @@ cost_plus:
> }
> break;
>
> + case VEC_DUPLICATE:
> + if (!speed)
> + return false;
> +
> + if (GET_CODE (XEXP (x, 0)) == MEM)
> + *cost += extra_cost->vect.ld1_dup;
> + else
> + *cost += extra_cost->vect.merge_dup;
> + return true;
> +
> + case VEC_MERGE:
> + if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
> + {
> + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
> + *cost += extra_cost->vect.ld1_merge;
> + else
> + *cost += extra_cost->vect.merge_dup;
> + return true;
> + }
> + break;
> +
> +
> case TRUNCATE:
>
> /* Decompose <su>muldi3_highpart. */
> diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
> index 11cd5145bbc..dbc1282402a 100644
> --- a/gcc/config/arm/aarch-common-protos.h
> +++ b/gcc/config/arm/aarch-common-protos.h
> @@ -131,6 +131,9 @@ struct fp_cost_table
> struct vector_cost_table
> {
> const int alu;
> + const int merge_dup;
> + const int ld1_merge;
> + const int ld1_dup;
> };
>
> struct cpu_cost_table
> diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
> index bc33efadc6c..a51bc668f56 100644
> --- a/gcc/config/arm/aarch-cost-tables.h
> +++ b/gcc/config/arm/aarch-cost-tables.h
> @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* alu. */
> + COSTS_N_INSNS (1), /* alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* alu. */
> + COSTS_N_INSNS (1), /* alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (1) /* alu. */
> + COSTS_N_INSNS (1), /* alu. */
> + COSTS_N_INSNS (1), /* dup_merge. */
> + COSTS_N_INSNS (1), /* ld1_merge. */
> + COSTS_N_INSNS (1) /* ld1_dup. */
> }
> };
>
> @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (0) /* alu. */
> + COSTS_N_INSNS (0), /* alu. */
> + COSTS_N_INSNS (0), /* dup_merge. */
> + COSTS_N_INSNS (0), /* ld1_merge. */
> + COSTS_N_INSNS (0) /* ld1_dup. */
> }
> };
>
> @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs =
> },
> /* Vector */
> {
> - COSTS_N_INSNS (2) /* alu. */
> + COSTS_N_INSNS (2), /* alu. */
> + COSTS_N_INSNS (2), /* dup_merge. */
> + COSTS_N_INSNS (2), /* ld1_merge. */
> + COSTS_N_INSNS (2) /* ld1_dup. */
> }
> };
>
> --
> 2.17.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [AARCH64] Improve vector generation cost model
2019-05-02 16:10 ` Andrew Pinski
@ 2019-12-07 23:59 ` Andrew Pinski
0 siblings, 0 replies; 4+ messages in thread
From: Andrew Pinski @ 2019-12-07 23:59 UTC (permalink / raw)
To: apinski; +Cc: GCC Patches, richard.sandiford
On Thu, May 2, 2019 at 9:10 AM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Thu, Mar 14, 2019 at 6:19 PM <apinski@marvell.com> wrote:
> >
> > From: Andrew Pinski <apinski@marvell.com>
> >
> > Hi,
> > On OcteonTX2, ld1r and ld1 (with a single lane) are split
> > into two different micro-ops unlike most other targets.
> > This adds three extra costs to the cost table:
> > ld1_dup: used for "ld1r {v0.4s}, [x0]"
> > merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
> > ld1_merge: used fir "ld1 {v0.4s}[0], [x0]"
> >
> > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> Ping? It has been 1.5 months now.
Ping? I have bootstrapped and tested on aarch64-linux-gnu recently
with the patch.
Or does this have to wait until Stage 1?
Thanks,
Andrew
>
> >
> > Thanks,
> > Andrew Pinski
> >
> > ChangeLog:
> > * config/arm/aarch-common-protos.h (vector_cost_table):
> > Add merge_dup, ld1_merge, and ld1_dup.
> > * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
> > Update for the new fields.
> > (thunderx_extra_costs): Likewise.
> > (thunderx2t99_extra_costs): Likewise.
> > (tsv110_extra_costs): Likewise.
> > * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
> > (cortexa53_extra_costs): Likewise.
> > (cortexa57_extra_costs): Likewise.
> > (exynosm1_extra_costs): Likewise.
> > (xgene1_extra_costs): Likewise.
> > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
> > Hanlde vec_merge of a memory.
> >
> > Signed-off-by: Andrew Pinski <apinski@marvell.com>
> > ---
> > gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++----
> > gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++
> > gcc/config/arm/aarch-common-protos.h | 3 +++
> > gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++-----
> > 4 files changed, 61 insertions(+), 9 deletions(-)
> >
> > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
> > index 5c9442e1b89..9a7c70ba595 100644
> > --- a/gcc/config/aarch64/aarch64-cost-tables.h
> > +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> > @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* alu. */
> > + COSTS_N_INSNS (1), /* Alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* Alu. */
> > + COSTS_N_INSNS (1), /* Alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* Alu. */
> > + COSTS_N_INSNS (1), /* Alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* alu. */
> > + COSTS_N_INSNS (1), /* Alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index b38505b0872..dc4d3d39af8 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -10568,6 +10568,28 @@ cost_plus:
> > }
> > break;
> >
> > + case VEC_DUPLICATE:
> > + if (!speed)
> > + return false;
> > +
> > + if (GET_CODE (XEXP (x, 0)) == MEM)
> > + *cost += extra_cost->vect.ld1_dup;
> > + else
> > + *cost += extra_cost->vect.merge_dup;
> > + return true;
> > +
> > + case VEC_MERGE:
> > + if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
> > + {
> > + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
> > + *cost += extra_cost->vect.ld1_merge;
> > + else
> > + *cost += extra_cost->vect.merge_dup;
> > + return true;
> > + }
> > + break;
> > +
> > +
> > case TRUNCATE:
> >
> > /* Decompose <su>muldi3_highpart. */
> > diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
> > index 11cd5145bbc..dbc1282402a 100644
> > --- a/gcc/config/arm/aarch-common-protos.h
> > +++ b/gcc/config/arm/aarch-common-protos.h
> > @@ -131,6 +131,9 @@ struct fp_cost_table
> > struct vector_cost_table
> > {
> > const int alu;
> > + const int merge_dup;
> > + const int ld1_merge;
> > + const int ld1_dup;
> > };
> >
> > struct cpu_cost_table
> > diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
> > index bc33efadc6c..a51bc668f56 100644
> > --- a/gcc/config/arm/aarch-cost-tables.h
> > +++ b/gcc/config/arm/aarch-cost-tables.h
> > @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* alu. */
> > + COSTS_N_INSNS (1), /* alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* alu. */
> > + COSTS_N_INSNS (1), /* alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (1) /* alu. */
> > + COSTS_N_INSNS (1), /* alu. */
> > + COSTS_N_INSNS (1), /* dup_merge. */
> > + COSTS_N_INSNS (1), /* ld1_merge. */
> > + COSTS_N_INSNS (1) /* ld1_dup. */
> > }
> > };
> >
> > @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (0) /* alu. */
> > + COSTS_N_INSNS (0), /* alu. */
> > + COSTS_N_INSNS (0), /* dup_merge. */
> > + COSTS_N_INSNS (0), /* ld1_merge. */
> > + COSTS_N_INSNS (0) /* ld1_dup. */
> > }
> > };
> >
> > @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs =
> > },
> > /* Vector */
> > {
> > - COSTS_N_INSNS (2) /* alu. */
> > + COSTS_N_INSNS (2), /* alu. */
> > + COSTS_N_INSNS (2), /* dup_merge. */
> > + COSTS_N_INSNS (2), /* ld1_merge. */
> > + COSTS_N_INSNS (2) /* ld1_dup. */
> > }
> > };
> >
> > --
> > 2.17.1
> >
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [AARCH64] Improve vector generation cost model
2019-03-15 1:47 [PATCH] [AARCH64] Improve vector generation cost model apinski
2019-05-02 16:10 ` Andrew Pinski
@ 2019-12-13 10:32 ` Kyrill Tkachov
1 sibling, 0 replies; 4+ messages in thread
From: Kyrill Tkachov @ 2019-12-13 10:32 UTC (permalink / raw)
To: apinski, gcc-patches
Hi Andrew,
On 3/15/19 1:18 AM, apinski@marvell.com wrote:
> From: Andrew Pinski <apinski@marvell.com>
>
> Hi,
>  On OcteonTX2, ld1r and ld1 (with a single lane) are split
> into two different micro-ops unlike most other targets.
> This adds three extra costs to the cost table:
> ld1_dup: used for "ld1r {v0.4s}, [x0]"
> merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
> ld1_merge: used fir "ld1 {v0.4s}[0], [x0]"
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
Sorry for the slow reply, missed it on gcc-patches :(
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/arm/aarch-common-protos.h (vector_cost_table):
> Add merge_dup, ld1_merge, and ld1_dup.
> * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
> Update for the new fields.
> (thunderx_extra_costs): Likewise.
> (thunderx2t99_extra_costs): Likewise.
> (tsv110_extra_costs): Likewise.
> * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
> (cortexa53_extra_costs): Likewise.
> (cortexa57_extra_costs): Likewise.
> (exynosm1_extra_costs): Likewise.
> (xgene1_extra_costs): Likewise.
> * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a
> memory.
> Hanlde vec_merge of a memory.
>
> Signed-off-by: Andrew Pinski <apinski@marvell.com>
> ---
>  gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++----
>  gcc/config/aarch64/aarch64.c             | 22 +++++++++++++++++++++
>  gcc/config/arm/aarch-common-protos.h     |  3 +++
>  gcc/config/arm/aarch-cost-tables.h       | 25 +++++++++++++++++++-----
>  4 files changed, 61 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h
> b/gcc/config/aarch64/aarch64-cost-tables.h
> index 5c9442e1b89..9a7c70ba595 100644
> --- a/gcc/config/aarch64/aarch64-cost-tables.h
> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* alu. */
> +   COSTS_N_INSNS (1), /* Alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* Alu. */
> +   COSTS_N_INSNS (1), /* Alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -330,7 +336,10 @@ const struct cpu_cost_table
> thunderx2t99_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* Alu. */
> +   COSTS_N_INSNS (1), /* Alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* alu. */
> +   COSTS_N_INSNS (1), /* Alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b38505b0872..dc4d3d39af8 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -10568,6 +10568,28 @@ cost_plus:
>         }
>       break;
>
> +    case VEC_DUPLICATE:
> +      if (!speed)
> +        return false;
If I read the code right, before this patch we would be returning true
for !speed i.e. not recursing.
Do we want to trigger a recursion now?
> +
> +      if (GET_CODE (XEXP (x, 0)) == MEM)
> +        *cost += extra_cost->vect.ld1_dup;
Please use MEM_P here.
> +      else
> +        *cost += extra_cost->vect.merge_dup;
> +      return true;
> +
> +    case VEC_MERGE:
> +      if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
> +        {
> +          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
And here.
Thanks,
Kyrill
> +            *cost += extra_cost->vect.ld1_merge;
> +          else
> +            *cost += extra_cost->vect.merge_dup;
> +          return true;
> +        }
> +      break;
> +
> +
>      case TRUNCATE:
>
>        /* Decompose <su>muldi3_highpart.  */
> diff --git a/gcc/config/arm/aarch-common-protos.h
> b/gcc/config/arm/aarch-common-protos.h
> index 11cd5145bbc..dbc1282402a 100644
> --- a/gcc/config/arm/aarch-common-protos.h
> +++ b/gcc/config/arm/aarch-common-protos.h
> @@ -131,6 +131,9 @@ struct fp_cost_table
>  struct vector_cost_table
>  {
>    const int alu;
> +  const int merge_dup;
> +  const int ld1_merge;
> +  const int ld1_dup;
>  };
>
> Â struct cpu_cost_table
> diff --git a/gcc/config/arm/aarch-cost-tables.h
> b/gcc/config/arm/aarch-cost-tables.h
> index bc33efadc6c..a51bc668f56 100644
> --- a/gcc/config/arm/aarch-cost-tables.h
> +++ b/gcc/config/arm/aarch-cost-tables.h
> @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* alu. */
> +   COSTS_N_INSNS (1), /* alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* alu. */
> +   COSTS_N_INSNS (1), /* alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (1) /* alu. */
> +   COSTS_N_INSNS (1), /* alu. */
> +   COSTS_N_INSNS (1), /* dup_merge. */
> +   COSTS_N_INSNS (1), /* ld1_merge. */
> +   COSTS_N_INSNS (1) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (0) /* alu. */
> +   COSTS_N_INSNS (0), /* alu. */
> +   COSTS_N_INSNS (0), /* dup_merge. */
> +   COSTS_N_INSNS (0), /* ld1_merge. */
> +   COSTS_N_INSNS (0) /* ld1_dup. */
> Â Â }
> Â };
>
> @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs =
> Â Â },
> Â Â /* Vector */
> Â Â {
> -   COSTS_N_INSNS (2) /* alu. */
> +   COSTS_N_INSNS (2), /* alu. */
> +   COSTS_N_INSNS (2), /* dup_merge. */
> +   COSTS_N_INSNS (2), /* ld1_merge. */
> +   COSTS_N_INSNS (2) /* ld1_dup. */
> Â Â }
> Â };
>
> --
> 2.17.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2019-12-13 10:32 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-15 1:47 [PATCH] [AARCH64] Improve vector generation cost model apinski
2019-05-02 16:10 ` Andrew Pinski
2019-12-07 23:59 ` Andrew Pinski
2019-12-13 10:32 ` Kyrill Tkachov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).