* [PATCH] s390: Fix builtins vec_rli and verll
@ 2023-08-21 15:56 Stefan Schulze Frielinghaus
2023-08-28 9:33 ` Andreas Krebbel
0 siblings, 1 reply; 4+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-08-21 15:56 UTC (permalink / raw)
To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus
The second argument of these builtins is an unsigned immediate. For
vec_rli the API allows immediates up to 64 bits whereas the instruction
verll only allows immediates up to 32 bits. Since the shift count
equals the immediate modulo vector element size, truncating those
immediates is fine.
Bootstrapped and regtested on s390. Ok for mainline?
gcc/ChangeLog:
* config/s390/s390-builtins.def (O_U64): New.
(O1_U64): Ditto.
(O2_U64): Ditto.
(O3_U64): Ditto.
(O4_U64): Ditto.
(O_M12): Change bit position.
(O_S2): Ditto.
(O_S3): Ditto.
(O_S4): Ditto.
(O_S5): Ditto.
(O_S8): Ditto.
(O_S12): Ditto.
(O_S16): Ditto.
(O_S32): Ditto.
(O_ELEM): Ditto.
(O_LIT): Ditto.
(OB_DEF_VAR): Add operand constraints.
(B_DEF): Ditto.
* config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
operands.
---
gcc/config/s390/s390-builtins.def | 60 ++++++++++++++++++-------------
gcc/config/s390/s390.cc | 6 ++--
2 files changed, 39 insertions(+), 27 deletions(-)
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index a16983b18bd..c829f445a11 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -28,6 +28,7 @@
#undef O_U12
#undef O_U16
#undef O_U32
+#undef O_U64
#undef O_M12
@@ -88,6 +89,11 @@
#undef O3_U32
#undef O4_U32
+#undef O1_U64
+#undef O2_U64
+#undef O3_U64
+#undef O4_U64
+
#undef O1_M12
#undef O2_M12
#undef O3_M12
@@ -157,20 +163,21 @@
#define O_U12 7 /* unsigned 16 bit literal */
#define O_U16 8 /* unsigned 16 bit literal */
#define O_U32 9 /* unsigned 32 bit literal */
+#define O_U64 10 /* unsigned 64 bit literal */
-#define O_M12 10 /* matches bitmask of 12 */
+#define O_M12 11 /* matches bitmask of 12 */
-#define O_S2 11 /* signed 2 bit literal */
-#define O_S3 12 /* signed 3 bit literal */
-#define O_S4 13 /* signed 4 bit literal */
-#define O_S5 14 /* signed 5 bit literal */
-#define O_S8 15 /* signed 8 bit literal */
-#define O_S12 16 /* signed 12 bit literal */
-#define O_S16 17 /* signed 16 bit literal */
-#define O_S32 18 /* signed 32 bit literal */
+#define O_S2 12 /* signed 2 bit literal */
+#define O_S3 13 /* signed 3 bit literal */
+#define O_S4 14 /* signed 4 bit literal */
+#define O_S5 15 /* signed 5 bit literal */
+#define O_S8 16 /* signed 8 bit literal */
+#define O_S12 17 /* signed 12 bit literal */
+#define O_S16 18 /* signed 16 bit literal */
+#define O_S32 19 /* signed 32 bit literal */
-#define O_ELEM 19 /* Element selector requiring modulo arithmetic. */
-#define O_LIT 20 /* Operand must be a literal fitting the target type. */
+#define O_ELEM 20 /* Element selector requiring modulo arithmetic. */
+#define O_LIT 21 /* Operand must be a literal fitting the target type. */
#define O_SHIFT 5
@@ -223,6 +230,11 @@
#define O3_U32 (O_U32 << (2 * O_SHIFT))
#define O4_U32 (O_U32 << (3 * O_SHIFT))
+#define O1_U64 O_U64
+#define O2_U64 (O_U64 << O_SHIFT)
+#define O3_U64 (O_U64 << (2 * O_SHIFT))
+#define O4_U64 (O_U64 << (3 * O_SHIFT))
+
#define O1_M12 O_M12
#define O2_M12 (O_M12 << O_SHIFT)
#define O3_M12 (O_M12 << (2 * O_SHIFT))
@@ -1989,19 +2001,19 @@ B_DEF (s390_verllvf, vrotlv4si3, 0,
B_DEF (s390_verllvg, vrotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
OB_DEF (s390_vec_rli, s390_vec_rli_u8, s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG)
-OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, 0, BT_OV_UV16QI_UV16QI_ULONG)
-OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, 0, BT_OV_V16QI_V16QI_ULONG)
-OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, 0, BT_OV_UV8HI_UV8HI_ULONG)
-OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, 0, BT_OV_V8HI_V8HI_ULONG)
-OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, 0, BT_OV_UV4SI_UV4SI_ULONG)
-OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, 0, BT_OV_V4SI_V4SI_ULONG)
-OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, 0, BT_OV_UV2DI_UV2DI_ULONG)
-OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, 0, BT_OV_V2DI_V2DI_ULONG)
-
-B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UINT)
-B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UINT)
-B_DEF (s390_verllf, rotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT)
-B_DEF (s390_verllg, rotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UINT)
+OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, O2_U64, BT_OV_UV16QI_UV16QI_ULONG)
+OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, O2_U64, BT_OV_V16QI_V16QI_ULONG)
+OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, O2_U64, BT_OV_UV8HI_UV8HI_ULONG)
+OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, O2_U64, BT_OV_V8HI_V8HI_ULONG)
+OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, O2_U64, BT_OV_UV4SI_UV4SI_ULONG)
+OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, O2_U64, BT_OV_V4SI_V4SI_ULONG)
+OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, O2_U64, BT_OV_UV2DI_UV2DI_ULONG)
+OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, O2_U64, BT_OV_V2DI_V2DI_ULONG)
+
+B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, O2_U32, BT_FN_UV16QI_UV16QI_UINT)
+B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, O2_U32, BT_FN_UV8HI_UV8HI_UINT)
+B_DEF (s390_verllf, rotlv4si3, 0, B_VX, O2_U32, BT_FN_UV4SI_UV4SI_UINT)
+B_DEF (s390_verllg, rotlv2di3, 0, B_VX, O2_U32, BT_FN_UV2DI_UV2DI_UINT)
OB_DEF (s390_vec_rl_mask, s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_UCHAR)
OB_DEF_VAR (s390_vec_rl_mask_s8, s390_verimb, 0, O3_U8, BT_OV_V16QI_V16QI_UV16QI_UCHAR)
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 49ab4fc7360..64f56d8effa 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -815,8 +815,8 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
if (O_UIMM_P (op_flags))
{
- unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
- unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
+ unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 64, 4 };
+ unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
@@ -824,7 +824,7 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
if (!tree_fits_uhwi_p (arg)
- || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
+ || tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
|| (bitmask && tree_to_uhwi (arg) & ~bitmask))
{
if (bitmask)
--
2.41.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] s390: Fix builtins vec_rli and verll
2023-08-21 15:56 [PATCH] s390: Fix builtins vec_rli and verll Stefan Schulze Frielinghaus
@ 2023-08-28 9:33 ` Andreas Krebbel
2023-09-11 6:56 ` Stefan Schulze Frielinghaus
0 siblings, 1 reply; 4+ messages in thread
From: Andreas Krebbel @ 2023-08-28 9:33 UTC (permalink / raw)
To: Stefan Schulze Frielinghaus, gcc-patches
Hi Stefan,
do you really need to introduce a new flag for U64 given that the type of the builtin is unsigned long?
Andreas
On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote:
> The second argument of these builtins is an unsigned immediate. For
> vec_rli the API allows immediates up to 64 bits whereas the instruction
> verll only allows immediates up to 32 bits. Since the shift count
> equals the immediate modulo vector element size, truncating those
> immediates is fine.
>
> Bootstrapped and regtested on s390. Ok for mainline?
>
> gcc/ChangeLog:
>
> * config/s390/s390-builtins.def (O_U64): New.
> (O1_U64): Ditto.
> (O2_U64): Ditto.
> (O3_U64): Ditto.
> (O4_U64): Ditto.
> (O_M12): Change bit position.
> (O_S2): Ditto.
> (O_S3): Ditto.
> (O_S4): Ditto.
> (O_S5): Ditto.
> (O_S8): Ditto.
> (O_S12): Ditto.
> (O_S16): Ditto.
> (O_S32): Ditto.
> (O_ELEM): Ditto.
> (O_LIT): Ditto.
> (OB_DEF_VAR): Add operand constraints.
> (B_DEF): Ditto.
> * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
> operands.
> ---
> gcc/config/s390/s390-builtins.def | 60 ++++++++++++++++++-------------
> gcc/config/s390/s390.cc | 6 ++--
> 2 files changed, 39 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
> index a16983b18bd..c829f445a11 100644
> --- a/gcc/config/s390/s390-builtins.def
> +++ b/gcc/config/s390/s390-builtins.def
> @@ -28,6 +28,7 @@
> #undef O_U12
> #undef O_U16
> #undef O_U32
> +#undef O_U64
>
> #undef O_M12
>
> @@ -88,6 +89,11 @@
> #undef O3_U32
> #undef O4_U32
>
> +#undef O1_U64
> +#undef O2_U64
> +#undef O3_U64
> +#undef O4_U64
> +
> #undef O1_M12
> #undef O2_M12
> #undef O3_M12
> @@ -157,20 +163,21 @@
> #define O_U12 7 /* unsigned 16 bit literal */
> #define O_U16 8 /* unsigned 16 bit literal */
> #define O_U32 9 /* unsigned 32 bit literal */
> +#define O_U64 10 /* unsigned 64 bit literal */
>
> -#define O_M12 10 /* matches bitmask of 12 */
> +#define O_M12 11 /* matches bitmask of 12 */
>
> -#define O_S2 11 /* signed 2 bit literal */
> -#define O_S3 12 /* signed 3 bit literal */
> -#define O_S4 13 /* signed 4 bit literal */
> -#define O_S5 14 /* signed 5 bit literal */
> -#define O_S8 15 /* signed 8 bit literal */
> -#define O_S12 16 /* signed 12 bit literal */
> -#define O_S16 17 /* signed 16 bit literal */
> -#define O_S32 18 /* signed 32 bit literal */
> +#define O_S2 12 /* signed 2 bit literal */
> +#define O_S3 13 /* signed 3 bit literal */
> +#define O_S4 14 /* signed 4 bit literal */
> +#define O_S5 15 /* signed 5 bit literal */
> +#define O_S8 16 /* signed 8 bit literal */
> +#define O_S12 17 /* signed 12 bit literal */
> +#define O_S16 18 /* signed 16 bit literal */
> +#define O_S32 19 /* signed 32 bit literal */
>
> -#define O_ELEM 19 /* Element selector requiring modulo arithmetic. */
> -#define O_LIT 20 /* Operand must be a literal fitting the target type. */
> +#define O_ELEM 20 /* Element selector requiring modulo arithmetic. */
> +#define O_LIT 21 /* Operand must be a literal fitting the target type. */
>
> #define O_SHIFT 5
>
> @@ -223,6 +230,11 @@
> #define O3_U32 (O_U32 << (2 * O_SHIFT))
> #define O4_U32 (O_U32 << (3 * O_SHIFT))
>
> +#define O1_U64 O_U64
> +#define O2_U64 (O_U64 << O_SHIFT)
> +#define O3_U64 (O_U64 << (2 * O_SHIFT))
> +#define O4_U64 (O_U64 << (3 * O_SHIFT))
> +
> #define O1_M12 O_M12
> #define O2_M12 (O_M12 << O_SHIFT)
> #define O3_M12 (O_M12 << (2 * O_SHIFT))
> @@ -1989,19 +2001,19 @@ B_DEF (s390_verllvf, vrotlv4si3, 0,
> B_DEF (s390_verllvg, vrotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
>
> OB_DEF (s390_vec_rli, s390_vec_rli_u8, s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, 0, BT_OV_UV16QI_UV16QI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, 0, BT_OV_V16QI_V16QI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, 0, BT_OV_UV8HI_UV8HI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, 0, BT_OV_V8HI_V8HI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, 0, BT_OV_UV4SI_UV4SI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, 0, BT_OV_V4SI_V4SI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, 0, BT_OV_UV2DI_UV2DI_ULONG)
> -OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, 0, BT_OV_V2DI_V2DI_ULONG)
> -
> -B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UINT)
> -B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UINT)
> -B_DEF (s390_verllf, rotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT)
> -B_DEF (s390_verllg, rotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UINT)
> +OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, O2_U64, BT_OV_UV16QI_UV16QI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, O2_U64, BT_OV_V16QI_V16QI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, O2_U64, BT_OV_UV8HI_UV8HI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, O2_U64, BT_OV_V8HI_V8HI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, O2_U64, BT_OV_UV4SI_UV4SI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, O2_U64, BT_OV_V4SI_V4SI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, O2_U64, BT_OV_UV2DI_UV2DI_ULONG)
> +OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, O2_U64, BT_OV_V2DI_V2DI_ULONG)
> +
> +B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, O2_U32, BT_FN_UV16QI_UV16QI_UINT)
> +B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, O2_U32, BT_FN_UV8HI_UV8HI_UINT)
> +B_DEF (s390_verllf, rotlv4si3, 0, B_VX, O2_U32, BT_FN_UV4SI_UV4SI_UINT)
> +B_DEF (s390_verllg, rotlv2di3, 0, B_VX, O2_U32, BT_FN_UV2DI_UV2DI_UINT)
>
> OB_DEF (s390_vec_rl_mask, s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_UCHAR)
> OB_DEF_VAR (s390_vec_rl_mask_s8, s390_verimb, 0, O3_U8, BT_OV_V16QI_V16QI_UV16QI_UCHAR)
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 49ab4fc7360..64f56d8effa 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -815,8 +815,8 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
> {
> if (O_UIMM_P (op_flags))
> {
> - unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
> - unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
> + unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 64, 4 };
> + unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
> unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
> unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
>
> @@ -824,7 +824,7 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
> gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
>
> if (!tree_fits_uhwi_p (arg)
> - || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
> + || tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
> || (bitmask && tree_to_uhwi (arg) & ~bitmask))
> {
> if (bitmask)
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] s390: Fix builtins vec_rli and verll
2023-08-28 9:33 ` Andreas Krebbel
@ 2023-09-11 6:56 ` Stefan Schulze Frielinghaus
2023-09-11 15:59 ` Andreas Krebbel
0 siblings, 1 reply; 4+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-09-11 6:56 UTC (permalink / raw)
To: Andreas Krebbel; +Cc: gcc-patches
On Mon, Aug 28, 2023 at 11:33:37AM +0200, Andreas Krebbel wrote:
> Hi Stefan,
>
> do you really need to introduce a new flag for U64 given that the type of the builtin is unsigned long?
In function s390_const_operand_ok the immediate is checked whether it is
valide w.r.t. the flag:
tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
Here bitwidth is derived from the flag.
Cheers,
Stefan
>
> Andreas
>
> On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote:
> > The second argument of these builtins is an unsigned immediate. For
> > vec_rli the API allows immediates up to 64 bits whereas the instruction
> > verll only allows immediates up to 32 bits. Since the shift count
> > equals the immediate modulo vector element size, truncating those
> > immediates is fine.
> >
> > Bootstrapped and regtested on s390. Ok for mainline?
> >
> > gcc/ChangeLog:
> >
> > * config/s390/s390-builtins.def (O_U64): New.
> > (O1_U64): Ditto.
> > (O2_U64): Ditto.
> > (O3_U64): Ditto.
> > (O4_U64): Ditto.
> > (O_M12): Change bit position.
> > (O_S2): Ditto.
> > (O_S3): Ditto.
> > (O_S4): Ditto.
> > (O_S5): Ditto.
> > (O_S8): Ditto.
> > (O_S12): Ditto.
> > (O_S16): Ditto.
> > (O_S32): Ditto.
> > (O_ELEM): Ditto.
> > (O_LIT): Ditto.
> > (OB_DEF_VAR): Add operand constraints.
> > (B_DEF): Ditto.
> > * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
> > operands.
> > ---
> > gcc/config/s390/s390-builtins.def | 60 ++++++++++++++++++-------------
> > gcc/config/s390/s390.cc | 6 ++--
> > 2 files changed, 39 insertions(+), 27 deletions(-)
> >
> > diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
> > index a16983b18bd..c829f445a11 100644
> > --- a/gcc/config/s390/s390-builtins.def
> > +++ b/gcc/config/s390/s390-builtins.def
> > @@ -28,6 +28,7 @@
> > #undef O_U12
> > #undef O_U16
> > #undef O_U32
> > +#undef O_U64
> >
> > #undef O_M12
> >
> > @@ -88,6 +89,11 @@
> > #undef O3_U32
> > #undef O4_U32
> >
> > +#undef O1_U64
> > +#undef O2_U64
> > +#undef O3_U64
> > +#undef O4_U64
> > +
> > #undef O1_M12
> > #undef O2_M12
> > #undef O3_M12
> > @@ -157,20 +163,21 @@
> > #define O_U12 7 /* unsigned 16 bit literal */
> > #define O_U16 8 /* unsigned 16 bit literal */
> > #define O_U32 9 /* unsigned 32 bit literal */
> > +#define O_U64 10 /* unsigned 64 bit literal */
> >
> > -#define O_M12 10 /* matches bitmask of 12 */
> > +#define O_M12 11 /* matches bitmask of 12 */
> >
> > -#define O_S2 11 /* signed 2 bit literal */
> > -#define O_S3 12 /* signed 3 bit literal */
> > -#define O_S4 13 /* signed 4 bit literal */
> > -#define O_S5 14 /* signed 5 bit literal */
> > -#define O_S8 15 /* signed 8 bit literal */
> > -#define O_S12 16 /* signed 12 bit literal */
> > -#define O_S16 17 /* signed 16 bit literal */
> > -#define O_S32 18 /* signed 32 bit literal */
> > +#define O_S2 12 /* signed 2 bit literal */
> > +#define O_S3 13 /* signed 3 bit literal */
> > +#define O_S4 14 /* signed 4 bit literal */
> > +#define O_S5 15 /* signed 5 bit literal */
> > +#define O_S8 16 /* signed 8 bit literal */
> > +#define O_S12 17 /* signed 12 bit literal */
> > +#define O_S16 18 /* signed 16 bit literal */
> > +#define O_S32 19 /* signed 32 bit literal */
> >
> > -#define O_ELEM 19 /* Element selector requiring modulo arithmetic. */
> > -#define O_LIT 20 /* Operand must be a literal fitting the target type. */
> > +#define O_ELEM 20 /* Element selector requiring modulo arithmetic. */
> > +#define O_LIT 21 /* Operand must be a literal fitting the target type. */
> >
> > #define O_SHIFT 5
> >
> > @@ -223,6 +230,11 @@
> > #define O3_U32 (O_U32 << (2 * O_SHIFT))
> > #define O4_U32 (O_U32 << (3 * O_SHIFT))
> >
> > +#define O1_U64 O_U64
> > +#define O2_U64 (O_U64 << O_SHIFT)
> > +#define O3_U64 (O_U64 << (2 * O_SHIFT))
> > +#define O4_U64 (O_U64 << (3 * O_SHIFT))
> > +
> > #define O1_M12 O_M12
> > #define O2_M12 (O_M12 << O_SHIFT)
> > #define O3_M12 (O_M12 << (2 * O_SHIFT))
> > @@ -1989,19 +2001,19 @@ B_DEF (s390_verllvf, vrotlv4si3, 0,
> > B_DEF (s390_verllvg, vrotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
> >
> > OB_DEF (s390_vec_rli, s390_vec_rli_u8, s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, 0, BT_OV_UV16QI_UV16QI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, 0, BT_OV_V16QI_V16QI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, 0, BT_OV_UV8HI_UV8HI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, 0, BT_OV_V8HI_V8HI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, 0, BT_OV_UV4SI_UV4SI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, 0, BT_OV_V4SI_V4SI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, 0, BT_OV_UV2DI_UV2DI_ULONG)
> > -OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, 0, BT_OV_V2DI_V2DI_ULONG)
> > -
> > -B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UINT)
> > -B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UINT)
> > -B_DEF (s390_verllf, rotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT)
> > -B_DEF (s390_verllg, rotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UINT)
> > +OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, O2_U64, BT_OV_UV16QI_UV16QI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, O2_U64, BT_OV_V16QI_V16QI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, O2_U64, BT_OV_UV8HI_UV8HI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, O2_U64, BT_OV_V8HI_V8HI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, O2_U64, BT_OV_UV4SI_UV4SI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, O2_U64, BT_OV_V4SI_V4SI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, O2_U64, BT_OV_UV2DI_UV2DI_ULONG)
> > +OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, O2_U64, BT_OV_V2DI_V2DI_ULONG)
> > +
> > +B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, O2_U32, BT_FN_UV16QI_UV16QI_UINT)
> > +B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, O2_U32, BT_FN_UV8HI_UV8HI_UINT)
> > +B_DEF (s390_verllf, rotlv4si3, 0, B_VX, O2_U32, BT_FN_UV4SI_UV4SI_UINT)
> > +B_DEF (s390_verllg, rotlv2di3, 0, B_VX, O2_U32, BT_FN_UV2DI_UV2DI_UINT)
> >
> > OB_DEF (s390_vec_rl_mask, s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_UCHAR)
> > OB_DEF_VAR (s390_vec_rl_mask_s8, s390_verimb, 0, O3_U8, BT_OV_V16QI_V16QI_UV16QI_UCHAR)
> > diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> > index 49ab4fc7360..64f56d8effa 100644
> > --- a/gcc/config/s390/s390.cc
> > +++ b/gcc/config/s390/s390.cc
> > @@ -815,8 +815,8 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
> > {
> > if (O_UIMM_P (op_flags))
> > {
> > - unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
> > - unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
> > + unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 64, 4 };
> > + unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
> > unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
> > unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
> >
> > @@ -824,7 +824,7 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
> > gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
> >
> > if (!tree_fits_uhwi_p (arg)
> > - || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
> > + || tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
> > || (bitmask && tree_to_uhwi (arg) & ~bitmask))
> > {
> > if (bitmask)
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] s390: Fix builtins vec_rli and verll
2023-09-11 6:56 ` Stefan Schulze Frielinghaus
@ 2023-09-11 15:59 ` Andreas Krebbel
0 siblings, 0 replies; 4+ messages in thread
From: Andreas Krebbel @ 2023-09-11 15:59 UTC (permalink / raw)
To: Stefan Schulze Frielinghaus; +Cc: gcc-patches
On 9/11/23 08:56, Stefan Schulze Frielinghaus wrote:
> On Mon, Aug 28, 2023 at 11:33:37AM +0200, Andreas Krebbel wrote:
>> Hi Stefan,
>>
>> do you really need to introduce a new flag for U64 given that the type of the builtin is unsigned long?
>
> In function s390_const_operand_ok the immediate is checked whether it is
> valide w.r.t. the flag:
>
> tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
>
> Here bitwidth is derived from the flag.
I see, it is about enabling the constant check at all.
Ok, thanks!
Andreas
>
> Cheers,
> Stefan
>
>>
>> Andreas
>>
>> On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote:
>>> The second argument of these builtins is an unsigned immediate. For
>>> vec_rli the API allows immediates up to 64 bits whereas the instruction
>>> verll only allows immediates up to 32 bits. Since the shift count
>>> equals the immediate modulo vector element size, truncating those
>>> immediates is fine.
>>>
>>> Bootstrapped and regtested on s390. Ok for mainline?
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/s390/s390-builtins.def (O_U64): New.
>>> (O1_U64): Ditto.
>>> (O2_U64): Ditto.
>>> (O3_U64): Ditto.
>>> (O4_U64): Ditto.
>>> (O_M12): Change bit position.
>>> (O_S2): Ditto.
>>> (O_S3): Ditto.
>>> (O_S4): Ditto.
>>> (O_S5): Ditto.
>>> (O_S8): Ditto.
>>> (O_S12): Ditto.
>>> (O_S16): Ditto.
>>> (O_S32): Ditto.
>>> (O_ELEM): Ditto.
>>> (O_LIT): Ditto.
>>> (OB_DEF_VAR): Add operand constraints.
>>> (B_DEF): Ditto.
>>> * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
>>> operands.
>>> ---
>>> gcc/config/s390/s390-builtins.def | 60 ++++++++++++++++++-------------
>>> gcc/config/s390/s390.cc | 6 ++--
>>> 2 files changed, 39 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
>>> index a16983b18bd..c829f445a11 100644
>>> --- a/gcc/config/s390/s390-builtins.def
>>> +++ b/gcc/config/s390/s390-builtins.def
>>> @@ -28,6 +28,7 @@
>>> #undef O_U12
>>> #undef O_U16
>>> #undef O_U32
>>> +#undef O_U64
>>>
>>> #undef O_M12
>>>
>>> @@ -88,6 +89,11 @@
>>> #undef O3_U32
>>> #undef O4_U32
>>>
>>> +#undef O1_U64
>>> +#undef O2_U64
>>> +#undef O3_U64
>>> +#undef O4_U64
>>> +
>>> #undef O1_M12
>>> #undef O2_M12
>>> #undef O3_M12
>>> @@ -157,20 +163,21 @@
>>> #define O_U12 7 /* unsigned 16 bit literal */
>>> #define O_U16 8 /* unsigned 16 bit literal */
>>> #define O_U32 9 /* unsigned 32 bit literal */
>>> +#define O_U64 10 /* unsigned 64 bit literal */
>>>
>>> -#define O_M12 10 /* matches bitmask of 12 */
>>> +#define O_M12 11 /* matches bitmask of 12 */
>>>
>>> -#define O_S2 11 /* signed 2 bit literal */
>>> -#define O_S3 12 /* signed 3 bit literal */
>>> -#define O_S4 13 /* signed 4 bit literal */
>>> -#define O_S5 14 /* signed 5 bit literal */
>>> -#define O_S8 15 /* signed 8 bit literal */
>>> -#define O_S12 16 /* signed 12 bit literal */
>>> -#define O_S16 17 /* signed 16 bit literal */
>>> -#define O_S32 18 /* signed 32 bit literal */
>>> +#define O_S2 12 /* signed 2 bit literal */
>>> +#define O_S3 13 /* signed 3 bit literal */
>>> +#define O_S4 14 /* signed 4 bit literal */
>>> +#define O_S5 15 /* signed 5 bit literal */
>>> +#define O_S8 16 /* signed 8 bit literal */
>>> +#define O_S12 17 /* signed 12 bit literal */
>>> +#define O_S16 18 /* signed 16 bit literal */
>>> +#define O_S32 19 /* signed 32 bit literal */
>>>
>>> -#define O_ELEM 19 /* Element selector requiring modulo arithmetic. */
>>> -#define O_LIT 20 /* Operand must be a literal fitting the target type. */
>>> +#define O_ELEM 20 /* Element selector requiring modulo arithmetic. */
>>> +#define O_LIT 21 /* Operand must be a literal fitting the target type. */
>>>
>>> #define O_SHIFT 5
>>>
>>> @@ -223,6 +230,11 @@
>>> #define O3_U32 (O_U32 << (2 * O_SHIFT))
>>> #define O4_U32 (O_U32 << (3 * O_SHIFT))
>>>
>>> +#define O1_U64 O_U64
>>> +#define O2_U64 (O_U64 << O_SHIFT)
>>> +#define O3_U64 (O_U64 << (2 * O_SHIFT))
>>> +#define O4_U64 (O_U64 << (3 * O_SHIFT))
>>> +
>>> #define O1_M12 O_M12
>>> #define O2_M12 (O_M12 << O_SHIFT)
>>> #define O3_M12 (O_M12 << (2 * O_SHIFT))
>>> @@ -1989,19 +2001,19 @@ B_DEF (s390_verllvf, vrotlv4si3, 0,
>>> B_DEF (s390_verllvg, vrotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
>>>
>>> OB_DEF (s390_vec_rli, s390_vec_rli_u8, s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, 0, BT_OV_UV16QI_UV16QI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, 0, BT_OV_V16QI_V16QI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, 0, BT_OV_UV8HI_UV8HI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, 0, BT_OV_V8HI_V8HI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, 0, BT_OV_UV4SI_UV4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, 0, BT_OV_V4SI_V4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, 0, BT_OV_UV2DI_UV2DI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, 0, BT_OV_V2DI_V2DI_ULONG)
>>> -
>>> -B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UINT)
>>> -B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UINT)
>>> -B_DEF (s390_verllf, rotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT)
>>> -B_DEF (s390_verllg, rotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UINT)
>>> +OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, O2_U64, BT_OV_UV16QI_UV16QI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, O2_U64, BT_OV_V16QI_V16QI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, O2_U64, BT_OV_UV8HI_UV8HI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, O2_U64, BT_OV_V8HI_V8HI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, O2_U64, BT_OV_UV4SI_UV4SI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, O2_U64, BT_OV_V4SI_V4SI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, O2_U64, BT_OV_UV2DI_UV2DI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, O2_U64, BT_OV_V2DI_V2DI_ULONG)
>>> +
>>> +B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, O2_U32, BT_FN_UV16QI_UV16QI_UINT)
>>> +B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, O2_U32, BT_FN_UV8HI_UV8HI_UINT)
>>> +B_DEF (s390_verllf, rotlv4si3, 0, B_VX, O2_U32, BT_FN_UV4SI_UV4SI_UINT)
>>> +B_DEF (s390_verllg, rotlv2di3, 0, B_VX, O2_U32, BT_FN_UV2DI_UV2DI_UINT)
>>>
>>> OB_DEF (s390_vec_rl_mask, s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_UCHAR)
>>> OB_DEF_VAR (s390_vec_rl_mask_s8, s390_verimb, 0, O3_U8, BT_OV_V16QI_V16QI_UV16QI_UCHAR)
>>> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
>>> index 49ab4fc7360..64f56d8effa 100644
>>> --- a/gcc/config/s390/s390.cc
>>> +++ b/gcc/config/s390/s390.cc
>>> @@ -815,8 +815,8 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
>>> {
>>> if (O_UIMM_P (op_flags))
>>> {
>>> - unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
>>> - unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
>>> + unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 64, 4 };
>>> + unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
>>> unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
>>> unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
>>>
>>> @@ -824,7 +824,7 @@ s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
>>> gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
>>>
>>> if (!tree_fits_uhwi_p (arg)
>>> - || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
>>> + || tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
>>> || (bitmask && tree_to_uhwi (arg) & ~bitmask))
>>> {
>>> if (bitmask)
>>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-09-11 15:59 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-21 15:56 [PATCH] s390: Fix builtins vec_rli and verll Stefan Schulze Frielinghaus
2023-08-28 9:33 ` Andreas Krebbel
2023-09-11 6:56 ` Stefan Schulze Frielinghaus
2023-09-11 15:59 ` Andreas Krebbel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).