public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [AArch64] Fix NEON load/store gimple lowering and big-endian testisms
@ 2021-11-04  9:55 Andre Vieira (lists)
  2021-11-04 17:48 ` Richard Sandiford
  0 siblings, 1 reply; 8+ messages in thread
From: Andre Vieira (lists) @ 2021-11-04  9:55 UTC (permalink / raw)
  To: Richard Sandiford, Kyrylo Tkachov, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]

Hi,

This should address the ubsan bootstrap build and big-endian testisms 
reported against the last NEON load/store gimple lowering patch. I also 
fixed a follow-up issue where the alias information was leading to a bad 
codegen transformation. The NEON intrinsics specifications do not forbid 
the use of memory accesses with different pointer types. In fact you 
will see intrinsic user code loading a int16x8_t vector from an int 
pointer, so we must make sure GCC is aware a NEON memory access of an 
'int' pointer can alias with a 'short' pointer.

Bootstrapped aarch64-linux-gnu (also did an ubsan bootstrap).

Is this OK for trunk?

gcc/ChangeLog:

         * config/aarch64/aarch64-builtins.c 
(aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.

gcc/testsuite/ChangeLog:

         * gcc.target/aarch64/fmla_intrinsic_1.c: Fix big-endian testism.
         * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
         * gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.

[-- Attachment #2: neon_load_store_fix.patch --]
[-- Type: text/plain, Size: 5938 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index a815e4cfbccab692ca688ba87c71b06c304abbfb..fc8fcb02c55e22963d2a3bf77b4749eb5b1c1561 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2486,16 +2486,22 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	    aarch64_simd_type_info simd_type
 	      = aarch64_simd_types[mem_type];
 	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
+	    TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
 	    tree zero = build_zero_cst (elt_ptr_type);
 	    gimple_seq stmts = NULL;
 	    tree base = gimple_convert (&stmts, elt_ptr_type,
 					args[0]);
+	    /* Use element type alignment.  */
+	    tree access_type
+	      = build_aligned_type (simd_type.itype,
+				    TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
 	    if (stmts)
 	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	    new_stmt
 	      = gimple_build_assign (gimple_get_lhs (stmt),
 				     fold_build2 (MEM_REF,
-						  simd_type.itype,
+						  access_type,
 						  base, zero));
 	  }
 	break;
@@ -2508,17 +2514,22 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	    aarch64_simd_type_info simd_type
 	      = aarch64_simd_types[mem_type];
 	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
+	    TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
 	    tree zero = build_zero_cst (elt_ptr_type);
 	    gimple_seq stmts = NULL;
 	    tree base = gimple_convert (&stmts, elt_ptr_type,
 					args[0]);
+	    /* Use element type alignment.  */
+	    tree access_type
+	      = build_aligned_type (simd_type.itype,
+				    TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
 	    if (stmts)
 	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	    new_stmt
-	      = gimple_build_assign (fold_build2 (MEM_REF,
-				     simd_type.itype,
-				     base,
-				     zero), args[1]);
+	      = gimple_build_assign (fold_build2 (MEM_REF, access_type,
+						  base, zero),
+				     args[1]);
 	  }
 	break;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
index adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
@@ -107,10 +107,12 @@ main (int argc, char **argv)
 
 /* vfma_lane_f64.
    vfma_laneq_f64. */
-/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vfmaq_lane_f64.
    vfmaq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
index 865def28c3f4d04042ab495d232bb865cabb2b50..3137ea91e809e37de589091e9bbd43bfe4d221a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
@@ -108,10 +108,12 @@ main (int argc, char **argv)
 
 /* vfms_lane_f64.
    vfms_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vfmsq_lane_f64.
    vfmsq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
index d01095e81c1e45dc1da998aa337ba551b3752ebe..7d4829c40d7042226f2f09fab9fdfa7c3dd211c4 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
@@ -107,10 +107,12 @@ main (int argc, char **argv)
 
 /* vmul_lane_f64.
    Vmul_laneq_f64. */
-/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vmulq_lane_f64.
    vmulq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [AArch64] Fix NEON load/store gimple lowering and big-endian testisms
  2021-11-04  9:55 [AArch64] Fix NEON load/store gimple lowering and big-endian testisms Andre Vieira (lists)
@ 2021-11-04 17:48 ` Richard Sandiford
  2021-11-05 10:27   ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Sandiford @ 2021-11-04 17:48 UTC (permalink / raw)
  To: Andre Vieira (lists); +Cc: Kyrylo Tkachov, gcc-patches

"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes:
> Hi,
>
> This should address the ubsan bootstrap build and big-endian testisms 
> reported against the last NEON load/store gimple lowering patch. I also 
> fixed a follow-up issue where the alias information was leading to a bad 
> codegen transformation. The NEON intrinsics specifications do not forbid 
> the use of memory accesses with different pointer types. In fact you 
> will see intrinsic user code loading a int16x8_t vector from an int 
> pointer, so we must make sure GCC is aware a NEON memory access of an 
> 'int' pointer can alias with a 'short' pointer.
>
> Bootstrapped aarch64-linux-gnu (also did an ubsan bootstrap).
>
> Is this OK for trunk?
>
> gcc/ChangeLog:
>
>          * config/aarch64/aarch64-builtins.c 
> (aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.
>
> gcc/testsuite/ChangeLog:
>
>          * gcc.target/aarch64/fmla_intrinsic_1.c: Fix big-endian testism.
>          * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
>          * gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index a815e4cfbccab692ca688ba87c71b06c304abbfb..fc8fcb02c55e22963d2a3bf77b4749eb5b1c1561 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -2486,16 +2486,22 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
>  	    aarch64_simd_type_info simd_type
>  	      = aarch64_simd_types[mem_type];
>  	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> +	    elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> +	    TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
>  	    tree zero = build_zero_cst (elt_ptr_type);
>  	    gimple_seq stmts = NULL;
>  	    tree base = gimple_convert (&stmts, elt_ptr_type,
>  					args[0]);

This conversion seems redundant.  Do things work if we use args[0]
directly?

> +	    /* Use element type alignment.  */
> +	    tree access_type
> +	      = build_aligned_type (simd_type.itype,
> +				    TYPE_ALIGN (TREE_TYPE (simd_type.itype)));

I think simd_type.eltype is more natural than TREE_TYPE (simd_type.itype)
here, to match the pointer target type.

Same idea for the stores.

>  	    if (stmts)
>  	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>  	    new_stmt
>  	      = gimple_build_assign (gimple_get_lhs (stmt),
>  				     fold_build2 (MEM_REF,
> -						  simd_type.itype,
> +						  access_type,
>  						  base, zero));
>  	  }
>  	break;
> @@ -2508,17 +2514,22 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
>  	    aarch64_simd_type_info simd_type
>  	      = aarch64_simd_types[mem_type];
>  	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> +	    elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> +	    TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
>  	    tree zero = build_zero_cst (elt_ptr_type);
>  	    gimple_seq stmts = NULL;
>  	    tree base = gimple_convert (&stmts, elt_ptr_type,
>  					args[0]);
> +	    /* Use element type alignment.  */
> +	    tree access_type
> +	      = build_aligned_type (simd_type.itype,
> +				    TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
>  	    if (stmts)
>  	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>  	    new_stmt
> -	      = gimple_build_assign (fold_build2 (MEM_REF,
> -				     simd_type.itype,
> -				     base,
> -				     zero), args[1]);
> +	      = gimple_build_assign (fold_build2 (MEM_REF, access_type,
> +						  base, zero),
> +				     args[1]);
>  	  }
>  	break;
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> index adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> @@ -107,10 +107,12 @@ main (int argc, char **argv)
>  
>  /* vfma_lane_f64.
>     vfma_laneq_f64. */
> -/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
> +/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */

Could you explain this in more detail?  What happens for little-endian
vs. big-endian?

Thanks,
Richard

>  
>  /* vfmaq_lane_f64.
>     vfmaq_laneq_f64.  */
> -/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
> +/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
>  
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
> index 865def28c3f4d04042ab495d232bb865cabb2b50..3137ea91e809e37de589091e9bbd43bfe4d221a1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
> @@ -108,10 +108,12 @@ main (int argc, char **argv)
>  
>  /* vfms_lane_f64.
>     vfms_laneq_f64.  */
> -/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
> +/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
>  
>  /* vfmsq_lane_f64.
>     vfmsq_laneq_f64.  */
> -/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
> +/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
>  
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
> index d01095e81c1e45dc1da998aa337ba551b3752ebe..7d4829c40d7042226f2f09fab9fdfa7c3dd211c4 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
> @@ -107,10 +107,12 @@ main (int argc, char **argv)
>  
>  /* vmul_lane_f64.
>     Vmul_laneq_f64. */
> -/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
> +/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
>  
>  /* vmulq_lane_f64.
>     vmulq_laneq_f64.  */
> -/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
> +/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
>  
>  

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [AArch64] Fix NEON load/store gimple lowering and big-endian testisms
  2021-11-04 17:48 ` Richard Sandiford
@ 2021-11-05 10:27   ` Richard Biener
  2021-11-09  9:19     ` Andre Vieira (lists)
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-11-05 10:27 UTC (permalink / raw)
  To: Richard Sandiford, Andre Vieira (lists), Kyrylo Tkachov, gcc-patches

On Thu, Nov 4, 2021 at 6:49 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> "Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes:
> > Hi,
> >
> > This should address the ubsan bootstrap build and big-endian testisms
> > reported against the last NEON load/store gimple lowering patch. I also
> > fixed a follow-up issue where the alias information was leading to a bad
> > codegen transformation. The NEON intrinsics specifications do not forbid
> > the use of memory accesses with different pointer types. In fact you
> > will see intrinsic user code loading a int16x8_t vector from an int
> > pointer, so we must make sure GCC is aware a NEON memory access of an
> > 'int' pointer can alias with a 'short' pointer.
> >
> > Bootstrapped aarch64-linux-gnu (also did an ubsan bootstrap).
> >
> > Is this OK for trunk?
> >
> > gcc/ChangeLog:
> >
> >          * config/aarch64/aarch64-builtins.c
> > (aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.
> >
> > gcc/testsuite/ChangeLog:
> >
> >          * gcc.target/aarch64/fmla_intrinsic_1.c: Fix big-endian testism.
> >          * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
> >          * gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.
> >
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> > index a815e4cfbccab692ca688ba87c71b06c304abbfb..fc8fcb02c55e22963d2a3bf77b4749eb5b1c1561 100644
> > --- a/gcc/config/aarch64/aarch64-builtins.c
> > +++ b/gcc/config/aarch64/aarch64-builtins.c
> > @@ -2486,16 +2486,22 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
> >           aarch64_simd_type_info simd_type
> >             = aarch64_simd_types[mem_type];
> >           tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> > +         elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> > +         TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
> >           tree zero = build_zero_cst (elt_ptr_type);
> >           gimple_seq stmts = NULL;
> >           tree base = gimple_convert (&stmts, elt_ptr_type,
> >                                       args[0]);
>
> This conversion seems redundant.  Do things work if we use args[0]
> directly?

Just use

   tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
VOIDmode, true);

that will build a ref-all pointer for you, appropriately shared.

>
> > +         /* Use element type alignment.  */
> > +         tree access_type
> > +           = build_aligned_type (simd_type.itype,
> > +                                 TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
>
> I think simd_type.eltype is more natural than TREE_TYPE (simd_type.itype)
> here, to match the pointer target type.
>
> Same idea for the stores.
>
> >           if (stmts)
> >             gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >           new_stmt
> >             = gimple_build_assign (gimple_get_lhs (stmt),
> >                                    fold_build2 (MEM_REF,
> > -                                               simd_type.itype,
> > +                                               access_type,
> >                                                 base, zero));
> >         }
> >       break;
> > @@ -2508,17 +2514,22 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
> >           aarch64_simd_type_info simd_type
> >             = aarch64_simd_types[mem_type];
> >           tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> > +         elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> > +         TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
> >           tree zero = build_zero_cst (elt_ptr_type);
> >           gimple_seq stmts = NULL;
> >           tree base = gimple_convert (&stmts, elt_ptr_type,
> >                                       args[0]);
> > +         /* Use element type alignment.  */
> > +         tree access_type
> > +           = build_aligned_type (simd_type.itype,
> > +                                 TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
> >           if (stmts)
> >             gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >           new_stmt
> > -           = gimple_build_assign (fold_build2 (MEM_REF,
> > -                                  simd_type.itype,
> > -                                  base,
> > -                                  zero), args[1]);
> > +           = gimple_build_assign (fold_build2 (MEM_REF, access_type,
> > +                                               base, zero),
> > +                                  args[1]);
> >         }
> >       break;
> >
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> > index adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> > @@ -107,10 +107,12 @@ main (int argc, char **argv)
> >
> >  /* vfma_lane_f64.
> >     vfma_laneq_f64. */
> > -/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
> > +/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> > +/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
>
> Could you explain this in more detail?  What happens for little-endian
> vs. big-endian?
>
> Thanks,
> Richard
>
> >
> >  /* vfmaq_lane_f64.
> >     vfmaq_laneq_f64.  */
> > -/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
> > +/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
> > +/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
> >
> >
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
> > index 865def28c3f4d04042ab495d232bb865cabb2b50..3137ea91e809e37de589091e9bbd43bfe4d221a1 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
> > @@ -108,10 +108,12 @@ main (int argc, char **argv)
> >
> >  /* vfms_lane_f64.
> >     vfms_laneq_f64.  */
> > -/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
> > +/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> > +/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
> >
> >  /* vfmsq_lane_f64.
> >     vfmsq_laneq_f64.  */
> > -/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
> > +/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
> > +/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
> >
> >
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
> > index d01095e81c1e45dc1da998aa337ba551b3752ebe..7d4829c40d7042226f2f09fab9fdfa7c3dd211c4 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
> > @@ -107,10 +107,12 @@ main (int argc, char **argv)
> >
> >  /* vmul_lane_f64.
> >     Vmul_laneq_f64. */
> > -/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
> > +/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> > +/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
> >
> >  /* vmulq_lane_f64.
> >     vmulq_laneq_f64.  */
> > -/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
> > +/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
> > +/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
> >
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [AArch64] Fix NEON load/store gimple lowering and big-endian testisms
  2021-11-05 10:27   ` Richard Biener
@ 2021-11-09  9:19     ` Andre Vieira (lists)
  2021-11-09 10:47       ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Andre Vieira (lists) @ 2021-11-09  9:19 UTC (permalink / raw)
  To: Richard Biener, Richard Sandiford, Kyrylo Tkachov, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 63 bytes --]

Thank you both!

Here is a reworked version, this OK for trunk?

[-- Attachment #2: fix_neon_mem_align.patch --]
[-- Type: text/plain, Size: 6017 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index a815e4cfbccab692ca688ba87c71b06c304abbfb..e06131a7c61d31c1be3278dcdccc49c3053c78cb 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2485,18 +2485,18 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	      = get_mem_type_for_load_store(fcode);
 	    aarch64_simd_type_info simd_type
 	      = aarch64_simd_types[mem_type];
-	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
+							     VOIDmode, true);
 	    tree zero = build_zero_cst (elt_ptr_type);
-	    gimple_seq stmts = NULL;
-	    tree base = gimple_convert (&stmts, elt_ptr_type,
-					args[0]);
-	    if (stmts)
-	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    /* Use element type alignment.  */
+	    tree access_type
+	      = build_aligned_type (simd_type.itype,
+				    TYPE_ALIGN (simd_type.eltype));
 	    new_stmt
 	      = gimple_build_assign (gimple_get_lhs (stmt),
 				     fold_build2 (MEM_REF,
-						  simd_type.itype,
-						  base, zero));
+						  access_type,
+						  args[0], zero));
 	  }
 	break;
 
@@ -2507,18 +2507,17 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	      = get_mem_type_for_load_store(fcode);
 	    aarch64_simd_type_info simd_type
 	      = aarch64_simd_types[mem_type];
-	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
+							     VOIDmode, true);
 	    tree zero = build_zero_cst (elt_ptr_type);
-	    gimple_seq stmts = NULL;
-	    tree base = gimple_convert (&stmts, elt_ptr_type,
-					args[0]);
-	    if (stmts)
-	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    /* Use element type alignment.  */
+	    tree access_type
+	      = build_aligned_type (simd_type.itype,
+				    TYPE_ALIGN (simd_type.eltype));
 	    new_stmt
-	      = gimple_build_assign (fold_build2 (MEM_REF,
-				     simd_type.itype,
-				     base,
-				     zero), args[1]);
+	      = gimple_build_assign (fold_build2 (MEM_REF, access_type,
+						  args[0], zero),
+				     args[1]);
 	  }
 	break;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
index adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
@@ -107,10 +107,12 @@ main (int argc, char **argv)
 
 /* vfma_lane_f64.
    vfma_laneq_f64. */
-/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vfmaq_lane_f64.
    vfmaq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
index 865def28c3f4d04042ab495d232bb865cabb2b50..3137ea91e809e37de589091e9bbd43bfe4d221a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
@@ -108,10 +108,12 @@ main (int argc, char **argv)
 
 /* vfms_lane_f64.
    vfms_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vfmsq_lane_f64.
    vfmsq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
index d01095e81c1e45dc1da998aa337ba551b3752ebe..7d4829c40d7042226f2f09fab9fdfa7c3dd211c4 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
@@ -107,10 +107,12 @@ main (int argc, char **argv)
 
 /* vmul_lane_f64.
    Vmul_laneq_f64. */
-/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vmulq_lane_f64.
    vmulq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [AArch64] Fix NEON load/store gimple lowering and big-endian testisms
  2021-11-09  9:19     ` Andre Vieira (lists)
@ 2021-11-09 10:47       ` Richard Biener
  2021-11-09 12:24         ` [AArch64] Fix big-endian testisms introduced by NEON gimple lowering patch Andre Vieira (lists)
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-11-09 10:47 UTC (permalink / raw)
  To: Andre Vieira (lists); +Cc: Richard Sandiford, Kyrylo Tkachov, gcc-patches

On Tue, Nov 9, 2021 at 10:19 AM Andre Vieira (lists)
<andre.simoesdiasvieira@arm.com> wrote:
>
> Thank you both!
>
> Here is a reworked version, this OK for trunk?

Looks good to me.

Richard.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [AArch64] Fix big-endian testisms introduced by NEON gimple lowering patch
  2021-11-09 10:47       ` Richard Biener
@ 2021-11-09 12:24         ` Andre Vieira (lists)
  2021-11-09 12:27           ` [AArch64] Fix TBAA information when lowering NEON loads and stores to gimple Andre Vieira (lists)
  0 siblings, 1 reply; 8+ messages in thread
From: Andre Vieira (lists) @ 2021-11-09 12:24 UTC (permalink / raw)
  To: Richard Biener; +Cc: Richard Sandiford, Kyrylo Tkachov, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 722 bytes --]

Decided to split the patches up to make it clear that the testisms fixes 
had nothing to do with the TBAA fix. I'll be committing these two separately

First:

[AArch64] Fix big-endian testisms introduced by NEON gimple lowering patch

This patch reverts the tests for big-endian after the NEON gimple lowering
patch.  The earlier patch only lowers NEON loads and stores for 
little-endian,
meaning the codegen now differs between endinanness so we need target 
specific
testing.

gcc/testsuite/ChangeLog:

         * gcc.target/aarch64/fmla_intrinsic_1.c: Fix big-endian testism.
         * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
         * gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.

[-- Attachment #2: fix_be_testisms_neon_lowering.patch --]
[-- Type: text/plain, Size: 3727 bytes --]

diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
index adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
@@ -107,10 +107,12 @@ main (int argc, char **argv)
 
 /* vfma_lane_f64.
    vfma_laneq_f64. */
-/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vfmaq_lane_f64.
    vfmaq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
index 865def28c3f4d04042ab495d232bb865cabb2b50..3137ea91e809e37de589091e9bbd43bfe4d221a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c
@@ -108,10 +108,12 @@ main (int argc, char **argv)
 
 /* vfms_lane_f64.
    vfms_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vfmsq_lane_f64.
    vfmsq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
index d01095e81c1e45dc1da998aa337ba551b3752ebe..7d4829c40d7042226f2f09fab9fdfa7c3dd211c4 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c
@@ -107,10 +107,12 @@ main (int argc, char **argv)
 
 /* vmul_lane_f64.
    Vmul_laneq_f64. */
-/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 { target aarch64_little_endian } } } */
 
 /* vmulq_lane_f64.
    vmulq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 { target aarch64_little_endian } } } */
 
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [AArch64] Fix TBAA information when lowering NEON loads and stores to gimple
  2021-11-09 12:24         ` [AArch64] Fix big-endian testisms introduced by NEON gimple lowering patch Andre Vieira (lists)
@ 2021-11-09 12:27           ` Andre Vieira (lists)
  2021-11-09 15:00             ` Richard Sandiford
  0 siblings, 1 reply; 8+ messages in thread
From: Andre Vieira (lists) @ 2021-11-09 12:27 UTC (permalink / raw)
  To: Richard Biener; +Cc: Richard Sandiford, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 488 bytes --]

And second (also added a test):

[AArch64] Fix TBAA information when lowering NEON loads and stores to gimple

This patch fixes the wrong TBAA information when lowering NEON loads and 
stores
to gimple that showed up when bootstrapping with UBSAN.

gcc/ChangeLog:

         * config/aarch64/aarch64-builtins.c 
(aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.

gcc/testsuite/ChangeLog:

         * gcc.target/aarch64/simd/lowering_tbaa.c: New test.

[-- Attachment #2: fix_tbaa_lowering.patch --]
[-- Type: text/plain, Size: 3174 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index a815e4cfbccab692ca688ba87c71b06c304abbfb..e06131a7c61d31c1be3278dcdccc49c3053c78cb 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2485,18 +2485,18 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	      = get_mem_type_for_load_store(fcode);
 	    aarch64_simd_type_info simd_type
 	      = aarch64_simd_types[mem_type];
-	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
+							     VOIDmode, true);
 	    tree zero = build_zero_cst (elt_ptr_type);
-	    gimple_seq stmts = NULL;
-	    tree base = gimple_convert (&stmts, elt_ptr_type,
-					args[0]);
-	    if (stmts)
-	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    /* Use element type alignment.  */
+	    tree access_type
+	      = build_aligned_type (simd_type.itype,
+				    TYPE_ALIGN (simd_type.eltype));
 	    new_stmt
 	      = gimple_build_assign (gimple_get_lhs (stmt),
 				     fold_build2 (MEM_REF,
-						  simd_type.itype,
-						  base, zero));
+						  access_type,
+						  args[0], zero));
 	  }
 	break;
 
@@ -2507,18 +2507,17 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	      = get_mem_type_for_load_store(fcode);
 	    aarch64_simd_type_info simd_type
 	      = aarch64_simd_types[mem_type];
-	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
+							     VOIDmode, true);
 	    tree zero = build_zero_cst (elt_ptr_type);
-	    gimple_seq stmts = NULL;
-	    tree base = gimple_convert (&stmts, elt_ptr_type,
-					args[0]);
-	    if (stmts)
-	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    /* Use element type alignment.  */
+	    tree access_type
+	      = build_aligned_type (simd_type.itype,
+				    TYPE_ALIGN (simd_type.eltype));
 	    new_stmt
-	      = gimple_build_assign (fold_build2 (MEM_REF,
-				     simd_type.itype,
-				     base,
-				     zero), args[1]);
+	      = gimple_build_assign (fold_build2 (MEM_REF, access_type,
+						  args[0], zero),
+				     args[1]);
 	  }
 	break;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/lowering_tbaa.c b/gcc/testsuite/gcc.target/aarch64/simd/lowering_tbaa.c
new file mode 100644
index 0000000000000000000000000000000000000000..eaeae21f19c7d2d8d4e032f2f8b1b22bb96b7ca4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/lowering_tbaa.c
@@ -0,0 +1,30 @@
+/* Tests the TBAA information of lowered AArch64 SIMD loads.  */
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2" } */
+
+#include <arm_neon.h>
+
+void __attribute__((noipa))
+g (float *)
+{
+}
+
+int32x4_t __attribute__((noipa))
+f (void)
+{
+  float a[4] = { 1, 2, 3, 4 };
+  g (a);
+  a[0] = a[1] = a[2] = a[3] = 0;
+  void *volatile ptr = a;
+  return vld1q_s32 ((int32_t *) ptr);
+}
+
+int
+main (void)
+{
+  int32x4_t x = f ();
+  int32x4_t y = vdupq_n_s32 (0);
+  if (__builtin_memcmp (&x, &y, 16) != 0)
+    __builtin_abort ();
+  return 0;
+}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [AArch64] Fix TBAA information when lowering NEON loads and stores to gimple
  2021-11-09 12:27           ` [AArch64] Fix TBAA information when lowering NEON loads and stores to gimple Andre Vieira (lists)
@ 2021-11-09 15:00             ` Richard Sandiford
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Sandiford @ 2021-11-09 15:00 UTC (permalink / raw)
  To: Andre Vieira (lists); +Cc: Richard Biener, gcc-patches

"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes:
> And second (also added a test):
>
> [AArch64] Fix TBAA information when lowering NEON loads and stores to gimple
>
> This patch fixes the wrong TBAA information when lowering NEON loads and 
> stores
> to gimple that showed up when bootstrapping with UBSAN.
>
> gcc/ChangeLog:
>
>          * config/aarch64/aarch64-builtins.c 
> (aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.
>
> gcc/testsuite/ChangeLog:
>
>          * gcc.target/aarch64/simd/lowering_tbaa.c: New test.

OK, thanks.

Richard

> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index a815e4cfbccab692ca688ba87c71b06c304abbfb..e06131a7c61d31c1be3278dcdccc49c3053c78cb 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -2485,18 +2485,18 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
>  	      = get_mem_type_for_load_store(fcode);
>  	    aarch64_simd_type_info simd_type
>  	      = aarch64_simd_types[mem_type];
> -	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> +	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
> +							     VOIDmode, true);
>  	    tree zero = build_zero_cst (elt_ptr_type);
> -	    gimple_seq stmts = NULL;
> -	    tree base = gimple_convert (&stmts, elt_ptr_type,
> -					args[0]);
> -	    if (stmts)
> -	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +	    /* Use element type alignment.  */
> +	    tree access_type
> +	      = build_aligned_type (simd_type.itype,
> +				    TYPE_ALIGN (simd_type.eltype));
>  	    new_stmt
>  	      = gimple_build_assign (gimple_get_lhs (stmt),
>  				     fold_build2 (MEM_REF,
> -						  simd_type.itype,
> -						  base, zero));
> +						  access_type,
> +						  args[0], zero));
>  	  }
>  	break;
>  
> @@ -2507,18 +2507,17 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
>  	      = get_mem_type_for_load_store(fcode);
>  	    aarch64_simd_type_info simd_type
>  	      = aarch64_simd_types[mem_type];
> -	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> +	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
> +							     VOIDmode, true);
>  	    tree zero = build_zero_cst (elt_ptr_type);
> -	    gimple_seq stmts = NULL;
> -	    tree base = gimple_convert (&stmts, elt_ptr_type,
> -					args[0]);
> -	    if (stmts)
> -	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +	    /* Use element type alignment.  */
> +	    tree access_type
> +	      = build_aligned_type (simd_type.itype,
> +				    TYPE_ALIGN (simd_type.eltype));
>  	    new_stmt
> -	      = gimple_build_assign (fold_build2 (MEM_REF,
> -				     simd_type.itype,
> -				     base,
> -				     zero), args[1]);
> +	      = gimple_build_assign (fold_build2 (MEM_REF, access_type,
> +						  args[0], zero),
> +				     args[1]);
>  	  }
>  	break;
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/lowering_tbaa.c b/gcc/testsuite/gcc.target/aarch64/simd/lowering_tbaa.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..eaeae21f19c7d2d8d4e032f2f8b1b22bb96b7ca4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/lowering_tbaa.c
> @@ -0,0 +1,30 @@
> +/* Tests the TBAA information of lowered AArch64 SIMD loads.  */
> +/* { dg-do run } */
> +/* { dg-options "-save-temps -O2" } */
> +
> +#include <arm_neon.h>
> +
> +void __attribute__((noipa))
> +g (float *)
> +{
> +}
> +
> +int32x4_t __attribute__((noipa))
> +f (void)
> +{
> +  float a[4] = { 1, 2, 3, 4 };
> +  g (a);
> +  a[0] = a[1] = a[2] = a[3] = 0;
> +  void *volatile ptr = a;
> +  return vld1q_s32 ((int32_t *) ptr);
> +}
> +
> +int
> +main (void)
> +{
> +  int32x4_t x = f ();
> +  int32x4_t y = vdupq_n_s32 (0);
> +  if (__builtin_memcmp (&x, &y, 16) != 0)
> +    __builtin_abort ();
> +  return 0;
> +}

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-11-09 15:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-04  9:55 [AArch64] Fix NEON load/store gimple lowering and big-endian testisms Andre Vieira (lists)
2021-11-04 17:48 ` Richard Sandiford
2021-11-05 10:27   ` Richard Biener
2021-11-09  9:19     ` Andre Vieira (lists)
2021-11-09 10:47       ` Richard Biener
2021-11-09 12:24         ` [AArch64] Fix big-endian testisms introduced by NEON gimple lowering patch Andre Vieira (lists)
2021-11-09 12:27           ` [AArch64] Fix TBAA information when lowering NEON loads and stores to gimple Andre Vieira (lists)
2021-11-09 15:00             ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).