public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] openmp: Improve #pragma omp simd vectorization
@ 2020-09-26  8:18 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2020-09-26  8:18 UTC (permalink / raw)
  To: gcc-patches

Hi!

As mentioned earlier, the vectorizer punts on vectorization of loops with non-constant
steps.  Because the OpenMP language restrictions guarantee that the number of loop
iterations can always be computed before the loop, this change helps those cases
by computing it up front and using an alternate IV that iterates from 0 to
niterations - 1 with a step of 1, next to the normal IV, which is then just linear
in the alternate one.

List of functions in which, compared to current trunk, we now vectorize some loops that
we previously didn't (for c-c++-common only the C function names are listed, though both
C and C++ are affected):

gcc/testsuite/gcc.dg/vect/vect-simd-17.c doit
gcc/testsuite/gcc.dg/vect/vect-simd-18.c foo
gcc/testsuite/gcc.dg/vect/vect-simd-19.c foo
gcc/testsuite/gcc.dg/vect/vect-simd-20.c foo
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_auto
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_guided32
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_runtime
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_static
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_static32
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_auto._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_guided32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_runtime._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_static32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_static._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_simd_normal
libgomp/testsuite/libgomp.c-c++-common/for-2.c f5_simd_normal
libgomp/testsuite/libgomp.c-c++-common/for-2.c f6_simd_normal
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_auto._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_auto._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_guided32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_runtime._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_static32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_static._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_guided32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_runtime._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_static32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_static._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_ds_ds128_normal
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_ds_normal
libgomp/testsuite/libgomp.c-c++-common/for-4.c f3_taskloop_simd_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_t_simd_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttds_ds128_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttds_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-5.c f5_t_simd_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-5.c f6_t_simd_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tds_ds128_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tds_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_auto._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_auto._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_guided32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_runtime._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_static32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_static._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_guided32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_runtime._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_static32._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_static._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_ds_ds128_normal
libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_ds_normal
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_auto._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_guided32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_runtime._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_static32._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_static._omp_fn.1
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tds_ds128_normal._omp_fn.0
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tds_normal._omp_fn.0

Bootstrapped/regtested on x86_64-linux and i686-linux (the latter with the
switch param reverted), committed to trunk.

2020-09-26  Jakub Jelinek  <jakub@redhat.com>

	* omp-expand.c (expand_omp_simd): Help vectorizer for the collapse == 1
	and non-composite collapse > 1 case with non-constant innermost loop
	step by precomputing number of iterations before loop and using an
	alternate IV from 0 to number of iterations - 1 with step of 1.

	* gcc.dg/vect/vect-simd-17.c: Expect 11 or more vectorized loops.
	* gcc.dg/vect/vect-simd-18.c: New test.
	* gcc.dg/vect/vect-simd-19.c: New test.
	* gcc.dg/vect/vect-simd-20.c: New test.

--- gcc/omp-expand.c.jj	2020-09-25 16:03:06.795459111 +0200
+++ gcc/omp-expand.c	2020-09-25 18:43:42.018193503 +0200
@@ -6452,6 +6452,56 @@ expand_omp_simd (struct omp_region *regi
     }
   else
     expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
+  tree altv = NULL_TREE, altn2 = NULL_TREE;
+  if (fd->collapse == 1
+      && !broken_loop
+      && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
+    {
+      /* The vectorizer currently punts on loops with non-constant steps
+	 for the main IV (can't compute number of iterations and gives up
+	 because of that).  As for OpenMP loops it is always possible to
+	 compute the number of iterations upfront, use an alternate IV
+	 as the loop iterator:
+	 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
+	 for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
+      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
+      expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
+      tree itype = TREE_TYPE (fd->loop.v);
+      if (POINTER_TYPE_P (itype))
+	itype = signed_type_for (itype);
+      t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
+      t = fold_build2 (PLUS_EXPR, itype,
+		       fold_convert (itype, fd->loop.step), t);
+      t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
+      t = fold_build2 (MINUS_EXPR, itype, t,
+		       fold_convert (itype, fd->loop.v));
+      if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
+	t = fold_build2 (TRUNC_DIV_EXPR, itype,
+			 fold_build1 (NEGATE_EXPR, itype, t),
+			 fold_build1 (NEGATE_EXPR, itype,
+				      fold_convert (itype, fd->loop.step)));
+      else
+	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+			 fold_convert (itype, fd->loop.step));
+      t = fold_convert (TREE_TYPE (altv), t);
+      altn2 = create_tmp_var (TREE_TYPE (altv));
+      expand_omp_build_assign (&gsi, altn2, t);
+      tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
+      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
+				     true, GSI_SAME_STMT);
+      t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
+      gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
+					build_zero_cst (TREE_TYPE (altv)));
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+    }
+  else if (fd->collapse > 1
+	   && !broken_loop
+	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
+	   && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
+    {
+      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
+      altn2 = create_tmp_var (TREE_TYPE (altv));
+    }
   if (cond_var)
     {
       if (POINTER_TYPE_P (type)
@@ -6486,6 +6536,12 @@ expand_omp_simd (struct omp_region *regi
 	}
       else if (TREE_CODE (n2) != INTEGER_CST)
 	expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
+      if (altv)
+	{
+	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
+			   build_one_cst (TREE_TYPE (altv)));
+	  expand_omp_build_assign (&gsi, altv, t);
+	}
 
       if (fd->collapse > 1)
 	{
@@ -6525,9 +6581,11 @@ expand_omp_simd (struct omp_region *regi
   /* Emit the condition in L1_BB.  */
   gsi = gsi_start_bb (l1_bb);
 
-  if (fd->collapse > 1
-      && !gimple_omp_for_combined_into_p (fd->for_stmt)
-      && !broken_loop)
+  if (altv)
+    t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
+  else if (fd->collapse > 1
+	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
+	   && !broken_loop)
     {
       i = fd->collapse - 1;
       tree itype = TREE_TYPE (fd->loops[i].v);
@@ -6704,7 +6762,7 @@ expand_omp_simd (struct omp_region *regi
 	  expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
 	  if (fd->loops[i + 1].m2)
 	    {
-	      if (i + 2 == fd->collapse && n2var)
+	      if (i + 2 == fd->collapse && (n2var || altv))
 		{
 		  gcc_assert (n2v == NULL_TREE);
 		  n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
@@ -6761,6 +6819,50 @@ expand_omp_simd (struct omp_region *regi
 	      t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
 	      expand_omp_build_assign (&gsi, n2var, t);
 	    }
+	  if (i + 2 == fd->collapse && altv)
+	    {
+	      /* The vectorizer currently punts on loops with non-constant
+		 steps for the main IV (can't compute number of iterations
+		 and gives up because of that).  As for OpenMP loops it is
+		 always possible to compute the number of iterations upfront,
+		 use an alternate IV as the loop iterator.  */
+	      expand_omp_build_assign (&gsi, altv,
+				       build_zero_cst (TREE_TYPE (altv)));
+	      tree itype = TREE_TYPE (fd->loops[i + 1].v);
+	      if (POINTER_TYPE_P (itype))
+		itype = signed_type_for (itype);
+	      t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
+					 ? -1 : 1));
+	      t = fold_build2 (PLUS_EXPR, itype,
+			       fold_convert (itype, fd->loops[i + 1].step), t);
+	      t = fold_build2 (PLUS_EXPR, itype, t,
+			       fold_convert (itype,
+					     fd->loops[i + 1].m2
+					     ? n2v : fd->loops[i + 1].n2));
+	      t = fold_build2 (MINUS_EXPR, itype, t,
+			       fold_convert (itype, fd->loops[i + 1].v));
+	      tree step = fold_convert (itype, fd->loops[i + 1].step);
+	      if (TYPE_UNSIGNED (itype)
+		  && fd->loops[i + 1].cond_code == GT_EXPR)
+		t = fold_build2 (TRUNC_DIV_EXPR, itype,
+				 fold_build1 (NEGATE_EXPR, itype, t),
+				 fold_build1 (NEGATE_EXPR, itype, step));
+	      else
+		t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+	      t = fold_convert (TREE_TYPE (altv), t);
+	      expand_omp_build_assign (&gsi, altn2, t);
+	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
+				      fd->loops[i + 1].m2
+				      ? n2v : fd->loops[i + 1].n2);
+	      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
+					     true, GSI_SAME_STMT);
+	      t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
+				fd->loops[i + 1].v, t2);
+	      gassign *g
+		= gimple_build_assign (altn2, COND_EXPR, t2, altn2,
+				       build_zero_cst (TREE_TYPE (altv)));
+	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+	    }
 	  n2v = nextn2v;
 
 	  make_edge (init_bb, last_bb, EDGE_FALLTHRU);
--- gcc/testsuite/gcc.dg/vect/vect-simd-17.c.jj	2020-09-25 10:43:23.819834674 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-17.c	2020-09-25 18:46:10.040069254 +0200
@@ -1,6 +1,6 @@
 /* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */
 /* { dg-additional-options "-mavx" { target avx_runtime } } */
-/* { dg-final { scan-tree-dump "vectorized \(\[4-9]\|1\[0-2]\) loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1\[1-2] loops" "vect" { target i?86-*-* x86_64-*-* } } } */
 
 #include "tree-vect.h"
 
--- gcc/testsuite/gcc.dg/vect/vect-simd-18.c.jj	2020-09-25 16:39:28.269054366 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-18.c	2020-09-25 16:39:49.629746979 +0200
@@ -0,0 +1,46 @@
+/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+/* Simd loop with constant bounds and a step S that is not a compile-time
+   constant.  Stores every visited I into consecutive elements of P and
+   returns the sum of I * 3 over all iterations.  */
+
+__attribute__((noipa)) int
+foo (int s, int *p)
+{
+  int r = 0, l = 0, i;
+  #pragma omp simd reduction (+:r) linear(l)
+  for (i = 0; i < 10000; i += s)
+    {
+      p[l++] = i;
+      r += i * 3;
+    }
+  return r;
+}
+
+/* foo (78, p) runs 10000 / 78 + 1 iterations (i = 0, 78, ..., 9984) and
+   stores one element per iteration, so P needs 10000 / 78 + 1 elements.  */
+int p[10000 / 78 + 1];
+
+int
+main ()
+{
+  int i, r;
+  check_vect ();
+  r = foo (78, p);
+  for (i = 0; i < 10000 / 78; i++)
+    if (p[i] != 78 * i)
+      abort ();
+  if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3)
+    abort ();
+  r = foo (87, p);
+  for (i = 0; i < 10000 / 87; i++)
+    if (p[i] != 87 * i)
+      abort ();
+  if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3)
+    abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-19.c.jj	2020-09-25 18:36:18.633556451 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-19.c	2020-09-25 18:36:58.101990049 +0200
@@ -0,0 +1,46 @@
+/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+/* Simd loop whose bounds M, N and step S are all run-time values.
+   Stores every visited I into consecutive elements of P and returns the
+   sum of I * 3 over all iterations.  */
+
+__attribute__((noipa)) int
+foo (int s, int m, int n, int *p)
+{
+  int r = 0, l = 0, i;
+  #pragma omp simd reduction (+:r) linear(l)
+  for (i = m; i < n; i += s)
+    {
+      p[l++] = i;
+      r += i * 3;
+    }
+  return r;
+}
+
+/* foo (78, 0, 10000, p) runs 10000 / 78 + 1 iterations (i = 0, 78, ...,
+   9984) and stores one element each, so P needs 10000 / 78 + 1 elements.  */
+int p[10000 / 78 + 1];
+
+int
+main ()
+{
+  int i, r;
+  check_vect ();
+  r = foo (78, 0, 10000, p);
+  for (i = 0; i < 10000 / 78; i++)
+    if (p[i] != 78 * i)
+      abort ();
+  if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3)
+    abort ();
+  r = foo (87, 0, 10000, p);
+  for (i = 0; i < 10000 / 87; i++)
+    if (p[i] != 87 * i)
+      abort ();
+  if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3)
+    abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-20.c.jj	2020-09-25 18:37:07.038861799 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-20.c	2020-09-25 18:37:32.610494821 +0200
@@ -0,0 +1,51 @@
+/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+/* Collapsed simd loop nest whose innermost loop has run-time bounds M, N
+   and a run-time step S.  Stores every visited I into consecutive
+   elements of P and returns the sum of I * 3 over all iterations.  */
+
+__attribute__((noipa)) int
+foo (int s, int m, int n, int *p)
+{
+  int r = 0, l = 0, i, j;
+  #pragma omp simd reduction (+:r) linear(l) collapse(2)
+  for (j = 0; j < 7; j++)
+    for (i = m; i < n; i += s)
+      {
+	p[l++] = i;
+	r += i * 3;
+      }
+  return r;
+}
+
+/* Each of the 7 outer iterations of foo (78, 0, 10000, p) runs
+   10000 / 78 + 1 inner iterations (i = 0, 78, ..., 9984), and main
+   indexes P with that row stride, so P needs (10000 / 78 + 1) * 7
+   elements.  */
+int p[(10000 / 78 + 1) * 7];
+
+int
+main ()
+{
+  int i, j, r;
+  check_vect ();
+  r = foo (78, 0, 10000, p);
+  for (j = 0; j < 7; j++)
+    for (i = 0; i < 10000 / 78; i++)
+      if (p[j * (10000 / 78 + 1) + i] != 78 * i)
+	abort ();
+  if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3 * 7)
+    abort ();
+  r = foo (87, 0, 10000, p);
+  for (j = 0; j < 7; j++)
+    for (i = 0; i < 10000 / 87; i++)
+      if (p[j * (10000 / 87 + 1) + i] != 87 * i)
+	abort ();
+  if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3 * 7)
+    abort ();
+  return 0;
+}

	Jakub


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-09-26  8:19 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-26  8:18 [committed] openmp: Improve #pragma omp simd vectorization Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).