public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] OpenMP: Enable vectorization in all OpenMP loops
@ 2022-09-14 17:31 Sandra Loosemore
  2022-09-14 17:55 ` Jakub Jelinek
  0 siblings, 1 reply; 2+ messages in thread
From: Sandra Loosemore @ 2022-09-14 17:31 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 644 bytes --]

GCC presently enables the loop vectorizer at lower optimization levels 
for OpenMP loops with the "simd" specifier than it does for loops 
without it.  The "simd" specifier isn't defined to be purely an 
optimization hint to the compiler; it also has semantic effects like 
changing the privatization of the loop variable.  It seems reasonable to 
decouple the additional vectorization from those semantic effects and 
apply it also to work-sharing loops without the "simd" specifier at the 
same optimization levels.

I've tested this patch on x86_64-linux-gnu-amdgcn, plain 
x86_64-linux-gnu, and aarch64-linux-gnu.  OK for mainline?

-Sandra

[-- Attachment #2: 0001-OpenMP-Enable-vectorization-in-all-OpenMP-loops.patch --]
[-- Type: text/x-patch, Size: 9423 bytes --]

From 15c6f6b6bc396f53474ea380f506a7f74d7a05af Mon Sep 17 00:00:00 2001
From: Sandra Loosemore <sandra@codesourcery.com>
Date: Tue, 13 Sep 2022 23:50:27 +0000
Subject: [PATCH] OpenMP: Enable vectorization in all OpenMP loops

This patch marks all OpenMP worksharing loops (not just those with the
simd specifier) as candidates for vectorization when -ftree-loop-optimize
is active and loop vectorization is not explicitly disabled with
-fno-tree-loop-vectorize.

gcc/ChangeLog:

	* omp-expand.cc (maybe_auto_vectorize_loop): New.
	(expand_omp_for_generic): Call it.
	(expand_omp_for_static_nochunk): Likewise.
	(expand_omp_for_static_chunk): Likewise.
	(expand_omp_taskloop_for_inner): Likewise.
	(expand_oacc_for): Likewise.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/vectorize-1.c: New.
	* c-c++-common/gomp/vectorize-2.c: New.
	* c-c++-common/gomp/vectorize-3.c: New.
	* c-c++-common/gomp/vectorize-s.c: New.
	* gcc.dg/gomp/pr46032-2.c: Compile with -fno-tree-loop-vectorize.
	* gcc.dg/gomp/pr46032-3.c: Likewise.
---
 gcc/omp-expand.cc                             | 23 ++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-1.c | 31 +++++++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-2.c | 31 +++++++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-3.c | 31 +++++++++++++++++++
 gcc/testsuite/c-c++-common/gomp/vectorize-s.c | 31 +++++++++++++++++++
 gcc/testsuite/gcc.dg/gomp/pr46032-2.c         |  2 +-
 gcc/testsuite/gcc.dg/gomp/pr46032-3.c         |  2 +-
 7 files changed, 149 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-3.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/vectorize-s.c

diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index fcaf4f6d4e9..bc753814102 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -3711,6 +3711,22 @@ expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
   return cont_bb;
 }
 
+/* Helper function for various subroutines of expand_omp_for.
+   If -ftree-loop-optimize is enabled and -fno-tree-loop-vectorize was not
+   given explicitly, hint that we want to vectorize the loop LOOP.  */
+static void
+maybe_auto_vectorize_loop (class loop *loop)
+{
+  if ((flag_tree_loop_vectorize
+       || !OPTION_SET_P (flag_tree_loop_vectorize))
+      && flag_tree_loop_optimize)
+    {
+      loop->force_vectorize = true;
+      cfun->has_force_vectorize_loops = true;
+    }
+}
+
+
 /* A subroutine of expand_omp_for.  Generate code for a parallel
    loop with any schedule.  Given parameters:
 
@@ -4650,6 +4666,7 @@ expand_omp_for_generic (struct omp_region *region,
       new_loop->header = l0_bb;
       new_loop->latch = l2_bb;
       add_loop (new_loop, outer_loop);
+      maybe_auto_vectorize_loop (new_loop);
 
       /* Allocate a loop structure for the original loop unless we already
 	 had one.  */
@@ -4660,6 +4677,7 @@ expand_omp_for_generic (struct omp_region *region,
 	  orig_loop->header = l1_bb;
 	  /* The loop may have multiple latches.  */
 	  add_loop (orig_loop, new_loop);
+	  maybe_auto_vectorize_loop (orig_loop);
 	}
     }
 }
@@ -5551,6 +5569,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       if (collapse_bb == NULL)
 	loop->latch = cont_bb;
       add_loop (loop, body_bb->loop_father);
+      maybe_auto_vectorize_loop (loop);
     }
 }
 
@@ -6268,6 +6287,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
       trip_loop->header = iter_part_bb;
       trip_loop->latch = trip_update_bb;
       add_loop (trip_loop, iter_part_bb->loop_father);
+      maybe_auto_vectorize_loop (trip_loop);
 
       if (loop != entry_bb->loop_father)
 	{
@@ -6285,6 +6305,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
 	  if (collapse_bb == NULL)
 	    loop->latch = cont_bb;
 	  add_loop (loop, trip_loop);
+	  maybe_auto_vectorize_loop (loop);
 	}
     }
 }
@@ -7439,6 +7460,7 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
       if (collapse_bb == NULL)
 	loop->latch = cont_bb;
       add_loop (loop, body_bb->loop_father);
+      maybe_auto_vectorize_loop (loop);
     }
 }
 
@@ -8006,6 +8028,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
 	      inner_loop->header = elem_body_bb;
 	      inner_loop->latch = elem_cont_bb;
 	      add_loop (inner_loop, body_loop);
+	      maybe_auto_vectorize_loop (inner_loop);
 	    }
 	}
     }
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-1.c b/gcc/testsuite/c-c++-common/gomp/vectorize-1.c
new file mode 100644
index 00000000000..c52b6fd3039
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O1 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-2.c b/gcc/testsuite/c-c++-common/gomp/vectorize-2.c
new file mode 100644
index 00000000000..78a10e4ff56
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-3.c b/gcc/testsuite/c-c++-common/gomp/vectorize-3.c
new file mode 100644
index 00000000000..987939fb2d9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-3.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/vectorize-s.c b/gcc/testsuite/c-c++-common/gomp/vectorize-s.c
new file mode 100644
index 00000000000..f382c9aeba0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/vectorize-s.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -Os -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+
+/* Test that OMP loops are vectorized at all optimization levels
+   whether or not the "simd" specifier is present.  */
+
+extern void init (int n, int *p);
+extern int g (int n, int *p);
+
+int f1 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
+
+int f2 (void)
+{
+  int a1[32], a2[32], a3[32];
+  init (32, a1);
+  init (32, a2);
+  #pragma omp for simd
+  for (int i = 0; i < 32; i++)
+    a3[i] = a1[i] + a2[i];
+  return g (4, a3);
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/pr46032-2.c b/gcc/testsuite/gcc.dg/gomp/pr46032-2.c
index 2e562618489..ce925d1bd89 100644
--- a/gcc/testsuite/gcc.dg/gomp/pr46032-2.c
+++ b/gcc/testsuite/gcc.dg/gomp/pr46032-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
 
 #define N 2
 
diff --git a/gcc/testsuite/gcc.dg/gomp/pr46032-3.c b/gcc/testsuite/gcc.dg/gomp/pr46032-3.c
index da1ab487385..866b7c9ada5 100644
--- a/gcc/testsuite/gcc.dg/gomp/pr46032-3.c
+++ b/gcc/testsuite/gcc.dg/gomp/pr46032-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-vectorize -fopenmp -std=c99 -fipa-pta -fdump-tree-optimized" } */
 
 #define N 2
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-09-14 17:55 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-14 17:31 [PATCH] OpenMP: Enable vectorization in all OpenMP loops Sandra Loosemore
2022-09-14 17:55 ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).