public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] expand acc_on_device earlier
@ 2015-08-03  1:23 Nathan Sidwell
  2015-08-03 11:37 ` Thomas Schwinge
  2015-08-03 12:03 ` Thomas Schwinge
  0 siblings, 2 replies; 5+ messages in thread
From: Nathan Sidwell @ 2015-08-03  1:23 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 245 bytes --]

I've committed this to gomp4 branch.  It expands the acc_on_device builtin 
earlier in the new oacc_xform pass.  This will allow more optimization earlier on.

The existing expansion point now only needs to deal with the host-side case.

nathan

[-- Attachment #2: gomp4-xform.patch --]
[-- Type: text/x-patch, Size: 7489 bytes --]

2015-08-02  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* omp-low.c (oacc_xform_on_device): New function.
	(execute_oacc_transform): Use get_oacc_fn_attrib.  Call
	oacc_xform_on_device.
	* builtins.c (expand_builtin_on_device): Only expect to be
	expanded on host compiler.

	libgcc/
	* config/nvptx/comp-acc_on_device.c: Include gomp-constants.h.
	(acc_on_device): Code directly here.

	libgomp/
	* openacc.h (acc_on_device): Take int and explain why.
	* oacc-init.c (acc_on_device): Likewise.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 226462)
+++ gcc/omp-low.c	(working copy)
@@ -14510,29 +14510,65 @@ make_pass_late_lower_omp (gcc::context *
   return new pass_late_lower_omp (ctxt);
 }
 
+/* Transform an acc_on_device call.  The std requires this folded at
+   compile time for constant operands.  We always fold it.  In an
+   offloaded function we're never 'none'.  We cannot detect
+   host_nonshm here, as that's a dynamic feature of the runtime.
+   However, users shouldn't be using host_nonshm anyway, only the
+   test harness.  */
+
+static void
+oacc_xform_on_device (gimple_stmt_iterator *gsi, gimple stmt)
+{
+  tree arg = gimple_call_arg (stmt, 0);
+  unsigned val = GOMP_DEVICE_HOST;
+	      
+#ifdef ACCEL_COMPILER
+  val = GOMP_DEVICE_NOT_HOST;
+#endif
+  tree result = build2 (EQ_EXPR, boolean_type_node, arg,
+			build_int_cst (integer_type_node, val));
+#ifdef ACCEL_COMPILER
+  {
+    tree dev  = build2 (EQ_EXPR, boolean_type_node, arg,
+			build_int_cst (integer_type_node,
+				       ACCEL_COMPILER_acc_device));
+    result = build2 (TRUTH_OR_EXPR, boolean_type_node, result, dev);
+  }
+#endif
+  result = fold_convert (integer_type_node, result);
+  tree lhs = gimple_call_lhs (stmt);
+  gimple_seq replace = NULL;
+
+  push_gimplify_context (true);
+  gimplify_assign (lhs, result, &replace);
+  pop_gimplify_context (NULL);
+  gsi_replace_with_seq (gsi, replace, false);
+}
+
 /* Main entry point for oacc transformations which run on the device
-   compiler.  */
+   compilerafter LTO, so we know what the target device is at this
+   point (including the host fallback).  */
 
 static unsigned int
 execute_oacc_transform ()
 {
   basic_block bb;
-  gimple_stmt_iterator gsi;
-  gimple stmt;
 
-  if (!lookup_attribute ("oacc function",
-			 DECL_ATTRIBUTES (current_function_decl)))
+  if (!get_oacc_fn_attrib (current_function_decl))
     return 0;
 
-
   FOR_ALL_BB_FN (bb, cfun)
     {
-      gsi = gsi_start_bb (bb);
-
-      while (!gsi_end_p (gsi))
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
 	{
-	  stmt = gsi_stmt (gsi);
-	  gsi_next (&gsi);
+	  gimple stmt = gsi_stmt (gsi);
+
+	  /* acc_on_device must be evaluated at compile time for
+	     constant arguments.  */
+	  if (gimple_call_builtin_p (stmt, BUILT_IN_ACC_ON_DEVICE))
+	    oacc_xform_on_device (&gsi, stmt);
 	}
     }
 
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 226462)
+++ gcc/builtins.c	(working copy)
@@ -5880,43 +5880,39 @@ expand_stack_save (void)
 }
 
 
-/* Expand OpenACC acc_on_device.
-
-   This has to happen late (that is, not in early folding; expand_builtin_*,
-   rather than fold_builtin_*), as we have to act differently for host and
-   acceleration device (ACCEL_COMPILER conditional).  */
+/* Expand OpenACC acc_on_device.  This is expanded in the openacc
+   transform pass, but if the user has this outside of an offloaded
+   region, we'll find it here.  In that case we must be host or none.  */
 
 static rtx
 expand_builtin_acc_on_device (tree exp, rtx target)
 {
+#ifdef ACCEL_COMPILER
+  gcc_unreachable ();
+#else
+  gcc_assert (!get_oacc_fn_attrib (current_function_decl));
+  
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
   tree arg = CALL_EXPR_ARG (exp, 0);
-
-  /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
-  machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
-  rtx v = expand_normal (arg), v1, v2;
-#ifdef ACCEL_COMPILER
-  v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
-  v2 = GEN_INT (ACCEL_COMPILER_acc_device);
-#else
-  v1 = GEN_INT (GOMP_DEVICE_NONE);
-  v2 = GEN_INT (GOMP_DEVICE_HOST);
-#endif
+  rtx val = expand_normal (arg);
   machine_mode target_mode = TYPE_MODE (integer_type_node);
   if (!target || !register_operand (target, target_mode))
     target = gen_reg_rtx (target_mode);
   emit_move_insn (target, const1_rtx);
   rtx_code_label *done_label = gen_label_rtx ();
-  do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX,
+  do_compare_rtx_and_jump (val, GEN_INT (GOMP_DEVICE_HOST), EQ,
+			   false, GET_MODE (val), NULL_RTX,
 			   NULL, done_label, PROB_EVEN);
-  do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX,
+  do_compare_rtx_and_jump (val, GEN_INT (GOMP_DEVICE_NONE), EQ,
+			   false, GET_MODE (val), NULL_RTX,
 			   NULL, done_label, PROB_EVEN);
   emit_move_insn (target, const0_rtx);
   emit_label (done_label);
 
   return target;
+#endif
 }
 
 /* Expand a thread-id/thread-count builtin for OpenACC.  */
Index: libgcc/config/nvptx/gomp-acc_on_device.c
===================================================================
--- libgcc/config/nvptx/gomp-acc_on_device.c	(revision 226462)
+++ libgcc/config/nvptx/gomp-acc_on_device.c	(working copy)
@@ -1,6 +1,14 @@
-int acc_on_device(int d)
+#include "gomp-constants.h"
+
+/* For when the builtin is explicitly disabled.  */
+int acc_on_device (int d)
 {
-  return __builtin_acc_on_device(d);
+  /* We can't use the builtin itself here, because that only expands
+     to device-like things inside offloaded compute regions, which
+     this isn't.  Even though it'll be executed on the device --
+     unless someone builds a host-side PTX compiler, which would be
+     very strange.  */
+  return d == GOMP_DEVICE_NOT_HOST || d == GOMP_DEVICE_NVIDIA_PTX;
 }
 
 int acc_on_device_h_(int *d)
Index: libgomp/openacc.h
===================================================================
--- libgomp/openacc.h	(revision 226462)
+++ libgomp/openacc.h	(working copy)
@@ -78,7 +78,11 @@ void acc_wait_all (void) __GOACC_NOTHROW
 void acc_wait_all_async (int) __GOACC_NOTHROW;
 void acc_init (acc_device_t) __GOACC_NOTHROW;
 void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
-int acc_on_device (acc_device_t) __GOACC_NOTHROW;
+/* Library function declaration.  Although it should take an
+   acc_device_t argument, that causes problems with matching the
+   builtin, which takes an int (to avoid declaring the enumeration
+   inside the compiler).  */
+int acc_on_device (int) __GOACC_NOTHROW;
 void *acc_malloc (size_t) __GOACC_NOTHROW;
 void acc_free (void *) __GOACC_NOTHROW;
 /* Some of these would be more correct with const qualifiers, but
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c	(revision 226462)
+++ libgomp/oacc-init.c	(working copy)
@@ -632,10 +632,14 @@ acc_set_device_num (int ord, acc_device_
 
 ialias (acc_set_device_num)
 
+/* The compiler always attempts to expand acc_on_device, but if the
+   user disables the builtin, or calls it via a pointer, we have this
+   version.  */
+
 int
-acc_on_device (acc_device_t dev)
+acc_on_device (int dev)
 {
-  /* Just rely on the compiler builtin.  */
+  /* It is safe to use the compiler builtin, as we're the host.  */
   return __builtin_acc_on_device (dev);
 }
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [gomp4] expand acc_on_device earlier
  2015-08-03  1:23 [gomp4] expand acc_on_device earlier Nathan Sidwell
@ 2015-08-03 11:37 ` Thomas Schwinge
  2015-08-03 12:00   ` Nathan Sidwell
  2015-08-03 12:03 ` Thomas Schwinge
  1 sibling, 1 reply; 5+ messages in thread
From: Thomas Schwinge @ 2015-08-03 11:37 UTC (permalink / raw)
  To: Nathan Sidwell, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 8791 bytes --]

Hi!

On Sun, 2 Aug 2015 21:23:30 -0400, Nathan Sidwell <nathan@acm.org> wrote:
> I've committed this to gomp4 branch.  It expands the acc_on_device builtin 
> earlier in the new oacc_xform pass.  This will allow more optimization earlier on.

Thanks!

> The existing expansion point now only needs to deal with the host-side case.

Actually, no; committed to gomp-4_0-branch in r226498:

commit 5d1009bd4b959da699cf439200ffa571bf547026
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Aug 3 11:36:01 2015 +0000

    Fix offload compiler build, and de-duplicate acc_on_device logic
    
    When building an x86_64-intelmicemul-linux-gnu offloading compiler with
    r226484, the __builtin_acc_on_device usage in libgomp/oacc-init.c:acc_on_device
    makes it blow up:
    
        [...]/source-gcc/libgomp/oacc-init.c: In function 'acc_on_device':
        [...]/source-gcc/libgomp/oacc-init.c:643:10: internal compiler error: in expand_builtin_acc_on_device, at builtins.c:5891
           return __builtin_acc_on_device (dev);
                  ^
        0x6d9632 expand_builtin_acc_on_device
                [...]/source-gcc/gcc/builtins.c:5891
        0x6d9632 expand_builtin(tree_node*, rtx_def*, rtx_def*, machine_mode, int)
                [...]/source-gcc/gcc/builtins.c:7127
        0x8021ad expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
                [...]/source-gcc/gcc/expr.c:10361
        0x80e8b5 store_expr_with_bounds(tree_node*, rtx_def*, int, bool, tree_node*)
                [...]/source-gcc/gcc/expr.c:5400
        0x810eb4 expand_assignment(tree_node*, tree_node*, bool)
                [...]/source-gcc/gcc/expr.c:5169
        0x6f927b expand_call_stmt
                [...]/source-gcc/gcc/cfgexpand.c:2349
        0x6f927b expand_gimple_stmt_1
                [...]/source-gcc/gcc/cfgexpand.c:3238
        0x6fa77c expand_gimple_stmt
                [...]/source-gcc/gcc/cfgexpand.c:3399
        0x6fa842 expand_gimple_tailcall
                [...]/source-gcc/gcc/cfgexpand.c:3446
        0x6fc615 expand_gimple_basic_block
                [...]/source-gcc/gcc/cfgexpand.c:5388
        0x701456 execute
                [...]/source-gcc/gcc/cfgexpand.c:6023
    
    (This is not a problem for a nvptx-none offloading compiler, because of its
    open-coded libgcc/config/nvptx/gomp-acc_on_device.c:acc_on_device
    implementation.)
    
    	gcc/
    	* builtins.c (expand_builtin_on_device): Don't expect to be
    	expanded on host compiler.
    	libgcc/
    	* config/nvptx/gomp-acc_on_device.c: Don't include
    	"gomp-constants.h".
    	(acc_on_device): Don't code directly here.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@226498 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                       |    3 +++
 gcc/builtins.c                           |   30 ++++++++++++++++++------------
 gcc/omp-low.c                            |    7 ++-----
 libgcc/ChangeLog.gomp                    |   10 ++++++++--
 libgcc/config/nvptx/gomp-acc_on_device.c |   16 +++++++---------
 libgomp/oacc-init.c                      |    2 +-
 6 files changed, 39 insertions(+), 29 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index a30f6a3..d5cc1ed 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,8 @@
 2015-08-03  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* builtins.c (expand_builtin_on_device): Don't expect to be
+	expanded on host compiler.
+
 	* gimplify.c (oacc_default_clause): Remove in_code formal
 	parameter.  Adjust all users.
 
diff --git gcc/builtins.c gcc/builtins.c
index 9bc08f6..ce369a1 100644
--- gcc/builtins.c
+++ gcc/builtins.c
@@ -5880,39 +5880,45 @@ expand_stack_save (void)
 }
 
 
-/* Expand OpenACC acc_on_device.  This is expanded in the openacc
-   transform pass, but if the user has this outside of an offloaded
-   region, we'll find it here.  In that case we must be host or none.  */
+/* Expand OpenACC acc_on_device.  This is usually expanded in the
+   oacc_transform pass, earlier on, but if used outside of an offloaded region,
+   we'll find it here.  */
 
 static rtx
 expand_builtin_acc_on_device (tree exp, rtx target)
 {
-#ifdef ACCEL_COMPILER
-  gcc_unreachable ();
-#else
+#ifndef ACCEL_COMPILER
   gcc_assert (!get_oacc_fn_attrib (current_function_decl));
+#endif
   
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
   tree arg = CALL_EXPR_ARG (exp, 0);
-  rtx val = expand_normal (arg);
+
+  /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
+  machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
+  rtx v = expand_normal (arg), v1, v2;
+#ifdef ACCEL_COMPILER
+  v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
+  v2 = GEN_INT (ACCEL_COMPILER_acc_device);
+#else
+  v1 = GEN_INT (GOMP_DEVICE_NONE);
+  v2 = GEN_INT (GOMP_DEVICE_HOST);
+#endif
   machine_mode target_mode = TYPE_MODE (integer_type_node);
   if (!target || !register_operand (target, target_mode))
     target = gen_reg_rtx (target_mode);
   emit_move_insn (target, const1_rtx);
   rtx_code_label *done_label = gen_label_rtx ();
-  do_compare_rtx_and_jump (val, GEN_INT (GOMP_DEVICE_HOST), EQ,
-			   false, GET_MODE (val), NULL_RTX,
+  do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX,
 			   NULL, done_label, PROB_EVEN);
-  do_compare_rtx_and_jump (val, GEN_INT (GOMP_DEVICE_NONE), EQ,
-			   false, GET_MODE (val), NULL_RTX,
+  do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX,
 			   NULL, done_label, PROB_EVEN);
   emit_move_insn (target, const0_rtx);
   emit_label (done_label);
 
   return target;
-#endif
 }
 
 /* Expand a thread-id/thread-count builtin for OpenACC.  */
diff --git gcc/omp-low.c gcc/omp-low.c
index ebf333f..701e8da 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -14510,12 +14510,9 @@ make_pass_late_lower_omp (gcc::context *ctxt)
   return new pass_late_lower_omp (ctxt);
 }
 
-/* Transform an acc_on_device call.  The std requires this folded at
+/* Transform an acc_on_device call.  OpenACC 2.0a requires this folded at
    compile time for constant operands.  We always fold it.  In an
-   offloaded function we're never 'none'.  We cannot detect
-   host_nonshm here, as that's a dynamic feature of the runtime.
-   However, users shouldn't be using host_nonshm anyway, only the
-   test harness.  */
+   offloaded function we're never 'none'.  */
 
 static void
 oacc_xform_on_device (gimple_stmt_iterator *gsi, gimple stmt)
diff --git libgcc/ChangeLog.gomp libgcc/ChangeLog.gomp
index 4aa45e7..085bfda 100644
--- libgcc/ChangeLog.gomp
+++ libgcc/ChangeLog.gomp
@@ -1,6 +1,12 @@
-2015-07-17  Nathan Sidwell  <nathan@codesourcery.com>
+2015-08-03  Thomas Schwinge  <thomas@codesourcery.com>
 
-	* config/nvptx/comp-acc_on_device.c: Include gomp-constants.h.
+	* config/nvptx/gomp-acc_on_device.c: Don't include
+	"gomp-constants.h".
+	(acc_on_device): Don't code directly here.
+
+2015-08-02  Nathan Sidwell  <nathan@codesourcery.com>
+
+	* config/nvptx/gomp-acc_on_device.c: Include gomp-constants.h.
 	(acc_on_device): Code directly here.
 
 2015-07-17  Nathan Sidwell  <nathan@codesourcery.com>
diff --git libgcc/config/nvptx/gomp-acc_on_device.c libgcc/config/nvptx/gomp-acc_on_device.c
index e89cd9b..db94350 100644
--- libgcc/config/nvptx/gomp-acc_on_device.c
+++ libgcc/config/nvptx/gomp-acc_on_device.c
@@ -1,14 +1,12 @@
-#include "gomp-constants.h"
+/* The compiler always attempts to expand acc_on_device, but if the
+   user disables the builtin, or calls it via a pointer, we have this
+   version.  */
 
-/* For when the builtin is explicitly disabled.  */
-int acc_on_device (int d)
+int
+acc_on_device (int dev)
 {
-  /* We can't use the builtin itself here, because that only expands
-     to device-like things inside offloaded compute regions, which
-     this isn't.  Even though it'll be executed on the device --
-     unless someone builds a host-side PTX compiler, which would be
-     very strange.  */
-  return d == GOMP_DEVICE_NOT_HOST || d == GOMP_DEVICE_NVIDIA_PTX;
+  /* Just rely on the compiler builtin.  */
+  return __builtin_acc_on_device (dev);
 }
 
 int acc_on_device_h_(int *d)
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index 79a643c..8dfe4a7 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -639,7 +639,7 @@ ialias (acc_set_device_num)
 int
 acc_on_device (int dev)
 {
-  /* It is safe to use the compiler builtin, as we're the host.  */
+  /* Just rely on the compiler builtin.  */
   return __builtin_acc_on_device (dev);
 }
 


Grüße,
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [gomp4] expand acc_on_device earlier
  2015-08-03 11:37 ` Thomas Schwinge
@ 2015-08-03 12:00   ` Nathan Sidwell
  2015-08-03 12:41     ` Thomas Schwinge
  0 siblings, 1 reply; 5+ messages in thread
From: Nathan Sidwell @ 2015-08-03 12:00 UTC (permalink / raw)
  To: Thomas Schwinge, GCC Patches

On 08/03/15 07:37, Thomas Schwinge wrote:
> Hi!
>
> On Sun, 2 Aug 2015 21:23:30 -0400, Nathan Sidwell <nathan@acm.org> wrote:
>> I've committed this to gomp4 branch.  It expands the acc_on_device builtin
>> earlier in the new oacc_xform pass.  This will allow more optimization earlier on.
>
> Thanks!
>
>> The existing expansion point now only needs to deal with the host-side case.
>
> Actually, no; committed to gomp-4_0-branch in r226498:

Please clarify.  This suggests a logic fault elsewhere.

nathan
-- 
Nathan Sidwell

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [gomp4] expand acc_on_device earlier
  2015-08-03  1:23 [gomp4] expand acc_on_device earlier Nathan Sidwell
  2015-08-03 11:37 ` Thomas Schwinge
@ 2015-08-03 12:03 ` Thomas Schwinge
  1 sibling, 0 replies; 5+ messages in thread
From: Thomas Schwinge @ 2015-08-03 12:03 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: GCC Patches, fortran

[-- Attachment #1: Type: text/plain, Size: 6124 bytes --]

Hi!

On Sun, 2 Aug 2015 21:23:30 -0400, Nathan Sidwell <nathan@acm.org> wrote:
> I've committed this to gomp4 branch.  It expands the acc_on_device builtin 
> earlier in the new oacc_xform pass.  This will allow more optimization earlier on.

As far as I remember, the Fortran front end doesn't know about/doesn't
know how to use these GCC builtins, so it's not currently able to do such
compile-time folding of acc_on_device.

> 	libgomp/
> 	* openacc.h (acc_on_device): Take int and explain why.

> --- libgomp/openacc.h	(revision 226462)
> +++ libgomp/openacc.h	(working copy)
> @@ -78,7 +78,11 @@ void acc_wait_all (void) __GOACC_NOTHROW
>  void acc_wait_all_async (int) __GOACC_NOTHROW;
>  void acc_init (acc_device_t) __GOACC_NOTHROW;
>  void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
> -int acc_on_device (acc_device_t) __GOACC_NOTHROW;
> +/* Library function declaration.  Although it should take an
> +   acc_device_t argument, that causes problems with matching the
> +   builtin, which takes an int (to avoid declaring the enumeration
> +   inside the compiler).  */
> +int acc_on_device (int) __GOACC_NOTHROW;
>  void *acc_malloc (size_t) __GOACC_NOTHROW;
>  void acc_free (void *) __GOACC_NOTHROW;
>  /* Some of these would be more correct with const qualifiers, but

Hmm, too bad this doesn't resolve the following C++ XFAIL (the test case
is fine for C), gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c:

    /* Have to enable optimizations, as otherwise builtins won't be expanded.  */
    /* { dg-additional-options "-O -fdump-rtl-expand" } */
    
    /* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
    
    #if __cplusplus
    extern "C" {
    #endif
    
    typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
    extern int acc_on_device (int);
    
    #if __cplusplus
    }
    #endif
    
    int
    f (void)
    {
      const acc_device_t dev = acc_device_X;
      return acc_on_device (dev);
    }
    
    /* With -fopenacc, we're expecting the builtin to be expanded, so no calls.
       TODO: in C++, the use of enum acc_device_t for acc_on_device's parameter
       perturbs expansion as a builtin, which expects an int parameter.  It's fine
       when changing acc_device_t to plain int, but that's not necessarily what a
       user will be doing.
    
       { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" { xfail c++ } } } */

Do you happen to have any insight into that?


Meanwhile, committed to gomp-4_0-branch in r226501:

commit 38d7f116bf18f4a4012fda532aa70d47f3f20652
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Aug 3 11:59:15 2015 +0000

    Update acc_on_device test cases
    
    ... to match r226484 changes.
    
    	gcc/testsuite/
    	* c-c++-common/goacc/acc_on_device-2-off.c (acc_on_device): Change
    	formal parameter to int.
    	* c-c++-common/goacc/acc_on_device-2.c (acc_on_device): Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@226501 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog.gomp                           |  4 ++++
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c |  4 +++-
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c     | 10 ++++++----
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index 33519ef..8dd9a4c 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,9 @@
 2015-08-03  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* c-c++-common/goacc/acc_on_device-2-off.c (acc_on_device): Change
+	formal parameter to int.
+	* c-c++-common/goacc/acc_on_device-2.c (acc_on_device): Likewise.
+
 	* c-c++-common/restrict-2.c: Update for new pass_lim.
 	* c-c++-common/restrict-4.c: Same.
 	* g++.dg/tree-ssa/pr33615.C: Same.
diff --git gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
index 59c72f7..71abe11 100644
--- gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
+++ gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
@@ -1,12 +1,14 @@
 /* Have to enable optimizations, as otherwise builtins won't be expanded.  */
 /* { dg-additional-options "-O -fdump-rtl-expand -fno-openacc" } */
 
+/* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
+
 #if __cplusplus
 extern "C" {
 #endif
 
 typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-extern int acc_on_device (acc_device_t);
+extern int acc_on_device (int);
 
 #if __cplusplus
 }
diff --git gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
index ef622a8..243e562 100644
--- gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
+++ gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
@@ -1,12 +1,14 @@
 /* Have to enable optimizations, as otherwise builtins won't be expanded.  */
 /* { dg-additional-options "-O -fdump-rtl-expand" } */
 
+/* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
+
 #if __cplusplus
 extern "C" {
 #endif
 
 typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-extern int acc_on_device (acc_device_t);
+extern int acc_on_device (int);
 
 #if __cplusplus
 }
@@ -20,9 +22,9 @@ f (void)
 }
 
 /* With -fopenacc, we're expecting the builtin to be expanded, so no calls.
-   TODO: in C++, even under extern "C", the use of enum for acc_device_t
+   TODO: in C++, the use of enum acc_device_t for acc_on_device's parameter
    perturbs expansion as a builtin, which expects an int parameter.  It's fine
-   when changing acc_device_t to plain int, but that's not what we're doing in
-   <openacc.h>.
+   when changing acc_device_t to plain int, but that's not necessarily what a
+   user will be doing.
 
    { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" { xfail c++ } } } */


Grüße,
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [gomp4] expand acc_on_device earlier
  2015-08-03 12:00   ` Nathan Sidwell
@ 2015-08-03 12:41     ` Thomas Schwinge
  0 siblings, 0 replies; 5+ messages in thread
From: Thomas Schwinge @ 2015-08-03 12:41 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 2650 bytes --]

Hi!

On Mon, 3 Aug 2015 08:00:36 -0400, Nathan Sidwell <nathan@codesourcery.com> wrote:
> On 08/03/15 07:37, Thomas Schwinge wrote:
> > On Sun, 2 Aug 2015 21:23:30 -0400, Nathan Sidwell <nathan@acm.org> wrote:
> >> I've committed this to gomp4 branch.  It expands the acc_on_device builtin
> >> earlier in the new oacc_xform pass.  This will allow more optimization earlier on.
> >
> > Thanks!
> >
> >> The existing expansion point now only needs to deal with the host-side case.
> >
> > Actually, no; committed to gomp-4_0-branch in r226498:
> 
> Please clarify.  This suggests a logic fault elsewhere.

Hmm, I had the following in the commit log:

    When building an x86_64-intelmicemul-linux-gnu offloading compiler with
    r226484, the __builtin_acc_on_device usage in libgomp/oacc-init.c:acc_on_device
    makes it blow up:

	[...]/source-gcc/libgomp/oacc-init.c: In function 'acc_on_device':
	[...]/source-gcc/libgomp/oacc-init.c:643:10: internal compiler error: in expand_builtin_acc_on_device, at builtins.c:5891
	   return __builtin_acc_on_device (dev);
		  ^
	0x6d9632 expand_builtin_acc_on_device
		[...]/source-gcc/gcc/builtins.c:5891

In case that has been too terse (sorry), here's a bit more detail.

GCC can be configured to use nvptx-none as well as
x86_64-intelmicemul-linux-gnu offloading compilers.  In offloading
compilers' configurations, ACCEL_COMPILER is defined.  The nvptx-none
offloading compilers' libgcc/libgomp build will use the specific
libgcc/config/nvptx/gomp-acc_on_device.c:acc_on_device implementation,
but the x86_64-intelmicemul-linux-gnu one will use the default
libgomp/oacc-init.c:acc_on_device implementation (which is implemented in
terms of __builtin_acc_on_device).  Thus, in the latter case, we'd run
into this gcc_unreachable during the x86_64-intelmicemul-linux-gnu
offloading compiler's libgomp build:

    /* Expand OpenACC acc_on_device.  This is expanded in the openacc
       transform pass, but if the user has this outside of an offloaded
       region, we'll find it here.  In that case we must be host or none.  */
    
    static rtx
    expand_builtin_acc_on_device (tree exp, rtx target)
    {
    #ifdef ACCEL_COMPILER
      gcc_unreachable ();
    #else
      gcc_assert (!get_oacc_fn_attrib (current_function_decl));

In r226498, I then restored the earlier logic of
gcc/builtins.c:expand_builtin_acc_on_device, and could then also simplify
libgcc/config/nvptx/gomp-acc_on_device.c:acc_on_device to the very same
generic/simple implementation in terms of __builtin_acc_on_device.

Does that clarify?


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-08-03 12:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-03  1:23 [gomp4] expand acc_on_device earlier Nathan Sidwell
2015-08-03 11:37 ` Thomas Schwinge
2015-08-03 12:00   ` Nathan Sidwell
2015-08-03 12:41     ` Thomas Schwinge
2015-08-03 12:03 ` Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).