public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* OpenACC dimension range propagation optimization
@ 2015-11-03 18:12 Nathan Sidwell
  2015-11-04 10:26 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: Nathan Sidwell @ 2015-11-03 18:12 UTC (permalink / raw)
  To: Richard Guenther, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 585 bytes --]

Richard,
this patch implements VRP for the 2 openacc axis internal fns I've added.  We 
know the position within  a dimension cannot exceed that dimensions extend. 
Further, if the extend is dynamic, the target backend may well know there's a 
hardware-mandated maximum value.

Hence, added a new target hook to allow the backend to specify that upper bound, 
and added smarts to extract_range_basic to process the two internal functions.

Incidentally, this was the bit I was working on at the cauldron, which caused me 
to work on the min/max range combining.

ok for trunk?

nathan

[-- Attachment #2: trunk-dim-range.patch --]
[-- Type: text/x-patch, Size: 6524 bytes --]

2015-11-03  Nathan Sidwell  <nathan@codesourcery.com>

	* target.def (goacc.dim_limit): New hook.
	* targhooks.h (default_goacc_dim_limit): Declare.
	* doc/tm.texi.in (TARGET_GOACC_DIM_LIMIT): Add.
	* doc/tm.texi: Rebuilt.
	* omp-low.c (default_goacc_dim_limit): New.
	* config/nvptx/nvptx.c (PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
	(nvptx_goacc_dim_limit) New.
	(TARGET_GOACC_DIM_LIMIT): Override.
	* tree-vrp.c: Include omp-low.h, target.h.
	(extract_range_basic): Add handling for IFN_GOACC_DIM_SIZE &
	IFN_GOACC_DIM_POS.

Index: gcc/targhooks.h
===================================================================
--- gcc/targhooks.h	(revision 229535)
+++ gcc/targhooks.h	(working copy)
@@ -110,6 +110,7 @@ extern void default_destroy_cost_data (v
 
 /* OpenACC hooks.  */
 extern bool default_goacc_validate_dims (tree, int [], int);
+extern int default_goacc_dim_limit (int);
 extern bool default_goacc_fork_join (gcall *, const int [], bool);
 
 /* These are here, and not in hooks.[ch], because not all users of
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 229535)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -3248,6 +3248,10 @@ nvptx_file_end (void)
     }
 }
 \f
+/* Define dimension sizes for known hardware.  */
+#define PTX_VECTOR_LENGTH 32
+#define PTX_WORKER_LENGTH 32
+
 /* Validate compute dimensions of an OpenACC offload or routine, fill
    in non-unity defaults.  FN_LEVEL indicates the level at which a
    routine might spawn a loop.  It is negative for non-routines.  */
@@ -3264,6 +3268,25 @@ nvptx_goacc_validate_dims (tree ARG_UNUS
   return changed;
 }
 
+/* Return maximum dimension size, or zero for unbounded.  */
+
+static int
+nvptx_goacc_dim_limit (int axis)
+{
+  switch (axis)
+    {
+    case GOMP_DIM_WORKER:
+      return PTX_WORKER_LENGTH;
+
+    case GOMP_DIM_VECTOR:
+      return PTX_VECTOR_LENGTH;
+
+    default:
+      break;
+    }
+  return 0;
+}
+
 /* Determine whether fork & joins are needed.  */
 
 static bool
@@ -3376,6 +3399,9 @@ nvptx_goacc_fork_join (gcall *call, cons
 #undef TARGET_GOACC_VALIDATE_DIMS
 #define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
 
+#undef TARGET_GOACC_DIM_LIMIT
+#define TARGET_GOACC_DIM_LIMIT nvptx_goacc_dim_limit
+
 #undef TARGET_GOACC_FORK_JOIN
 #define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
 
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 229535)
+++ gcc/omp-low.c	(working copy)
@@ -19380,6 +19380,18 @@ default_goacc_validate_dims (tree ARG_UN
   return changed;
 }
 
+/* Default dimension bound is unknown on accelerator and 1 on host. */
+
+int
+default_goacc_dim_limit (int ARG_UNUSED (axis))
+{
+#ifdef ACCEL_COMPILER
+  return 0;
+#else
+  return 1;
+#endif
+}
+
 namespace {
 
 const pass_data pass_data_oacc_device_lower =
Index: gcc/tree-vrp.c
===================================================================
--- gcc/tree-vrp.c	(revision 229535)
+++ gcc/tree-vrp.c	(working copy)
@@ -58,8 +58,8 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-threadupdate.h"
 #include "tree-ssa-scopedtables.h"
 #include "tree-ssa-threadedge.h"
-
-
+#include "omp-low.h"
+#include "target.h"
 
 /* Range of values that can be associated with an SSA_NAME after VRP
    has executed.  */
@@ -3976,7 +3976,9 @@ extract_range_basic (value_range *vr, gi
   else if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
     {
       enum tree_code subcode = ERROR_MARK;
-      switch (gimple_call_internal_fn (stmt))
+      unsigned ifn_code = gimple_call_internal_fn (stmt);
+
+      switch (ifn_code)
 	{
 	case IFN_UBSAN_CHECK_ADD:
 	  subcode = PLUS_EXPR;
@@ -3987,6 +3989,37 @@ extract_range_basic (value_range *vr, gi
 	case IFN_UBSAN_CHECK_MUL:
 	  subcode = MULT_EXPR;
 	  break;
+	case IFN_GOACC_DIM_SIZE:
+	case IFN_GOACC_DIM_POS:
+	  /* Optimizing these two internal functions helps the loop
+	     optimizer eliminate outer comparisons.  Size is [1,N]
+	     and pos is [0,N-1].  */
+	  {
+	    bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
+	    tree attr = get_oacc_fn_attrib (current_function_decl);
+	    tree arg = gimple_call_arg (stmt, 0);
+	    int axis = TREE_INT_CST_LOW (arg);
+	    tree dims = TREE_VALUE (attr);
+
+	    for (int ix = axis; ix--;)
+	      dims = TREE_CHAIN (dims);
+	    int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
+
+	    if (!size)
+	      /* If it's dynamic, the backend might know a hardware
+		 limitation.  */
+	      size = targetm.goacc.dim_limit (axis);
+
+	    if (size)
+	      {
+		tree type = TREE_TYPE (gimple_call_lhs (stmt));
+		
+		set_value_range (vr, VR_RANGE,
+				 build_int_cst (type, is_pos ? 0 : 1),
+				 build_int_cst (type, size - is_pos), NULL);
+	      }
+	  }
+	  return;
 	default:
 	  break;
 	}
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	(revision 229535)
+++ gcc/doc/tm.texi	(working copy)
@@ -5777,6 +5777,11 @@ true, if changes have been made.  You mu
 provide dimensions larger than 1.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_GOACC_DIM_LIMIT (int @var{axis})
+This hook should return the maximum size of a particular dimension,
+or zero if unbounded.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork})
 This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
 function calls to target-specific gimple, or indicate whether they
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 229535)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -4262,6 +4262,8 @@ address;  but often a machine-dependent
 
 @hook TARGET_GOACC_VALIDATE_DIMS
 
+@hook TARGET_GOACC_DIM_LIMIT
+
 @hook TARGET_GOACC_FORK_JOIN
 
 @node Anchored Addresses
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 229535)
+++ gcc/target.def	(working copy)
@@ -1659,6 +1659,13 @@ bool, (tree decl, int *dims, int fn_leve
 default_goacc_validate_dims)
 
 DEFHOOK
+(dim_limit,
+"This hook should return the maximum size of a particular dimension,\n\
+or zero if unbounded.",
+int, (int axis),
+default_goacc_dim_limit)
+
+DEFHOOK
 (fork_join,
 "This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
 function calls to target-specific gimple, or indicate whether they\n\

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: OpenACC dimension range propagation optimization
  2015-11-03 18:12 OpenACC dimension range propagation optimization Nathan Sidwell
@ 2015-11-04 10:26 ` Richard Biener
  2015-11-04 13:56   ` Nathan Sidwell
  2015-11-04 17:29   ` Nathan Sidwell
  0 siblings, 2 replies; 6+ messages in thread
From: Richard Biener @ 2015-11-04 10:26 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: GCC Patches

On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
> Richard,
> this patch implements VRP for the 2 openacc axis internal fns I've added.
> We know the position within  a dimension cannot exceed that dimensions
> extend. Further, if the extend is dynamic, the target backend may well know
> there's a hardware-mandated maximum value.
>
> Hence, added a new target hook to allow the backend to specify that upper
> bound, and added smarts to extract_range_basic to process the two internal
> functions.
>
> Incidentally, this was the bit I was working on at the cauldron, which
> caused me to work on the min/max range combining.
>
> ok for trunk?

+         /* Optimizing these two internal functions helps the loop
+            optimizer eliminate outer comparisons.  Size is [1,N]
+            and pos is [0,N-1].  */
+         {
+           bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
+           tree attr = get_oacc_fn_attrib (current_function_decl);
+           tree arg = gimple_call_arg (stmt, 0);
+           int axis = TREE_INT_CST_LOW (arg);
+           tree dims = TREE_VALUE (attr);
+
+           for (int ix = axis; ix--;)
+             dims = TREE_CHAIN (dims);
+           int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
+
+           if (!size)
+             /* If it's dynamic, the backend might know a hardware
+                limitation.  */
+             size = targetm.goacc.dim_limit (axis);

this all seems a little bit fragile and relying on implementation details?
Is the attribute always present?  Is the call argument always a constant
that fits in a HOST_WIDE_INT (or even int here)?  Are there always enough
'dims' in the tree list?  Is the 'dim' value always an INTEGER_CST that
fits a HOST_WIDE_INT (or even an int here)?

I hope all these constraints (esp. 'int' fitting) are verified by the parser.

If so I'd like to see helper functions to hide these implementation details
from generic code like this.

You miss to provide the known lower bound to VRP when size is 0
in the end.  Just unconditioonally do

  tree type = TREE_TYPE (gimple_call_lhs (stmt));
  set_value_range (vr, VR_RANGE,
                           build_int_cst (type, is_pos ? 0 : 1),
                           size
                           ? build_int_cst (type, size - is_pos)
                           : vrp_val_max (type), NULL);

so you get at least [1, +INF] and [0, +INF] here.

Thanks,
Richard.

> nathan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: OpenACC dimension range propagation optimization
  2015-11-04 10:26 ` Richard Biener
@ 2015-11-04 13:56   ` Nathan Sidwell
  2015-11-04 14:35     ` Richard Biener
  2015-11-04 17:29   ` Nathan Sidwell
  1 sibling, 1 reply; 6+ messages in thread
From: Nathan Sidwell @ 2015-11-04 13:56 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

On 11/04/15 05:26, Richard Biener wrote:
> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
>> Richard,
>> this patch implements VRP for the 2 openacc axis internal fns I've added.
>> We know the position within  a dimension cannot exceed that dimensions
>> extend. Further, if the extend is dynamic, the target backend may well know
>> there's a hardware-mandated maximum value.
>>
>> Hence, added a new target hook to allow the backend to specify that upper
>> bound, and added smarts to extract_range_basic to process the two internal
>> functions.
>>
>> Incidentally, this was the bit I was working on at the cauldron, which
>> caused me to work on the min/max range combining.
>>
>> ok for trunk?
>
> +         /* Optimizing these two internal functions helps the loop
> +            optimizer eliminate outer comparisons.  Size is [1,N]
> +            and pos is [0,N-1].  */
> +         {
> +           bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
> +           tree attr = get_oacc_fn_attrib (current_function_decl);
> +           tree arg = gimple_call_arg (stmt, 0);
> +           int axis = TREE_INT_CST_LOW (arg);
> +           tree dims = TREE_VALUE (attr);
> +
> +           for (int ix = axis; ix--;)
> +             dims = TREE_CHAIN (dims);
> +           int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
> +
> +           if (!size)
> +             /* If it's dynamic, the backend might know a hardware
> +                limitation.  */
> +             size = targetm.goacc.dim_limit (axis);
>
> this all seems a little bit fragile and relying on implementation details?
> Is the attribute always present?

Yes.  The ifns are inserted by a pass (execute_oacc_device_lower) that only 
operates on such functions. (I considered adding  an assert, but figured the 
resulting segfault  would be loud enough)

> Is the call argument always a constant
> that fits in a HOST_WIDE_INT (or even int here)?

Yes.


Are there always enough
> 'dims' in the tree list?

Yes.  The oacc_device_lower pass has already validated and canonicalized the 
dimensions.


Is the 'dim' value always an INTEGER_CST that
> fits a HOST_WIDE_INT (or even an int here)?

Yes.  That's part of the validation.  see set_oacc_fn_attrib (omp-low.c)

> I hope all these constraints (esp. 'int' fitting) are verified by the parser.

It's an internal function not visible the user.  Generation is entirely within 
the compiler

> If so I'd like to see helper functions to hide these implementation details
> from generic code like this.

ok.

>
> You miss to provide the known lower bound to VRP when size is 0
> in the end.  Just unconditioonally do
>
>    tree type = TREE_TYPE (gimple_call_lhs (stmt));
>    set_value_range (vr, VR_RANGE,
>                             build_int_cst (type, is_pos ? 0 : 1),
>                             size
>                             ? build_int_cst (type, size - is_pos)
>                             : vrp_val_max (type), NULL);

I'm confused.  If size is zero, we never execute that path, and IIUC therefore 
never specify a range.  What you suggest looks like an additional improvement 
though.  Is that what you meant?

nathan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: OpenACC dimension range propagation optimization
  2015-11-04 13:56   ` Nathan Sidwell
@ 2015-11-04 14:35     ` Richard Biener
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Biener @ 2015-11-04 14:35 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: GCC Patches

On Wed, Nov 4, 2015 at 2:56 PM, Nathan Sidwell <nathan@acm.org> wrote:
> On 11/04/15 05:26, Richard Biener wrote:
>>
>> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
>>>
>>> Richard,
>>> this patch implements VRP for the 2 openacc axis internal fns I've added.
>>> We know the position within  a dimension cannot exceed that dimensions
>>> extend. Further, if the extend is dynamic, the target backend may well
>>> know
>>> there's a hardware-mandated maximum value.
>>>
>>> Hence, added a new target hook to allow the backend to specify that upper
>>> bound, and added smarts to extract_range_basic to process the two
>>> internal
>>> functions.
>>>
>>> Incidentally, this was the bit I was working on at the cauldron, which
>>> caused me to work on the min/max range combining.
>>>
>>> ok for trunk?
>>
>>
>> +         /* Optimizing these two internal functions helps the loop
>> +            optimizer eliminate outer comparisons.  Size is [1,N]
>> +            and pos is [0,N-1].  */
>> +         {
>> +           bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
>> +           tree attr = get_oacc_fn_attrib (current_function_decl);
>> +           tree arg = gimple_call_arg (stmt, 0);
>> +           int axis = TREE_INT_CST_LOW (arg);
>> +           tree dims = TREE_VALUE (attr);
>> +
>> +           for (int ix = axis; ix--;)
>> +             dims = TREE_CHAIN (dims);
>> +           int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
>> +
>> +           if (!size)
>> +             /* If it's dynamic, the backend might know a hardware
>> +                limitation.  */
>> +             size = targetm.goacc.dim_limit (axis);
>>
>> this all seems a little bit fragile and relying on implementation details?
>> Is the attribute always present?
>
>
> Yes.  The ifns are inserted by a pass (execute_oacc_device_lower) that only
> operates on such functions. (I considered adding  an assert, but figured the
> resulting segfault  would be loud enough)
>
>> Is the call argument always a constant
>> that fits in a HOST_WIDE_INT (or even int here)?
>
>
> Yes.
>
>
> Are there always enough
>>
>> 'dims' in the tree list?
>
>
> Yes.  The oacc_device_lower pass has already validated and canonicalized the
> dimensions.
>
>
> Is the 'dim' value always an INTEGER_CST that
>>
>> fits a HOST_WIDE_INT (or even an int here)?
>
>
> Yes.  That's part of the validation.  see set_oacc_fn_attrib (omp-low.c)
>
>> I hope all these constraints (esp. 'int' fitting) are verified by the
>> parser.
>
>
> It's an internal function not visible the user.  Generation is entirely
> within the compiler
>
>> If so I'd like to see helper functions to hide these implementation
>> details
>> from generic code like this.
>
>
> ok.
>
>>
>> You miss to provide the known lower bound to VRP when size is 0
>> in the end.  Just unconditioonally do
>>
>>    tree type = TREE_TYPE (gimple_call_lhs (stmt));
>>    set_value_range (vr, VR_RANGE,
>>                             build_int_cst (type, is_pos ? 0 : 1),
>>                             size
>>                             ? build_int_cst (type, size - is_pos)
>>                             : vrp_val_max (type), NULL);
>
>
> I'm confused.  If size is zero, we never execute that path, and IIUC
> therefore never specify a range.  What you suggest looks like an additional
> improvement though.  Is that what you meant?

Yes.

> nathan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: OpenACC dimension range propagation optimization
  2015-11-04 10:26 ` Richard Biener
  2015-11-04 13:56   ` Nathan Sidwell
@ 2015-11-04 17:29   ` Nathan Sidwell
  2015-11-05 11:32     ` Richard Biener
  1 sibling, 1 reply; 6+ messages in thread
From: Nathan Sidwell @ 2015-11-04 17:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jakub Jelinek, Bernd Schmidt

[-- Attachment #1: Type: text/plain, Size: 987 bytes --]

On 11/04/15 05:26, Richard Biener wrote:
> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
>> Richard,

> this all seems a little bit fragile and relying on implementation details?
> Is the attribute always present?  Is the call argument always a constant
> that fits in a HOST_WIDE_INT (or even int here)?  Are there always enough
> 'dims' in the tree list?  Is the 'dim' value always an INTEGER_CST that
> fits a HOST_WIDE_INT (or even an int here)?


> If so I'd like to see helper functions to hide these implementation details
> from generic code like this.

Like this?

I've added two helper functions to omp-low.c, one to  get the internal fn arg 
number and the other to get a dimension value, given an axis number.  omp-low 
seemed the most appropriate point -- that's  where the dimension processing is, 
and the generation of these internal fn calls.

(Bernd, I'll fixup the dimension folding patch to use these calls before 
applying it.)

ok?

nathan

[-- Attachment #2: trunk-dim-vrp-1104.patch --]
[-- Type: text/x-patch, Size: 8049 bytes --]

2015-11-04  Nathan Sidwell  <nathan@codesourcery.com>

	* target.def (goacc.dim_limit): New hook.
	* targhooks.h (default_goacc_dim_limit): Declare.
	* doc/tm.texi.in (TARGET_GOACC_DIM_LIMIT): Add.
	* doc/tm.texi: Rebuilt.
	* omp-low.h (get_oacc_fn_dim_size, get_oacc_ifn_dim_arg): Declare.
	* omp-low.c (get_oacc_fn_dim_size, get_oacc_ifn_dim_arg): New.
	(default_goacc_dim_limit): New.
	* config/nvptx/nvptx.c (PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
	(nvptx_goacc_dim_limit) New.
	(TARGET_GOACC_DIM_LIMIT): Override.
	* tree-vrp.c: Include omp-low.h, target.h.
	(extract_range_basic): Add handling for IFN_GOACC_DIM_SIZE &
	IFN_GOACC_DIM_POS.

Index: omp-low.c
===================================================================
--- omp-low.c	(revision 229757)
+++ omp-low.c	(working copy)
@@ -12095,6 +12095,41 @@ get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
+/* Extract an oacc execution dimension from FN.  FN must be an
+   offloaded function or routine that has already had its execution
+   dimensions lowered to the target-specific values.  */
+
+int
+get_oacc_fn_dim_size (tree fn, int axis)
+{
+  tree attrs = get_oacc_fn_attrib (fn);
+  
+  gcc_assert (axis < GOMP_DIM_MAX);
+
+  tree dims = TREE_VALUE (attrs);
+  while (axis--)
+    dims = TREE_CHAIN (dims);
+
+  int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
+
+  return size;
+}
+
+/* Extract the dimension axis from an IFN_GOACC_DIM_POS or
+   IFN_GOACC_DIM_SIZE call.  */
+
+int
+get_oacc_ifn_dim_arg (const gimple *stmt)
+{
+  gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
+		       || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
+  tree arg = gimple_call_arg (stmt, 0);
+  HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);
+
+  gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
+  return (int) axis;
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -19383,6 +19418,18 @@ default_goacc_validate_dims (tree ARG_UN
   return changed;
 }
 
+/* Default dimension bound is unknown on accelerator and 1 on host. */
+
+int
+default_goacc_dim_limit (int ARG_UNUSED (axis))
+{
+#ifdef ACCEL_COMPILER
+  return 0;
+#else
+  return 1;
+#endif
+}
+
 namespace {
 
 const pass_data pass_data_oacc_device_lower =
Index: omp-low.h
===================================================================
--- omp-low.h	(revision 229757)
+++ omp-low.h	(working copy)
@@ -31,6 +31,8 @@ extern bool make_gimple_omp_edges (basic
 extern void omp_finish_file (void);
 extern tree omp_member_access_dummy_var (tree);
 extern tree get_oacc_fn_attrib (tree);
+extern int get_oacc_ifn_dim_arg (const gimple *);
+extern int get_oacc_fn_dim_size (tree, int);
 
 extern GTY(()) vec<tree, va_gc> *offload_funcs;
 extern GTY(()) vec<tree, va_gc> *offload_vars;
Index: targhooks.h
===================================================================
--- targhooks.h	(revision 229757)
+++ targhooks.h	(working copy)
@@ -110,6 +110,7 @@ extern void default_destroy_cost_data (v
 
 /* OpenACC hooks.  */
 extern bool default_goacc_validate_dims (tree, int [], int);
+extern int default_goacc_dim_limit (int);
 extern bool default_goacc_fork_join (gcall *, const int [], bool);
 
 /* These are here, and not in hooks.[ch], because not all users of
Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 229757)
+++ doc/tm.texi	(working copy)
@@ -5777,6 +5777,11 @@ true, if changes have been made.  You mu
 provide dimensions larger than 1.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_GOACC_DIM_LIMIT (int @var{axis})
+This hook should return the maximum size of a particular dimension,
+or zero if unbounded.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork})
 This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
 function calls to target-specific gimple, or indicate whether they
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in	(revision 229757)
+++ doc/tm.texi.in	(working copy)
@@ -4262,6 +4262,8 @@ address;  but often a machine-dependent
 
 @hook TARGET_GOACC_VALIDATE_DIMS
 
+@hook TARGET_GOACC_DIM_LIMIT
+
 @hook TARGET_GOACC_FORK_JOIN
 
 @node Anchored Addresses
Index: tree-vrp.c
===================================================================
--- tree-vrp.c	(revision 229757)
+++ tree-vrp.c	(working copy)
@@ -55,8 +55,8 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-threadupdate.h"
 #include "tree-ssa-scopedtables.h"
 #include "tree-ssa-threadedge.h"
-
-
+#include "omp-low.h"
+#include "target.h"
 
 /* Range of values that can be associated with an SSA_NAME after VRP
    has executed.  */
@@ -3973,7 +3973,9 @@ extract_range_basic (value_range *vr, gi
   else if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
     {
       enum tree_code subcode = ERROR_MARK;
-      switch (gimple_call_internal_fn (stmt))
+      unsigned ifn_code = gimple_call_internal_fn (stmt);
+
+      switch (ifn_code)
 	{
 	case IFN_UBSAN_CHECK_ADD:
 	  subcode = PLUS_EXPR;
@@ -3984,6 +3986,28 @@ extract_range_basic (value_range *vr, gi
 	case IFN_UBSAN_CHECK_MUL:
 	  subcode = MULT_EXPR;
 	  break;
+	case IFN_GOACC_DIM_SIZE:
+	case IFN_GOACC_DIM_POS:
+	  /* Optimizing these two internal functions helps the loop
+	     optimizer eliminate outer comparisons.  Size is [1,N]
+	     and pos is [0,N-1].  */
+	  {
+	    bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
+	    int axis = get_oacc_ifn_dim_arg (stmt);
+	    int size = get_oacc_fn_dim_size (current_function_decl, axis);
+
+	    if (!size)
+	      /* If it's dynamic, the backend might know a hardware
+		 limitation.  */
+	      size = targetm.goacc.dim_limit (axis);
+
+	    tree type = TREE_TYPE (gimple_call_lhs (stmt));
+	    set_value_range (vr, VR_RANGE,
+			     build_int_cst (type, is_pos ? 0 : 1),
+			     size ? build_int_cst (type, size - is_pos)
+			          : vrp_val_max (type), NULL);
+	  }
+	  return;
 	default:
 	  break;
 	}
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c	(revision 229757)
+++ config/nvptx/nvptx.c	(working copy)
@@ -3248,6 +3248,10 @@ nvptx_file_end (void)
     }
 }
 \f
+/* Define dimension sizes for known hardware.  */
+#define PTX_VECTOR_LENGTH 32
+#define PTX_WORKER_LENGTH 32
+
 /* Validate compute dimensions of an OpenACC offload or routine, fill
    in non-unity defaults.  FN_LEVEL indicates the level at which a
    routine might spawn a loop.  It is negative for non-routines.  */
@@ -3264,6 +3268,25 @@ nvptx_goacc_validate_dims (tree ARG_UNUS
   return changed;
 }
 
+/* Return maximum dimension size, or zero for unbounded.  */
+
+static int
+nvptx_dim_limit (int axis)
+{
+  switch (axis)
+    {
+    case GOMP_DIM_WORKER:
+      return PTX_WORKER_LENGTH;
+
+    case GOMP_DIM_VECTOR:
+      return PTX_VECTOR_LENGTH;
+
+    default:
+      break;
+    }
+  return 0;
+}
+
 /* Determine whether fork & joins are needed.  */
 
 static bool
@@ -3376,6 +3399,9 @@ nvptx_goacc_fork_join (gcall *call, cons
 #undef TARGET_GOACC_VALIDATE_DIMS
 #define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
 
+#undef TARGET_GOACC_DIM_LIMIT
+#define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
+
 #undef TARGET_GOACC_FORK_JOIN
 #define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
 
Index: target.def
===================================================================
--- target.def	(revision 229757)
+++ target.def	(working copy)
@@ -1659,6 +1659,13 @@ bool, (tree decl, int *dims, int fn_leve
 default_goacc_validate_dims)
 
 DEFHOOK
+(dim_limit,
+"This hook should return the maximum size of a particular dimension,\n\
+or zero if unbounded.",
+int, (int axis),
+default_goacc_dim_limit)
+
+DEFHOOK
 (fork_join,
 "This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
 function calls to target-specific gimple, or indicate whether they\n\

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: OpenACC dimension range propagation optimization
  2015-11-04 17:29   ` Nathan Sidwell
@ 2015-11-05 11:32     ` Richard Biener
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Biener @ 2015-11-05 11:32 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: GCC Patches, Jakub Jelinek, Bernd Schmidt

On Wed, Nov 4, 2015 at 6:29 PM, Nathan Sidwell <nathan@acm.org> wrote:
> On 11/04/15 05:26, Richard Biener wrote:
>>
>> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
>>>
>>> Richard,
>
>
>> this all seems a little bit fragile and relying on implementation details?
>> Is the attribute always present?  Is the call argument always a constant
>> that fits in a HOST_WIDE_INT (or even int here)?  Are there always enough
>> 'dims' in the tree list?  Is the 'dim' value always an INTEGER_CST that
>> fits a HOST_WIDE_INT (or even an int here)?
>
>
>
>> If so I'd like to see helper functions to hide these implementation
>> details
>> from generic code like this.
>
>
> Like this?
>
> I've added two helper functions to omp-low.c, one to  get the internal fn
> arg number and the other to get a dimension value, given an axis number.
> omp-low seemed the most appropriate point -- that's  where the dimension
> processing is, and the generation of these internal fn calls.
>
> (Bernd, I'll fixup the dimension folding patch to use these calls before
> applying it.)
>
> ok?

Ok

Thanks,
Richard.

> nathan

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-11-05 11:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-03 18:12 OpenACC dimension range propagation optimization Nathan Sidwell
2015-11-04 10:26 ` Richard Biener
2015-11-04 13:56   ` Nathan Sidwell
2015-11-04 14:35     ` Richard Biener
2015-11-04 17:29   ` Nathan Sidwell
2015-11-05 11:32     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).