public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, vec-tails 01/10] New compiler options
@ 2016-05-19 19:37 Ilya Enkovich
  2016-05-19 20:23 ` Joseph Myers
  2016-05-20  9:26 ` Richard Biener
  0 siblings, 2 replies; 13+ messages in thread
From: Ilya Enkovich @ 2016-05-19 19:37 UTC (permalink / raw)
  To: gcc-patches

Hi,

This patch introduces new options used for loop epilogue vectorization.

Thanks,
Ilya
--
gcc/

2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>

	* common.opt (flag_tree_vectorize_epilogues): New.
	(ftree-vectorize-short-loops): New.
	(ftree-vectorize-epilogues=): New.
	(fno-tree-vectorize-epilogues): New.
	(fvect-epilogue-cost-model=): New.
	* flag-types.h (enum vect_epilogue_mode): New.
	* opts.c (parse_vectorizer_options): New.
	(common_handle_option): Support -ftree-vectorize-epilogues=
	and -fno-tree-vectorize-epilogues options.


diff --git a/gcc/common.opt b/gcc/common.opt
index 682cb41..6b83b79 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
 Variable
 bool flag_disable_hsa = false
 
+; Flag holding modes for loop epilogue vectorization
+Variable
+unsigned int flag_tree_vectorize_epilogues
+
 ###
 Driver
 
@@ -2557,6 +2561,19 @@ ftree-vectorize
 Common Report Var(flag_tree_vectorize) Optimization
 Enable vectorization on trees.
 
+ftree-vectorize-short-loops
+Common Report Var(flag_tree_vectorize_short_loops) Optimization
+Enable vectorization of loops with low trip count using masking.
+
+ftree-vectorize-epilogues=
+Common Report Joined Optimization
+Comma separated list of loop epilogue vectorization modes.
+Available modes: combine, mask, nomask.
+
+fno-tree-vectorize-epilogues
+Common RejectNegative Optimization
+Disable epilogues vectorization.
+
 ftree-vectorizer-verbose=
 Common Joined RejectNegative Ignore
 Does nothing.  Preserved for backward compatibility.
@@ -2577,6 +2594,10 @@ fsimd-cost-model=
 Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
 Specifies the vectorization cost model for code marked with a simd directive.
 
+fvect-epilogue-cost-model=
+Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
+Specifies the cost model for epilogue vectorization.
+
 Enum
 Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
 
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index dd57e16..24081b1 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -200,6 +200,15 @@ enum vect_cost_model {
   VECT_COST_MODEL_DEFAULT = 3
 };
 
+/* Epilogue vectorization modes.  */
+enum vect_epilogue_mode {
+  VECT_EPILOGUE_COMBINE = 1 << 0,
+  VECT_EPILOGUE_MASK = 1 << 1,
+  VECT_EPILOGUE_NOMASK = 1 << 2,
+  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
+		      | VECT_EPILOGUE_NOMASK
+};
+
 /* Different instrumentation modes.  */
 enum sanitize_code {
   /* AddressSanitizer.  */
diff --git a/gcc/opts.c b/gcc/opts.c
index 0f9431a..a0c0987 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
   return flags;
 }
 
+/* Parse comma separated vectorizer suboptions from P for option SCODE,
+   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
+   don't issue diagnostics.  */
+
+unsigned int
+parse_vectorizer_options (const char *p, location_t loc, int scode,
+			  unsigned int flags, int value, bool complain)
+{
+  if (scode != OPT_ftree_vectorize_epilogues_)
+    return flags;
+
+  if (!p)
+    return value;
+
+  while (*p != 0)
+    {
+      size_t len;
+      const char *comma = strchr (p, ',');
+      unsigned int flag = 0;
+
+      if (comma == NULL)
+	len = strlen (p);
+      else
+	len = comma - p;
+      if (len == 0)
+	{
+	  p = comma + 1;
+	  continue;
+	}
+
+      /* Check to see if the string matches an option class name.  */
+      if (len == strlen ("combine")
+	  && memcmp (p, "combine", len) == 0)
+	flag = VECT_EPILOGUE_COMBINE;
+      else if (len == strlen ("mask")
+	  && memcmp (p, "mask", len) == 0)
+	flag = VECT_EPILOGUE_MASK;
+      else if (len == strlen ("nomask")
+	  && memcmp (p, "nomask", len) == 0)
+	flag = VECT_EPILOGUE_NOMASK;
+      else if (complain)
+	error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
+		  "option: %q.*s", (int) len, p);
+
+      if (value)
+	flags |= flag;
+      else
+	flags &= ~flag;
+
+      if (comma == NULL)
+	break;
+      p = comma + 1;
+    }
+
+  return flags;
+}
+
 /* Handle target- and language-independent options.  Return zero to
    generate an "unknown option" message.  Only options that need
    extra handling need to be listed here; if you simply want
@@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
       if (!opts_set->x_flag_tree_slp_vectorize)
         opts->x_flag_tree_slp_vectorize = value;
       break;
+
+    case OPT_ftree_vectorize_epilogues_:
+      opts->x_flag_tree_vectorize_epilogues
+	= parse_vectorizer_options (arg, loc, code,
+				    opts->x_flag_tree_vectorize_epilogues,
+				    value, true);
+      break;
+
+    case OPT_fno_tree_vectorize_epilogues:
+      opts->x_flag_tree_vectorize_epilogues = 0;
+      break;
+
     case OPT_fshow_column:
       dc->show_column = value;
       break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-19 19:37 [PATCH, vec-tails 01/10] New compiler options Ilya Enkovich
@ 2016-05-19 20:23 ` Joseph Myers
  2016-05-20  9:26 ` Richard Biener
  1 sibling, 0 replies; 13+ messages in thread
From: Joseph Myers @ 2016-05-19 20:23 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: gcc-patches

On Thu, 19 May 2016, Ilya Enkovich wrote:

> Hi,
> 
> This patch introduces new options used for loop epilogues vectorization.

Any patch adding a new option should update invoke.texi (both the summary 
list of options, and adding documentation for the new option).

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-19 19:37 [PATCH, vec-tails 01/10] New compiler options Ilya Enkovich
  2016-05-19 20:23 ` Joseph Myers
@ 2016-05-20  9:26 ` Richard Biener
  2016-05-20  9:50   ` Ilya Enkovich
  1 sibling, 1 reply; 13+ messages in thread
From: Richard Biener @ 2016-05-20  9:26 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: GCC Patches

On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi,
>
> This patch introduces new options used for loop epilogues vectorization.

Why's that?  This is a bit too much for the casual user, and if it is
really necessary to control this via options, then it is not
fine-grained enough.

Why doesn't the vectorizer/backend have enough info to decide this itself?

Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * common.opt (flag_tree_vectorize_epilogues): New.
>         (ftree-vectorize-short-loops): New.
>         (ftree-vectorize-epilogues=): New.
>         (fno-tree-vectorize-epilogues): New.
>         (fvect-epilogue-cost-model=): New.
>         * flag-types.h (enum vect_epilogue_mode): New.
>         * opts.c (parse_vectorizer_options): New.
>         (common_handle_option): Support -ftree-vectorize-epilogues=
>         and -fno-tree-vectorize-epilogues options.
>
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 682cb41..6b83b79 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
>  Variable
>  bool flag_disable_hsa = false
>
> +; Flag holding modes for loop epilogue vectorization
> +Variable
> +unsigned int flag_tree_vectorize_epilogues
> +
>  ###
>  Driver
>
> @@ -2557,6 +2561,19 @@ ftree-vectorize
>  Common Report Var(flag_tree_vectorize) Optimization
>  Enable vectorization on trees.
>
> +ftree-vectorize-short-loops
> +Common Report Var(flag_tree_vectorize_short_loops) Optimization
> +Enable vectorization of loops with low trip count using masking.
> +
> +ftree-vectorize-epilogues=
> +Common Report Joined Optimization
> +Comma separated list of loop epilogue vectorization modes.
> +Available modes: combine, mask, nomask.
> +
> +fno-tree-vectorize-epilogues
> +Common RejectNegative Optimization
> +Disable epilogues vectorization.
> +
>  ftree-vectorizer-verbose=
>  Common Joined RejectNegative Ignore
>  Does nothing.  Preserved for backward compatibility.
> @@ -2577,6 +2594,10 @@ fsimd-cost-model=
>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>  Specifies the vectorization cost model for code marked with a simd directive.
>
> +fvect-epilogue-cost-model=
> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
> +Specifies the cost model for epilogue vectorization.
> +
>  Enum
>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>
> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
> index dd57e16..24081b1 100644
> --- a/gcc/flag-types.h
> +++ b/gcc/flag-types.h
> @@ -200,6 +200,15 @@ enum vect_cost_model {
>    VECT_COST_MODEL_DEFAULT = 3
>  };
>
> +/* Epilogue vectorization modes.  */
> +enum vect_epilogue_mode {
> +  VECT_EPILOGUE_COMBINE = 1 << 0,
> +  VECT_EPILOGUE_MASK = 1 << 1,
> +  VECT_EPILOGUE_NOMASK = 1 << 2,
> +  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
> +                     | VECT_EPILOGUE_NOMASK
> +};
> +
>  /* Different instrumentation modes.  */
>  enum sanitize_code {
>    /* AddressSanitizer.  */
> diff --git a/gcc/opts.c b/gcc/opts.c
> index 0f9431a..a0c0987 100644
> --- a/gcc/opts.c
> +++ b/gcc/opts.c
> @@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
>    return flags;
>  }
>
> +/* Parse comma separated vectorizer suboptions from P for option SCODE,
> +   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
> +   don't issue diagnostics.  */
> +
> +unsigned int
> +parse_vectorizer_options (const char *p, location_t loc, int scode,
> +                         unsigned int flags, int value, bool complain)
> +{
> +  if (scode != OPT_ftree_vectorize_epilogues_)
> +    return flags;
> +
> +  if (!p)
> +    return value;
> +
> +  while (*p != 0)
> +    {
> +      size_t len;
> +      const char *comma = strchr (p, ',');
> +      unsigned int flag = 0;
> +
> +      if (comma == NULL)
> +       len = strlen (p);
> +      else
> +       len = comma - p;
> +      if (len == 0)
> +       {
> +         p = comma + 1;
> +         continue;
> +       }
> +
> +      /* Check to see if the string matches an option class name.  */
> +      if (len == strlen ("combine")
> +         && memcmp (p, "combine", len) == 0)
> +       flag = VECT_EPILOGUE_COMBINE;
> +      else if (len == strlen ("mask")
> +         && memcmp (p, "mask", len) == 0)
> +       flag = VECT_EPILOGUE_MASK;
> +      else if (len == strlen ("nomask")
> +         && memcmp (p, "nomask", len) == 0)
> +       flag = VECT_EPILOGUE_NOMASK;
> +      else if (complain)
> +       error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
> +                 "option: %q.*s", (int) len, p);
> +
> +      if (value)
> +       flags |= flag;
> +      else
> +       flags &= ~flag;
> +
> +      if (comma == NULL)
> +       break;
> +      p = comma + 1;
> +    }
> +
> +  return flags;
> +}
> +
>  /* Handle target- and language-independent options.  Return zero to
>     generate an "unknown option" message.  Only options that need
>     extra handling need to be listed here; if you simply want
> @@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
>        if (!opts_set->x_flag_tree_slp_vectorize)
>          opts->x_flag_tree_slp_vectorize = value;
>        break;
> +
> +    case OPT_ftree_vectorize_epilogues_:
> +      opts->x_flag_tree_vectorize_epilogues
> +       = parse_vectorizer_options (arg, loc, code,
> +                                   opts->x_flag_tree_vectorize_epilogues,
> +                                   value, true);
> +      break;
> +
> +    case OPT_fno_tree_vectorize_epilogues:
> +      opts->x_flag_tree_vectorize_epilogues = 0;
> +      break;
> +
>      case OPT_fshow_column:
>        dc->show_column = value;
>        break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-20  9:26 ` Richard Biener
@ 2016-05-20  9:50   ` Ilya Enkovich
  2016-05-20 11:17     ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Ilya Enkovich @ 2016-05-20  9:50 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> Hi,
>>
>> This patch introduces new options used for loop epilogues vectorization.
>
> Why's that?  This is a bit too much for the casual user and if it is
> really necessary
> to control this via options then it is not fine-grained enough.
>
> Why doesn't the vectorizer/backend have enough info to decide this itself?

I don't expect casual users to decide which modes to choose.  These controls are
added for debugging and performance measurement purposes.  I see now that I am
missing -ftree-vectorize-epilogues as an alias for -ftree-vectorize-epilogues=all.
I certainly expect epilogue and short-loop vectorization to be enabled by default
at -O3 or by -ftree-vectorize-loops.

Thanks,
Ilya

>
> Richard.
>
>> Thanks,
>> Ilya
>> --
>> gcc/
>>
>> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>>
>>         * common.opt (flag_tree_vectorize_epilogues): New.
>>         (ftree-vectorize-short-loops): New.
>>         (ftree-vectorize-epilogues=): New.
>>         (fno-tree-vectorize-epilogues): New.
>>         (fvect-epilogue-cost-model=): New.
>>         * flag-types.h (enum vect_epilogue_mode): New.
>>         * opts.c (parse_vectorizer_options): New.
>>         (common_handle_option): Support -ftree-vectorize-epilogues=
>>         and -fno-tree-vectorize-epilogues options.
>>
>>
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index 682cb41..6b83b79 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
>>  Variable
>>  bool flag_disable_hsa = false
>>
>> +; Flag holding modes for loop epilogue vectorization
>> +Variable
>> +unsigned int flag_tree_vectorize_epilogues
>> +
>>  ###
>>  Driver
>>
>> @@ -2557,6 +2561,19 @@ ftree-vectorize
>>  Common Report Var(flag_tree_vectorize) Optimization
>>  Enable vectorization on trees.
>>
>> +ftree-vectorize-short-loops
>> +Common Report Var(flag_tree_vectorize_short_loops) Optimization
>> +Enable vectorization of loops with low trip count using masking.
>> +
>> +ftree-vectorize-epilogues=
>> +Common Report Joined Optimization
>> +Comma separated list of loop epilogue vectorization modes.
>> +Available modes: combine, mask, nomask.
>> +
>> +fno-tree-vectorize-epilogues
>> +Common RejectNegative Optimization
>> +Disable epilogues vectorization.
>> +
>>  ftree-vectorizer-verbose=
>>  Common Joined RejectNegative Ignore
>>  Does nothing.  Preserved for backward compatibility.
>> @@ -2577,6 +2594,10 @@ fsimd-cost-model=
>>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>>  Specifies the vectorization cost model for code marked with a simd directive.
>>
>> +fvect-epilogue-cost-model=
>> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
>> +Specifies the cost model for epilogue vectorization.
>> +
>>  Enum
>>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>>
>> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
>> index dd57e16..24081b1 100644
>> --- a/gcc/flag-types.h
>> +++ b/gcc/flag-types.h
>> @@ -200,6 +200,15 @@ enum vect_cost_model {
>>    VECT_COST_MODEL_DEFAULT = 3
>>  };
>>
>> +/* Epilogue vectorization modes.  */
>> +enum vect_epilogue_mode {
>> +  VECT_EPILOGUE_COMBINE = 1 << 0,
>> +  VECT_EPILOGUE_MASK = 1 << 1,
>> +  VECT_EPILOGUE_NOMASK = 1 << 2,
>> +  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
>> +                     | VECT_EPILOGUE_NOMASK
>> +};
>> +
>>  /* Different instrumentation modes.  */
>>  enum sanitize_code {
>>    /* AddressSanitizer.  */
>> diff --git a/gcc/opts.c b/gcc/opts.c
>> index 0f9431a..a0c0987 100644
>> --- a/gcc/opts.c
>> +++ b/gcc/opts.c
>> @@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
>>    return flags;
>>  }
>>
>> +/* Parse comma separated vectorizer suboptions from P for option SCODE,
>> +   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
>> +   don't issue diagnostics.  */
>> +
>> +unsigned int
>> +parse_vectorizer_options (const char *p, location_t loc, int scode,
>> +                         unsigned int flags, int value, bool complain)
>> +{
>> +  if (scode != OPT_ftree_vectorize_epilogues_)
>> +    return flags;
>> +
>> +  if (!p)
>> +    return value;
>> +
>> +  while (*p != 0)
>> +    {
>> +      size_t len;
>> +      const char *comma = strchr (p, ',');
>> +      unsigned int flag = 0;
>> +
>> +      if (comma == NULL)
>> +       len = strlen (p);
>> +      else
>> +       len = comma - p;
>> +      if (len == 0)
>> +       {
>> +         p = comma + 1;
>> +         continue;
>> +       }
>> +
>> +      /* Check to see if the string matches an option class name.  */
>> +      if (len == strlen ("combine")
>> +         && memcmp (p, "combine", len) == 0)
>> +       flag = VECT_EPILOGUE_COMBINE;
>> +      else if (len == strlen ("mask")
>> +         && memcmp (p, "mask", len) == 0)
>> +       flag = VECT_EPILOGUE_MASK;
>> +      else if (len == strlen ("nomask")
>> +         && memcmp (p, "nomask", len) == 0)
>> +       flag = VECT_EPILOGUE_NOMASK;
>> +      else if (complain)
>> +       error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
>> +                 "option: %q.*s", (int) len, p);
>> +
>> +      if (value)
>> +       flags |= flag;
>> +      else
>> +       flags &= ~flag;
>> +
>> +      if (comma == NULL)
>> +       break;
>> +      p = comma + 1;
>> +    }
>> +
>> +  return flags;
>> +}
>> +
>>  /* Handle target- and language-independent options.  Return zero to
>>     generate an "unknown option" message.  Only options that need
>>     extra handling need to be listed here; if you simply want
>> @@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
>>        if (!opts_set->x_flag_tree_slp_vectorize)
>>          opts->x_flag_tree_slp_vectorize = value;
>>        break;
>> +
>> +    case OPT_ftree_vectorize_epilogues_:
>> +      opts->x_flag_tree_vectorize_epilogues
>> +       = parse_vectorizer_options (arg, loc, code,
>> +                                   opts->x_flag_tree_vectorize_epilogues,
>> +                                   value, true);
>> +      break;
>> +
>> +    case OPT_fno_tree_vectorize_epilogues:
>> +      opts->x_flag_tree_vectorize_epilogues = 0;
>> +      break;
>> +
>>      case OPT_fshow_column:
>>        dc->show_column = value;
>>        break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-20  9:50   ` Ilya Enkovich
@ 2016-05-20 11:17     ` Richard Biener
  2016-05-20 11:40       ` Ilya Enkovich
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2016-05-20 11:17 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: GCC Patches

On Fri, May 20, 2016 at 11:50 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> 2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>> Hi,
>>>
>>> This patch introduces new options used for loop epilogues vectorization.
>>
>> Why's that?  This is a bit too much for the casual user and if it is
>> really necessary
>> to control this via options then it is not fine-grained enough.
>>
>> Why doesn't the vectorizer/backend have enough info to decide this itself?
>
> I don't expect casual user to decide which modes to choose.  These controls are
> added for debugging and performance measurement purposes.  I see now I miss
> -ftree-vectorize-epilogues aliased to -ftree-vectorize-epilogues=all.  Surely
> I expect epilogues and short loops vectorization be enabled by default on -O3
> or by -ftree-vectorize-loops.

Can you make all these --params then?  I think to be useful to users we'd want
them to be loop pragmas rather than options.

Richard.

> Thanks,
> Ilya
>
>>
>> Richard.
>>
>>> Thanks,
>>> Ilya
>>> --
>>> gcc/
>>>
>>> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>>>
>>>         * common.opt (flag_tree_vectorize_epilogues): New.
>>>         (ftree-vectorize-short-loops): New.
>>>         (ftree-vectorize-epilogues=): New.
>>>         (fno-tree-vectorize-epilogues): New.
>>>         (fvect-epilogue-cost-model=): New.
>>>         * flag-types.h (enum vect_epilogue_mode): New.
>>>         * opts.c (parse_vectorizer_options): New.
>>>         (common_handle_option): Support -ftree-vectorize-epilogues=
>>>         and -fno-tree-vectorize-epilogues options.
>>>
>>>
>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>> index 682cb41..6b83b79 100644
>>> --- a/gcc/common.opt
>>> +++ b/gcc/common.opt
>>> @@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
>>>  Variable
>>>  bool flag_disable_hsa = false
>>>
>>> +; Flag holding modes for loop epilogue vectorization
>>> +Variable
>>> +unsigned int flag_tree_vectorize_epilogues
>>> +
>>>  ###
>>>  Driver
>>>
>>> @@ -2557,6 +2561,19 @@ ftree-vectorize
>>>  Common Report Var(flag_tree_vectorize) Optimization
>>>  Enable vectorization on trees.
>>>
>>> +ftree-vectorize-short-loops
>>> +Common Report Var(flag_tree_vectorize_short_loops) Optimization
>>> +Enable vectorization of loops with low trip count using masking.
>>> +
>>> +ftree-vectorize-epilogues=
>>> +Common Report Joined Optimization
>>> +Comma separated list of loop epilogue vectorization modes.
>>> +Available modes: combine, mask, nomask.
>>> +
>>> +fno-tree-vectorize-epilogues
>>> +Common RejectNegative Optimization
>>> +Disable epilogues vectorization.
>>> +
>>>  ftree-vectorizer-verbose=
>>>  Common Joined RejectNegative Ignore
>>>  Does nothing.  Preserved for backward compatibility.
>>> @@ -2577,6 +2594,10 @@ fsimd-cost-model=
>>>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>>>  Specifies the vectorization cost model for code marked with a simd directive.
>>>
>>> +fvect-epilogue-cost-model=
>>> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
>>> +Specifies the cost model for epilogue vectorization.
>>> +
>>>  Enum
>>>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>>>
>>> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
>>> index dd57e16..24081b1 100644
>>> --- a/gcc/flag-types.h
>>> +++ b/gcc/flag-types.h
>>> @@ -200,6 +200,15 @@ enum vect_cost_model {
>>>    VECT_COST_MODEL_DEFAULT = 3
>>>  };
>>>
>>> +/* Epilogue vectorization modes.  */
>>> +enum vect_epilogue_mode {
>>> +  VECT_EPILOGUE_COMBINE = 1 << 0,
>>> +  VECT_EPILOGUE_MASK = 1 << 1,
>>> +  VECT_EPILOGUE_NOMASK = 1 << 2,
>>> +  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
>>> +                     | VECT_EPILOGUE_NOMASK
>>> +};
>>> +
>>>  /* Different instrumentation modes.  */
>>>  enum sanitize_code {
>>>    /* AddressSanitizer.  */
>>> diff --git a/gcc/opts.c b/gcc/opts.c
>>> index 0f9431a..a0c0987 100644
>>> --- a/gcc/opts.c
>>> +++ b/gcc/opts.c
>>> @@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
>>>    return flags;
>>>  }
>>>
>>> +/* Parse comma separated vectorizer suboptions from P for option SCODE,
>>> +   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
>>> +   don't issue diagnostics.  */
>>> +
>>> +unsigned int
>>> +parse_vectorizer_options (const char *p, location_t loc, int scode,
>>> +                         unsigned int flags, int value, bool complain)
>>> +{
>>> +  if (scode != OPT_ftree_vectorize_epilogues_)
>>> +    return flags;
>>> +
>>> +  if (!p)
>>> +    return value;
>>> +
>>> +  while (*p != 0)
>>> +    {
>>> +      size_t len;
>>> +      const char *comma = strchr (p, ',');
>>> +      unsigned int flag = 0;
>>> +
>>> +      if (comma == NULL)
>>> +       len = strlen (p);
>>> +      else
>>> +       len = comma - p;
>>> +      if (len == 0)
>>> +       {
>>> +         p = comma + 1;
>>> +         continue;
>>> +       }
>>> +
>>> +      /* Check to see if the string matches an option class name.  */
>>> +      if (len == strlen ("combine")
>>> +         && memcmp (p, "combine", len) == 0)
>>> +       flag = VECT_EPILOGUE_COMBINE;
>>> +      else if (len == strlen ("mask")
>>> +         && memcmp (p, "mask", len) == 0)
>>> +       flag = VECT_EPILOGUE_MASK;
>>> +      else if (len == strlen ("nomask")
>>> +         && memcmp (p, "nomask", len) == 0)
>>> +       flag = VECT_EPILOGUE_NOMASK;
>>> +      else if (complain)
>>> +       error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
>>> +                 "option: %q.*s", (int) len, p);
>>> +
>>> +      if (value)
>>> +       flags |= flag;
>>> +      else
>>> +       flags &= ~flag;
>>> +
>>> +      if (comma == NULL)
>>> +       break;
>>> +      p = comma + 1;
>>> +    }
>>> +
>>> +  return flags;
>>> +}
>>> +
>>>  /* Handle target- and language-independent options.  Return zero to
>>>     generate an "unknown option" message.  Only options that need
>>>     extra handling need to be listed here; if you simply want
>>> @@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
>>>        if (!opts_set->x_flag_tree_slp_vectorize)
>>>          opts->x_flag_tree_slp_vectorize = value;
>>>        break;
>>> +
>>> +    case OPT_ftree_vectorize_epilogues_:
>>> +      opts->x_flag_tree_vectorize_epilogues
>>> +       = parse_vectorizer_options (arg, loc, code,
>>> +                                   opts->x_flag_tree_vectorize_epilogues,
>>> +                                   value, true);
>>> +      break;
>>> +
>>> +    case OPT_fno_tree_vectorize_epilogues:
>>> +      opts->x_flag_tree_vectorize_epilogues = 0;
>>> +      break;
>>> +
>>>      case OPT_fshow_column:
>>>        dc->show_column = value;
>>>        break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-20 11:17     ` Richard Biener
@ 2016-05-20 11:40       ` Ilya Enkovich
  2016-06-09 10:36         ` Ilya Enkovich
                           ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Ilya Enkovich @ 2016-05-20 11:40 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

2016-05-20 14:17 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Fri, May 20, 2016 at 11:50 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> 2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>>> Hi,
>>>>
>>>> This patch introduces new options used for loop epilogues vectorization.
>>>
>>> Why's that?  This is a bit too much for the casual user and if it is
>>> really necessary
>>> to control this via options then it is not fine-grained enough.
>>>
>>> Why doesn't the vectorizer/backend have enough info to decide this itself?
>>
>> I don't expect casual user to decide which modes to choose.  These controls are
>> added for debugging and performance measurement purposes.  I see now I miss
>> -ftree-vectorize-epilogues aliased to -ftree-vectorize-epilogues=all.  Surely
>> I expect epilogues and short loops vectorization be enabled by default on -O3
>> or by -ftree-vectorize-loops.
>
> Can you make all these --params then?  I think to be useful to users we'd want
> them to be loop pragmas rather than options.

OK, I'll change them to params.  I hadn't thought about control via
pragmas, but I will now.

Thanks,
Ilya

>
> Richard.
>
>> Thanks,
>> Ilya
>>
>>>
>>> Richard.
>>>
>>>> Thanks,
>>>> Ilya
>>>> --
>>>> gcc/
>>>>
>>>> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>>>>
>>>>         * common.opt (flag_tree_vectorize_epilogues): New.
>>>>         (ftree-vectorize-short-loops): New.
>>>>         (ftree-vectorize-epilogues=): New.
>>>>         (fno-tree-vectorize-epilogues): New.
>>>>         (fvect-epilogue-cost-model=): New.
>>>>         * flag-types.h (enum vect_epilogue_mode): New.
>>>>         * opts.c (parse_vectorizer_options): New.
>>>>         (common_handle_option): Support -ftree-vectorize-epilogues=
>>>>         and -fno-tree-vectorize-epilogues options.
>>>>
>>>>
>>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>>> index 682cb41..6b83b79 100644
>>>> --- a/gcc/common.opt
>>>> +++ b/gcc/common.opt
>>>> @@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
>>>>  Variable
>>>>  bool flag_disable_hsa = false
>>>>
>>>> +; Flag holding modes for loop epilogue vectorization
>>>> +Variable
>>>> +unsigned int flag_tree_vectorize_epilogues
>>>> +
>>>>  ###
>>>>  Driver
>>>>
>>>> @@ -2557,6 +2561,19 @@ ftree-vectorize
>>>>  Common Report Var(flag_tree_vectorize) Optimization
>>>>  Enable vectorization on trees.
>>>>
>>>> +ftree-vectorize-short-loops
>>>> +Common Report Var(flag_tree_vectorize_short_loops) Optimization
>>>> +Enable vectorization of loops with low trip count using masking.
>>>> +
>>>> +ftree-vectorize-epilogues=
>>>> +Common Report Joined Optimization
>>>> +Comma separated list of loop epilogue vectorization modes.
>>>> +Available modes: combine, mask, nomask.
>>>> +
>>>> +fno-tree-vectorize-epilogues
>>>> +Common RejectNegative Optimization
>>>> +Disable epilogues vectorization.
>>>> +
>>>>  ftree-vectorizer-verbose=
>>>>  Common Joined RejectNegative Ignore
>>>>  Does nothing.  Preserved for backward compatibility.
>>>> @@ -2577,6 +2594,10 @@ fsimd-cost-model=
>>>>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>>>>  Specifies the vectorization cost model for code marked with a simd directive.
>>>>
>>>> +fvect-epilogue-cost-model=
>>>> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
>>>> +Specifies the cost model for epilogue vectorization.
>>>> +
>>>>  Enum
>>>>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>>>>
>>>> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
>>>> index dd57e16..24081b1 100644
>>>> --- a/gcc/flag-types.h
>>>> +++ b/gcc/flag-types.h
>>>> @@ -200,6 +200,15 @@ enum vect_cost_model {
>>>>    VECT_COST_MODEL_DEFAULT = 3
>>>>  };
>>>>
>>>> +/* Epilogue vectorization modes.  */
>>>> +enum vect_epilogue_mode {
>>>> +  VECT_EPILOGUE_COMBINE = 1 << 0,
>>>> +  VECT_EPILOGUE_MASK = 1 << 1,
>>>> +  VECT_EPILOGUE_NOMASK = 1 << 2,
>>>> +  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
>>>> +                     | VECT_EPILOGUE_NOMASK
>>>> +};
>>>> +
>>>>  /* Different instrumentation modes.  */
>>>>  enum sanitize_code {
>>>>    /* AddressSanitizer.  */
>>>> diff --git a/gcc/opts.c b/gcc/opts.c
>>>> index 0f9431a..a0c0987 100644
>>>> --- a/gcc/opts.c
>>>> +++ b/gcc/opts.c
>>>> @@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
>>>>    return flags;
>>>>  }
>>>>
>>>> +/* Parse comma separated vectorizer suboptions from P for option SCODE,
>>>> +   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
>>>> +   don't issue diagnostics.  */
>>>> +
>>>> +unsigned int
>>>> +parse_vectorizer_options (const char *p, location_t loc, int scode,
>>>> +                         unsigned int flags, int value, bool complain)
>>>> +{
>>>> +  if (scode != OPT_ftree_vectorize_epilogues_)
>>>> +    return flags;
>>>> +
>>>> +  if (!p)
>>>> +    return value;
>>>> +
>>>> +  while (*p != 0)
>>>> +    {
>>>> +      size_t len;
>>>> +      const char *comma = strchr (p, ',');
>>>> +      unsigned int flag = 0;
>>>> +
>>>> +      if (comma == NULL)
>>>> +       len = strlen (p);
>>>> +      else
>>>> +       len = comma - p;
>>>> +      if (len == 0)
>>>> +       {
>>>> +         p = comma + 1;
>>>> +         continue;
>>>> +       }
>>>> +
>>>> +      /* Check to see if the string matches an option class name.  */
>>>> +      if (len == strlen ("combine")
>>>> +         && memcmp (p, "combine", len) == 0)
>>>> +       flag = VECT_EPILOGUE_COMBINE;
>>>> +      else if (len == strlen ("mask")
>>>> +         && memcmp (p, "mask", len) == 0)
>>>> +       flag = VECT_EPILOGUE_MASK;
>>>> +      else if (len == strlen ("nomask")
>>>> +         && memcmp (p, "nomask", len) == 0)
>>>> +       flag = VECT_EPILOGUE_NOMASK;
>>>> +      else if (complain)
>>>> +       error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
>>>> +                 "option: %q.*s", (int) len, p);
>>>> +
>>>> +      if (value)
>>>> +       flags |= flag;
>>>> +      else
>>>> +       flags &= ~flag;
>>>> +
>>>> +      if (comma == NULL)
>>>> +       break;
>>>> +      p = comma + 1;
>>>> +    }
>>>> +
>>>> +  return flags;
>>>> +}
>>>> +
>>>>  /* Handle target- and language-independent options.  Return zero to
>>>>     generate an "unknown option" message.  Only options that need
>>>>     extra handling need to be listed here; if you simply want
>>>> @@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
>>>>        if (!opts_set->x_flag_tree_slp_vectorize)
>>>>          opts->x_flag_tree_slp_vectorize = value;
>>>>        break;
>>>> +
>>>> +    case OPT_ftree_vectorize_epilogues_:
>>>> +      opts->x_flag_tree_vectorize_epilogues
>>>> +       = parse_vectorizer_options (arg, loc, code,
>>>> +                                   opts->x_flag_tree_vectorize_epilogues,
>>>> +                                   value, true);
>>>> +      break;
>>>> +
>>>> +    case OPT_fno_tree_vectorize_epilogues:
>>>> +      opts->x_flag_tree_vectorize_epilogues = 0;
>>>> +      break;
>>>> +
>>>>      case OPT_fshow_column:
>>>>        dc->show_column = value;
>>>>        break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-20 11:40       ` Ilya Enkovich
@ 2016-06-09 10:36         ` Ilya Enkovich
  2016-06-09 12:19           ` Richard Biener
  2016-06-16  5:06         ` Jeff Law
  2016-06-16 13:45         ` Ilya Enkovich
  2 siblings, 1 reply; 13+ messages in thread
From: Ilya Enkovich @ 2016-06-09 10:36 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Hi Richard,

Do you have some comments on other parts of this series?

Thanks,
Ilya

2016-05-20 14:40 GMT+03:00 Ilya Enkovich <enkovich.gnu@gmail.com>:
> 2016-05-20 14:17 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Fri, May 20, 2016 at 11:50 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>> 2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>>>> Hi,
>>>>>
>>>>> This patch introduces new options used for loop epilogues vectorization.
>>>>
>>>> Why's that?  This is a bit too much for the casual user and if it is
>>>> really necessary
>>>> to control this via options then it is not fine-grained enough.
>>>>
>>>> Why doesn't the vectorizer/backend have enough info to decide this itself?
>>>
>>> I don't expect casual user to decide which modes to choose.  These controls are
>>> added for debugging and performance measurement purposes.  I see now I miss
>>> -ftree-vectorize-epilogues aliased to -ftree-vectorize-epilogues=all.  Surely
>>> I expect epilogues and short loops vectorization be enabled by default on -O3
>>> or by -ftree-vectorize-loops.
>>
>> Can you make all these --params then?  I think to be useful to users we'd want
>> them to be loop pragmas rather than options.
>
> OK, I'll change it to params.  I didn't think about control via
> pragmas but will do now.
>
> Thanks,
> Ilya
>
>>
>> Richard.
>>
>>> Thanks,
>>> Ilya
>>>
>>>>
>>>> Richard.
>>>>
>>>>> Thanks,
>>>>> Ilya
>>>>> --
>>>>> gcc/
>>>>>
>>>>> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>>>>>
>>>>>         * common.opt (flag_tree_vectorize_epilogues): New.
>>>>>         (ftree-vectorize-short-loops): New.
>>>>>         (ftree-vectorize-epilogues=): New.
>>>>>         (fno-tree-vectorize-epilogues): New.
>>>>>         (fvect-epilogue-cost-model=): New.
>>>>>         * flag-types.h (enum vect_epilogue_mode): New.
>>>>>         * opts.c (parse_vectorizer_options): New.
>>>>>         (common_handle_option): Support -ftree-vectorize-epilogues=
>>>>>         and -fno-tree-vectorize-epilogues options.
>>>>>
>>>>>
>>>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>>>> index 682cb41..6b83b79 100644
>>>>> --- a/gcc/common.opt
>>>>> +++ b/gcc/common.opt
>>>>> @@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
>>>>>  Variable
>>>>>  bool flag_disable_hsa = false
>>>>>
>>>>> +; Flag holding modes for loop epilogue vectorization
>>>>> +Variable
>>>>> +unsigned int flag_tree_vectorize_epilogues
>>>>> +
>>>>>  ###
>>>>>  Driver
>>>>>
>>>>> @@ -2557,6 +2561,19 @@ ftree-vectorize
>>>>>  Common Report Var(flag_tree_vectorize) Optimization
>>>>>  Enable vectorization on trees.
>>>>>
>>>>> +ftree-vectorize-short-loops
>>>>> +Common Report Var(flag_tree_vectorize_short_loops) Optimization
>>>>> +Enable vectorization of loops with low trip count using masking.
>>>>> +
>>>>> +ftree-vectorize-epilogues=
>>>>> +Common Report Joined Optimization
>>>>> +Comma separated list of loop epilogue vectorization modes.
>>>>> +Available modes: combine, mask, nomask.
>>>>> +
>>>>> +fno-tree-vectorize-epilogues
>>>>> +Common RejectNegative Optimization
>>>>> +Disable epilogues vectorization.
>>>>> +
>>>>>  ftree-vectorizer-verbose=
>>>>>  Common Joined RejectNegative Ignore
>>>>>  Does nothing.  Preserved for backward compatibility.
>>>>> @@ -2577,6 +2594,10 @@ fsimd-cost-model=
>>>>>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>>>>>  Specifies the vectorization cost model for code marked with a simd directive.
>>>>>
>>>>> +fvect-epilogue-cost-model=
>>>>> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
>>>>> +Specifies the cost model for epilogue vectorization.
>>>>> +
>>>>>  Enum
>>>>>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>>>>>
>>>>> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
>>>>> index dd57e16..24081b1 100644
>>>>> --- a/gcc/flag-types.h
>>>>> +++ b/gcc/flag-types.h
>>>>> @@ -200,6 +200,15 @@ enum vect_cost_model {
>>>>>    VECT_COST_MODEL_DEFAULT = 3
>>>>>  };
>>>>>
>>>>> +/* Epilogue vectorization modes.  */
>>>>> +enum vect_epilogue_mode {
>>>>> +  VECT_EPILOGUE_COMBINE = 1 << 0,
>>>>> +  VECT_EPILOGUE_MASK = 1 << 1,
>>>>> +  VECT_EPILOGUE_NOMASK = 1 << 2,
>>>>> +  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
>>>>> +                     | VECT_EPILOGUE_NOMASK
>>>>> +};
>>>>> +
>>>>>  /* Different instrumentation modes.  */
>>>>>  enum sanitize_code {
>>>>>    /* AddressSanitizer.  */
>>>>> diff --git a/gcc/opts.c b/gcc/opts.c
>>>>> index 0f9431a..a0c0987 100644
>>>>> --- a/gcc/opts.c
>>>>> +++ b/gcc/opts.c
>>>>> @@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
>>>>>    return flags;
>>>>>  }
>>>>>
>>>>> +/* Parse comma separated vectorizer suboptions from P for option SCODE,
>>>>> +   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
>>>>> +   don't issue diagnostics.  */
>>>>> +
>>>>> +unsigned int
>>>>> +parse_vectorizer_options (const char *p, location_t loc, int scode,
>>>>> +                         unsigned int flags, int value, bool complain)
>>>>> +{
>>>>> +  if (scode != OPT_ftree_vectorize_epilogues_)
>>>>> +    return flags;
>>>>> +
>>>>> +  if (!p)
>>>>> +    return value;
>>>>> +
>>>>> +  while (*p != 0)
>>>>> +    {
>>>>> +      size_t len;
>>>>> +      const char *comma = strchr (p, ',');
>>>>> +      unsigned int flag = 0;
>>>>> +
>>>>> +      if (comma == NULL)
>>>>> +       len = strlen (p);
>>>>> +      else
>>>>> +       len = comma - p;
>>>>> +      if (len == 0)
>>>>> +       {
>>>>> +         p = comma + 1;
>>>>> +         continue;
>>>>> +       }
>>>>> +
>>>>> +      /* Check to see if the string matches an option class name.  */
>>>>> +      if (len == strlen ("combine")
>>>>> +         && memcmp (p, "combine", len) == 0)
>>>>> +       flag = VECT_EPILOGUE_COMBINE;
>>>>> +      else if (len == strlen ("mask")
>>>>> +         && memcmp (p, "mask", len) == 0)
>>>>> +       flag = VECT_EPILOGUE_MASK;
>>>>> +      else if (len == strlen ("nomask")
>>>>> +         && memcmp (p, "nomask", len) == 0)
>>>>> +       flag = VECT_EPILOGUE_NOMASK;
>>>>> +      else if (complain)
>>>>> +       error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
>>>>> +                 "option: %q.*s", (int) len, p);
>>>>> +
>>>>> +      if (value)
>>>>> +       flags |= flag;
>>>>> +      else
>>>>> +       flags &= ~flag;
>>>>> +
>>>>> +      if (comma == NULL)
>>>>> +       break;
>>>>> +      p = comma + 1;
>>>>> +    }
>>>>> +
>>>>> +  return flags;
>>>>> +}
>>>>> +
>>>>>  /* Handle target- and language-independent options.  Return zero to
>>>>>     generate an "unknown option" message.  Only options that need
>>>>>     extra handling need to be listed here; if you simply want
>>>>> @@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
>>>>>        if (!opts_set->x_flag_tree_slp_vectorize)
>>>>>          opts->x_flag_tree_slp_vectorize = value;
>>>>>        break;
>>>>> +
>>>>> +    case OPT_ftree_vectorize_epilogues_:
>>>>> +      opts->x_flag_tree_vectorize_epilogues
>>>>> +       = parse_vectorizer_options (arg, loc, code,
>>>>> +                                   opts->x_flag_tree_vectorize_epilogues,
>>>>> +                                   value, true);
>>>>> +      break;
>>>>> +
>>>>> +    case OPT_fno_tree_vectorize_epilogues:
>>>>> +      opts->x_flag_tree_vectorize_epilogues = 0;
>>>>> +      break;
>>>>> +
>>>>>      case OPT_fshow_column:
>>>>>        dc->show_column = value;
>>>>>        break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-06-09 10:36         ` Ilya Enkovich
@ 2016-06-09 12:19           ` Richard Biener
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Biener @ 2016-06-09 12:19 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: GCC Patches

On Thu, Jun 9, 2016 at 12:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi Richard,
>
> Do you have some comments on other parts of this series?

It's queued for review ... I'll need a slot of some spare hours to go over it.

Richard.

> Thanks,
> Ilya
>
> 2016-05-20 14:40 GMT+03:00 Ilya Enkovich <enkovich.gnu@gmail.com>:
>> 2016-05-20 14:17 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>> On Fri, May 20, 2016 at 11:50 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>>> 2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> This patch introduces new options used for loop epilogues vectorization.
>>>>>
>>>>> Why's that?  This is a bit too much for the casual user and if it is
>>>>> really necessary
>>>>> to control this via options then it is not fine-grained enough.
>>>>>
>>>>> Why doesn't the vectorizer/backend have enough info to decide this itself?
>>>>
>>>> I don't expect casual user to decide which modes to choose.  These controls are
>>>> added for debugging and performance measurement purposes.  I see now I miss
>>>> -ftree-vectorize-epilogues aliased to -ftree-vectorize-epilogues=all.  Surely
>>>> I expect epilogues and short loops vectorization be enabled by default on -O3
>>>> or by -ftree-vectorize-loops.
>>>
>>> Can you make all these --params then?  I think to be useful to users we'd want
>>> them to be loop pragmas rather than options.
>>
>> OK, I'll change it to params.  I didn't think about control via
>> pragmas but will do now.
>>
>> Thanks,
>> Ilya
>>
>>>
>>> Richard.
>>>
>>>> Thanks,
>>>> Ilya
>>>>
>>>>>
>>>>> Richard.
>>>>>
>>>>>> Thanks,
>>>>>> Ilya
>>>>>> --
>>>>>> gcc/
>>>>>>
>>>>>> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>>>>>>
>>>>>>         * common.opt (flag_tree_vectorize_epilogues): New.
>>>>>>         (ftree-vectorize-short-loops): New.
>>>>>>         (ftree-vectorize-epilogues=): New.
>>>>>>         (fno-tree-vectorize-epilogues): New.
>>>>>>         (fvect-epilogue-cost-model=): New.
>>>>>>         * flag-types.h (enum vect_epilogue_mode): New.
>>>>>>         * opts.c (parse_vectorizer_options): New.
>>>>>>         (common_handle_option): Support -ftree-vectorize-epilogues=
>>>>>>         and -fno-tree-vectorize-epilogues options.
>>>>>>
>>>>>>
>>>>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>>>>> index 682cb41..6b83b79 100644
>>>>>> --- a/gcc/common.opt
>>>>>> +++ b/gcc/common.opt
>>>>>> @@ -243,6 +243,10 @@ bool dump_base_name_prefixed = false
>>>>>>  Variable
>>>>>>  bool flag_disable_hsa = false
>>>>>>
>>>>>> +; Flag holding modes for loop epilogue vectorization
>>>>>> +Variable
>>>>>> +unsigned int flag_tree_vectorize_epilogues
>>>>>> +
>>>>>>  ###
>>>>>>  Driver
>>>>>>
>>>>>> @@ -2557,6 +2561,19 @@ ftree-vectorize
>>>>>>  Common Report Var(flag_tree_vectorize) Optimization
>>>>>>  Enable vectorization on trees.
>>>>>>
>>>>>> +ftree-vectorize-short-loops
>>>>>> +Common Report Var(flag_tree_vectorize_short_loops) Optimization
>>>>>> +Enable vectorization of loops with low trip count using masking.
>>>>>> +
>>>>>> +ftree-vectorize-epilogues=
>>>>>> +Common Report Joined Optimization
>>>>>> +Comma separated list of loop epilogue vectorization modes.
>>>>>> +Available modes: combine, mask, nomask.
>>>>>> +
>>>>>> +fno-tree-vectorize-epilogues
>>>>>> +Common RejectNegative Optimization
>>>>>> +Disable epilogues vectorization.
>>>>>> +
>>>>>>  ftree-vectorizer-verbose=
>>>>>>  Common Joined RejectNegative Ignore
>>>>>>  Does nothing.  Preserved for backward compatibility.
>>>>>> @@ -2577,6 +2594,10 @@ fsimd-cost-model=
>>>>>>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>>>>>>  Specifies the vectorization cost model for code marked with a simd directive.
>>>>>>
>>>>>> +fvect-epilogue-cost-model=
>>>>>> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
>>>>>> +Specifies the cost model for epilogue vectorization.
>>>>>> +
>>>>>>  Enum
>>>>>>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>>>>>>
>>>>>> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
>>>>>> index dd57e16..24081b1 100644
>>>>>> --- a/gcc/flag-types.h
>>>>>> +++ b/gcc/flag-types.h
>>>>>> @@ -200,6 +200,15 @@ enum vect_cost_model {
>>>>>>    VECT_COST_MODEL_DEFAULT = 3
>>>>>>  };
>>>>>>
>>>>>> +/* Epilogue vectorization modes.  */
>>>>>> +enum vect_epilogue_mode {
>>>>>> +  VECT_EPILOGUE_COMBINE = 1 << 0,
>>>>>> +  VECT_EPILOGUE_MASK = 1 << 1,
>>>>>> +  VECT_EPILOGUE_NOMASK = 1 << 2,
>>>>>> +  VECT_EPILOGUE_ALL = VECT_EPILOGUE_COMBINE | VECT_EPILOGUE_MASK
>>>>>> +                     | VECT_EPILOGUE_NOMASK
>>>>>> +};
>>>>>> +
>>>>>>  /* Different instrumentation modes.  */
>>>>>>  enum sanitize_code {
>>>>>>    /* AddressSanitizer.  */
>>>>>> diff --git a/gcc/opts.c b/gcc/opts.c
>>>>>> index 0f9431a..a0c0987 100644
>>>>>> --- a/gcc/opts.c
>>>>>> +++ b/gcc/opts.c
>>>>>> @@ -1531,6 +1531,63 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
>>>>>>    return flags;
>>>>>>  }
>>>>>>
>>>>>> +/* Parse comma separated vectorizer suboptions from P for option SCODE,
>>>>>> +   adjust previous FLAGS and return new ones.  If COMPLAIN is false,
>>>>>> +   don't issue diagnostics.  */
>>>>>> +
>>>>>> +unsigned int
>>>>>> +parse_vectorizer_options (const char *p, location_t loc, int scode,
>>>>>> +                         unsigned int flags, int value, bool complain)
>>>>>> +{
>>>>>> +  if (scode != OPT_ftree_vectorize_epilogues_)
>>>>>> +    return flags;
>>>>>> +
>>>>>> +  if (!p)
>>>>>> +    return value;
>>>>>> +
>>>>>> +  while (*p != 0)
>>>>>> +    {
>>>>>> +      size_t len;
>>>>>> +      const char *comma = strchr (p, ',');
>>>>>> +      unsigned int flag = 0;
>>>>>> +
>>>>>> +      if (comma == NULL)
>>>>>> +       len = strlen (p);
>>>>>> +      else
>>>>>> +       len = comma - p;
>>>>>> +      if (len == 0)
>>>>>> +       {
>>>>>> +         p = comma + 1;
>>>>>> +         continue;
>>>>>> +       }
>>>>>> +
>>>>>> +      /* Check to see if the string matches an option class name.  */
>>>>>> +      if (len == strlen ("combine")
>>>>>> +         && memcmp (p, "combine", len) == 0)
>>>>>> +       flag = VECT_EPILOGUE_COMBINE;
>>>>>> +      else if (len == strlen ("mask")
>>>>>> +         && memcmp (p, "mask", len) == 0)
>>>>>> +       flag = VECT_EPILOGUE_MASK;
>>>>>> +      else if (len == strlen ("nomask")
>>>>>> +         && memcmp (p, "nomask", len) == 0)
>>>>>> +       flag = VECT_EPILOGUE_NOMASK;
>>>>>> +      else if (complain)
>>>>>> +       error_at (loc, "unrecognized argument to -ftree-vectorize-epilogues= "
>>>>>> +                 "option: %q.*s", (int) len, p);
>>>>>> +
>>>>>> +      if (value)
>>>>>> +       flags |= flag;
>>>>>> +      else
>>>>>> +       flags &= ~flag;
>>>>>> +
>>>>>> +      if (comma == NULL)
>>>>>> +       break;
>>>>>> +      p = comma + 1;
>>>>>> +    }
>>>>>> +
>>>>>> +  return flags;
>>>>>> +}
>>>>>> +
>>>>>>  /* Handle target- and language-independent options.  Return zero to
>>>>>>     generate an "unknown option" message.  Only options that need
>>>>>>     extra handling need to be listed here; if you simply want
>>>>>> @@ -2018,6 +2075,18 @@ common_handle_option (struct gcc_options *opts,
>>>>>>        if (!opts_set->x_flag_tree_slp_vectorize)
>>>>>>          opts->x_flag_tree_slp_vectorize = value;
>>>>>>        break;
>>>>>> +
>>>>>> +    case OPT_ftree_vectorize_epilogues_:
>>>>>> +      opts->x_flag_tree_vectorize_epilogues
>>>>>> +       = parse_vectorizer_options (arg, loc, code,
>>>>>> +                                   opts->x_flag_tree_vectorize_epilogues,
>>>>>> +                                   value, true);
>>>>>> +      break;
>>>>>> +
>>>>>> +    case OPT_fno_tree_vectorize_epilogues:
>>>>>> +      opts->x_flag_tree_vectorize_epilogues = 0;
>>>>>> +      break;
>>>>>> +
>>>>>>      case OPT_fshow_column:
>>>>>>        dc->show_column = value;
>>>>>>        break;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-20 11:40       ` Ilya Enkovich
  2016-06-09 10:36         ` Ilya Enkovich
@ 2016-06-16  5:06         ` Jeff Law
  2016-06-17 10:41           ` Ilya Enkovich
  2016-06-16 13:45         ` Ilya Enkovich
  2 siblings, 1 reply; 13+ messages in thread
From: Jeff Law @ 2016-06-16  5:06 UTC (permalink / raw)
  To: Ilya Enkovich, Richard Biener; +Cc: GCC Patches

On 05/20/2016 05:40 AM, Ilya Enkovich wrote:
> 2016-05-20 14:17 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Fri, May 20, 2016 at 11:50 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>> 2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>>>>> Hi,
>>>>>
>>>>> This patch introduces new options used for loop epilogues vectorization.
>>>>
>>>> Why's that?  This is a bit too much for the casual user and if it is
>>>> really necessary
>>>> to control this via options then it is not fine-grained enough.
>>>>
>>>> Why doesn't the vectorizer/backend have enough info to decide this itself?
>>>
>>> I don't expect casual user to decide which modes to choose.  These controls are
>>> added for debugging and performance measurement purposes.  I see now I miss
>>> -ftree-vectorize-epilogues aliased to -ftree-vectorize-epilogues=all.  Surely
>>> I expect epilogues and short loops vectorization be enabled by default on -O3
>>> or by -ftree-vectorize-loops.
>>
>> Can you make all these --params then?  I think to be useful to users we'd want
>> them to be loop pragmas rather than options.
>
> OK, I'll change it to params.  I didn't think about control via
> pragmas but will do now.
So the questions I'd like to see answered:

1. You've got 3 modes for epilogue vectorization.  Is this an artifact 
of not really having good heuristics yet for which mode to apply to a 
particular loop at this time?

2. Similarly for cost models.


In the cover message you indicated you were getting expected gains on 
KNL, but not on Haswell.  Do you have any sense yet why you're not 
getting good results on Haswell yet?  For KNL are you getting those 
speedups with a generic set of options or are those with a custom set of 
options to set the mode & cost models?

jeff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-05-20 11:40       ` Ilya Enkovich
  2016-06-09 10:36         ` Ilya Enkovich
  2016-06-16  5:06         ` Jeff Law
@ 2016-06-16 13:45         ` Ilya Enkovich
  2016-07-11 13:37           ` Ilya Enkovich
  2 siblings, 1 reply; 13+ messages in thread
From: Ilya Enkovich @ 2016-06-16 13:45 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

On 20 May 14:40, Ilya Enkovich wrote:
> > Can you make all these --params then?  I think to be useful to users we'd want
> > them to be loop pragmas rather than options.
> 
> OK, I'll change it to params.  I didn't think about control via
> pragmas but will do now.
> 
> Thanks,
> Ilya
> 
> >
> > Richard.
> >

Hi,

Here is a set of params to be used instead of new flags.  Does this set look OK?
I still use a new option for the cost model for convenient cost model enum re-use.

Thanks,
Ilya
--
gcc/

2016-06-16  Ilya Enkovich  <ilya.enkovich@intel.com>

	* common.opt (fvect-epilogue-cost-model=): New.
	* params.def (PARAM_VECT_EPILOGUES_COMBINE): New.
	(PARAM_VECT_EPILOGUES_MASK): New.
	(PARAM_VECT_EPILOGUES_NOMASK): New.
	(PARAM_VECT_SHORT_LOOPS): New.
	* doc/invoke.texi (-fvect-epilogue-cost-model): New.


diff --git a/gcc/common.opt b/gcc/common.opt
index fccd4b5..10cd75b 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2582,6 +2582,10 @@ fsimd-cost-model=
 Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
 Specifies the vectorization cost model for code marked with a simd directive.
 
+fvect-epilogue-cost-model=
+Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
+Specifies the cost model for epilogue vectorization.
+
 Enum
 Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ce162a0..ecbd7ce 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7638,6 +7638,14 @@ or Cilk Plus simd directive.  The @var{model} argument should be one of
 have the same meaning as described in @option{-fvect-cost-model} and by
 default a cost model defined with @option{-fvect-cost-model} is used.
 
+@item -fvect-epilogue-cost-model=@var{model}
+@opindex fvect-epilogue-cost-model
+Alter the cost model used for vectorization of loop epilogues.  The
+@var{model} argument should be one of @samp{unlimited}, @samp{dynamic},
+@samp{cheap}.  All values of @var{model} have the same meaning as
+described in @option{-fvect-cost-model} and by default @samp{dynamic}
+cost model is used.
+
 @item -ftree-vrp
 @opindex ftree-vrp
 Perform Value Range Propagation on trees.  This is similar to the
diff --git a/gcc/params.def b/gcc/params.def
index 62a1e40..3bac68c 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1220,6 +1220,28 @@ DEFPARAM (PARAM_MAX_SPECULATIVE_DEVIRT_MAYDEFS,
 	  "Maximum number of may-defs visited when devirtualizing "
 	  "speculatively", 50, 0, 0)
 
+DEFPARAM (PARAM_VECT_EPILOGUES_COMBINE,
+	  "vect-epilogues-combine",
+	  "Enable loop epilogue vectorization by combining it with "
+	  "vectorized loop body.",
+	  0, 0, 1)
+
+DEFPARAM (PARAM_VECT_EPILOGUES_MASK,
+	  "vect-epilogues-mask",
+	  "Enable loop epilogue vectorization using the same vector "
+	  "size and masking.",
+	  0, 0, 1)
+
+DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
+	  "vect-epilogues-nomask",
+	  "Enable loop epilogue vectorization using smaller vector size.",
+	  0, 0, 1)
+
+DEFPARAM (PARAM_VECT_SHORT_LOOPS,
+	  "vect-short-loops",
+	  "Enable vectorization of low trip count loops using masking.",
+	  0, 0, 1)
+
 /*
 
 Local variables:

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-06-16  5:06         ` Jeff Law
@ 2016-06-17 10:41           ` Ilya Enkovich
  2016-06-20 22:33             ` Jeff Law
  0 siblings, 1 reply; 13+ messages in thread
From: Ilya Enkovich @ 2016-06-17 10:41 UTC (permalink / raw)
  To: Jeff Law; +Cc: Richard Biener, GCC Patches

2016-06-16 8:06 GMT+03:00 Jeff Law <law@redhat.com>:
> On 05/20/2016 05:40 AM, Ilya Enkovich wrote:
>>
>> 2016-05-20 14:17 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>
>>> On Fri, May 20, 2016 at 11:50 AM, Ilya Enkovich <enkovich.gnu@gmail.com>
>>> wrote:
>>>>
>>>> 2016-05-20 12:26 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>
>>>>> On Thu, May 19, 2016 at 9:36 PM, Ilya Enkovich <enkovich.gnu@gmail.com>
>>>>> wrote:
>>>>>>
>>>>>> Hi,
>>>>>>
>>>>>> This patch introduces new options used for loop epilogues
>>>>>> vectorization.
>>>>>
>>>>>
>>>>> Why's that?  This is a bit too much for the casual user and if it is
>>>>> really necessary
>>>>> to control this via options then it is not fine-grained enough.
>>>>>
>>>>> Why doesn't the vectorizer/backend have enough info to decide this
>>>>> itself?
>>>>
>>>>
>>>> I don't expect casual user to decide which modes to choose.  These
>>>> controls are
>>>> added for debugging and performance measurement purposes.  I see now I
>>>> miss
>>>> -ftree-vectorize-epilogues aliased to -ftree-vectorize-epilogues=all.
>>>> Surely
>>>> I expect epilogues and short loops vectorization be enabled by default
>>>> on -O3
>>>> or by -ftree-vectorize-loops.
>>>
>>>
>>> Can you make all these --params then?  I think to be useful to users we'd
>>> want
>>> them to be loop pragmas rather than options.
>>
>>
>> OK, I'll change it to params.  I didn't think about control via
>> pragmas but will do now.
>
> So the questions I'd like to see answered:
>
> 1. You've got 3 modes for epilogue vectorization.  Is this an artifact of
> not really having good heuristics yet for which mode to apply to a
> particular loop at this time?
>
> 2. Similarly for cost models.

All three modes are profitable in different situations.  Profitable mode depends
on a loop structure and target capabilities.  Ultimate goal is to have all three
modes enabled by default.  I can't state current heuristics are good enough
for all cases and targets and therefore don't enable epilogues vectorization
by default for now.  This is to be measured, analyzed and tuned in
time for GCC 7.1.

I add cost model simply to have an ability to force epilogue vectorization for
stability testing (force some mode of epilogue vectorization and check nothing
fails) and performance testing/tuning (try to find cases where we may benefit
from epilogue vectorization but don't due to bad cost model).  Also I don't
want to force epilogue vectorization for all loops for which vectorization is
forced using unlimited cost model because that may hurt performance for
simd loops.

>
> In the cover message you indicated you were getting expected gains on KNL,
> but not on Haswell.  Do you have any sense yet why you're not getting good
> results on Haswell yet?  For KNL are you getting those speedups with a
> generic set of options or are those with a custom set of options to set the
> mode & cost models?

Currently I have numbers collected on various suites for KNL machine.  Masking
mode (-ftree-vectorize-epilogues=mask) shows not bad results (dynamic
cost model,
-Ofast -flto -funroll-loops).  I don't see significant losses and there are few
significant gains.  For combine and nomask modes the result is not good enough
yet - there are several significant performance losses.  My guess is that
current threshold for combine is way too high and for nomask variant we better
choose the smallest vector size for epilogues instead of the next available
(use zmm for body and xmm for epilogue instead of zmm for body and ymm for
epilogue).

ICC shows better results in these modes which makes me believe we can tune them
as well.  Overall nomask mode shows worse results compared to options with
masking which is quite expected for KNL.

Unfortunately some big gains demonstrated by ICC are not reproducible
using GCC because we originally can't vectorize required hot loops.  E.g. on
200.sixtrack GCC has nothing and ICC has ~40% for all three modes.

I don't have the whole statistics for Haswell but synthetic tests show the
situation is really different from KNL.  Even for the 'perfect' iterations count
number (VF * 2 - 1) scalar version of epilogue shows the same result as a masked
one.  It means ratio of vector code performance vs. scalar code performance is
not as high as for KNL (KNL is more vector oriented and has weaker
scalar performance,
double vector size also matters here) and masking cost is higher for Haswell.
We still focus on AVX-512 targets more because of their rich masking
capabilities
and wider vector.

Thanks,
Ilya

>
> jeff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-06-17 10:41           ` Ilya Enkovich
@ 2016-06-20 22:33             ` Jeff Law
  0 siblings, 0 replies; 13+ messages in thread
From: Jeff Law @ 2016-06-20 22:33 UTC (permalink / raw)
  To: Ilya Enkovich; +Cc: Richard Biener, GCC Patches

On 06/17/2016 04:41 AM, Ilya Enkovich wrote:

>>
>> 1. You've got 3 modes for epilogue vectorization.  Is this an artifact of
>> not really having good heuristics yet for which mode to apply to a
>> particular loop at this time?
>>
>> 2. Similarly for cost models.
>
> All three modes are profitable in different situations.  Profitable mode depends
> on a loop structure and target capabilities.  Ultimate goal is to have all three
> modes enabled by default.  I can't state current heuristics are good enough
> for all cases and targets and therefore don't enable epilogues vectorization
> by default for now.  This is to be measured, analyzed and tuned in
> time for GCC 7.1.

>
> I add cost model simply to have an ability to force epilogue vectorization for
> stability testing (force some mode of epilogue vectorization and check nothing
> fails) and performance testing/tuning (try to find cases where we may benefit
> from epilogue vectorization but don't due to bad cost model).  Also I don't
> want to force epilogue vectorization for all loops for which vectorization is
> forced using unlimited cost model because that may hurt performance for
> simd loops.
Thanks.  That overview helps a lot.

We've done something similar to what you're doing with cost models for 
testing in the scheduler and other places in the past.   The costing 
models seem more geared towards us as developers rather than users.  You 
might consider keeping those as local changes and not documenting them.

Understood completely on the modes.


>
> Currently I have numbers collected on various suites for KNL machine.  Masking
> mode (-ftree-vectorize-epilogues=mask) shows not bad results (dynamic
> cost model,
> -Ofast -flto -funroll-loops).  I don't see significant losses and there are few
> significant gains.  For combine and nomask modes the result is not good enough
> yet - there are several significant performance losses.  My guess is that
> current threshold for combine is way too high and for nomask variant we better
> choose the smallest vector size for epilogues instead of the next available
> (use zmm for body and xmm for epilogue instead of zmm for body and ymm for
> epilogue).
>
> ICC shows better results in these modes which makes me believe we can tune them
> as well.  Overall nomask mode shows worse results compared to options with
> masking which is quite expected for KNL.
>
> Unfortunately some big gains demonstrated by ICC are not reproducible
> using GCC because we originally can't vectorize required hot loops.  E.g. on
> 200.sixtrack GCC has nothing and ICC has ~40% for all three modes.
I hadn't pondered that case.  Certainly if GCC isn't vectorizing as 
much, we're not going to have as many opportunities for optimizing the 
vec-tails.

Given the results with ICC, we're probably best off keeping all 3 modes 
and working to get them tuned correctly.


>
> I don't have the whole statistics for Haswell but synthetic tests show the
> situation is really different from KNL.  Even for the 'perfect' iterations count
> number (VF * 2 - 1) scalar version of epilogue shows the same result as a masked
> one.  It means ratio of vector code performance vs. scalar code performance is
> not as high as for KNL (KNL is more vector oriented and has weaker
> scalar performance,
> double vector size also matters here) and masking cost is higher for Haswell.
> We still focus on AVX-512 targets more because of their rich masking
> capabilities and wider vector.
Understood.

Jeff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH, vec-tails 01/10] New compiler options
  2016-06-16 13:45         ` Ilya Enkovich
@ 2016-07-11 13:37           ` Ilya Enkovich
  0 siblings, 0 replies; 13+ messages in thread
From: Ilya Enkovich @ 2016-07-11 13:37 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Yuri Rumyantsev, Igor Zamyatin

Ping

2016-06-16 16:42 GMT+03:00 Ilya Enkovich <enkovich.gnu@gmail.com>:
> On 20 May 14:40, Ilya Enkovich wrote:
>> > Can you make all these --params then?  I think to be useful to users we'd want
>> > them to be loop pragmas rather than options.
>>
>> OK, I'll change it to params.  I didn't think about control via
>> pragmas but will do now.
>>
>> Thanks,
>> Ilya
>>
>> >
>> > Richard.
>> >
>
> Hi,
>
> Here is a set of params to be used instead of new flags.  Does this set look OK?
> I still use a new option for the cost model for convenient cost model enum re-use.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2016-06-16  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * common.opt (fvect-epilogue-cost-model=): New.
>         * params.def (PARAM_VECT_EPILOGUES_COMBINE): New.
>         (PARAM_VECT_EPILOGUES_MASK): New.
>         (PARAM_VECT_EPILOGUES_NOMASK): New.
>         (PARAM_VECT_SHORT_LOOPS): New.
>         * doc/invoke.texi (-fvect-epilogue-cost-model): New.
>
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index fccd4b5..10cd75b 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2582,6 +2582,10 @@ fsimd-cost-model=
>  Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization
>  Specifies the vectorization cost model for code marked with a simd directive.
>
> +fvect-epilogue-cost-model=
> +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_epilogue_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
> +Specifies the cost model for epilogue vectorization.
> +
>  Enum
>  Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index ce162a0..ecbd7ce 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -7638,6 +7638,14 @@ or Cilk Plus simd directive.  The @var{model} argument should be one of
>  have the same meaning as described in @option{-fvect-cost-model} and by
>  default a cost model defined with @option{-fvect-cost-model} is used.
>
> +@item -fvect-epilogue-cost-model=@var{model}
> +@opindex fvect-epilogue-cost-model
> +Alter the cost model used for vectorization of loop epilogues.  The
> +@var{model} argument should be one of @samp{unlimited}, @samp{dynamic},
> +@samp{cheap}.  All values of @var{model} have the same meaning as
> +described in @option{-fvect-cost-model} and by default @samp{dynamic}
> +cost model is used.
> +
>  @item -ftree-vrp
>  @opindex ftree-vrp
>  Perform Value Range Propagation on trees.  This is similar to the
> diff --git a/gcc/params.def b/gcc/params.def
> index 62a1e40..3bac68c 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -1220,6 +1220,28 @@ DEFPARAM (PARAM_MAX_SPECULATIVE_DEVIRT_MAYDEFS,
>           "Maximum number of may-defs visited when devirtualizing "
>           "speculatively", 50, 0, 0)
>
> +DEFPARAM (PARAM_VECT_EPILOGUES_COMBINE,
> +         "vect-epilogues-combine",
> +         "Enable loop epilogue vectorization by combining it with "
> +         "vectorized loop body.",
> +         0, 0, 1)
> +
> +DEFPARAM (PARAM_VECT_EPILOGUES_MASK,
> +         "vect-epilogues-mask",
> +         "Enable loop epilogue vectorization using the same vector "
> +         "size and masking.",
> +         0, 0, 1)
> +
> +DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
> +         "vect-epilogues-nomask",
> +         "Enable loop epilogue vectorization using smaller vector size.",
> +         0, 0, 1)
> +
> +DEFPARAM (PARAM_VECT_SHORT_LOOPS,
> +         "vect-short-loops",
> +         "Enable vectorization of low trip count loops using masking.",
> +         0, 0, 1)
> +
>  /*
>
>  Local variables:

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-07-11 13:37 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-19 19:37 [PATCH, vec-tails 01/10] New compiler options Ilya Enkovich
2016-05-19 20:23 ` Joseph Myers
2016-05-20  9:26 ` Richard Biener
2016-05-20  9:50   ` Ilya Enkovich
2016-05-20 11:17     ` Richard Biener
2016-05-20 11:40       ` Ilya Enkovich
2016-06-09 10:36         ` Ilya Enkovich
2016-06-09 12:19           ` Richard Biener
2016-06-16  5:06         ` Jeff Law
2016-06-17 10:41           ` Ilya Enkovich
2016-06-20 22:33             ` Jeff Law
2016-06-16 13:45         ` Ilya Enkovich
2016-07-11 13:37           ` Ilya Enkovich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).