public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Improve swap optimization to modify general xxpermdi patterns
@ 2015-08-31  0:08 Bill Schmidt
  2015-08-31  0:23 ` David Edelsohn
  0 siblings, 1 reply; 2+ messages in thread
From: Bill Schmidt @ 2015-08-31  0:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: dje.gcc

Hi,

The VSX swap optimization currently misses opportunities to optimize
loops when expressions corresponding to xxpermdi instructions are
present (other than xxswapd instructions associated with loads and
stores).  These occur commonly when interleaving vector double or vector
unsigned long operands, or when concatenating two doubles or unsigned
longs to make a V2DI or V2DF result.  This patch adds logic to recognize
these insns and adjust them to account for swapped doublewords in the
computation.

Both opportunities arise in a simple test case that performs a reduction
on complex multiplications, which I've added here.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


[gcc]

2015-08-30  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

        * config/rs6000/rs6000.c (swap_web_entry): Enlarge
        special_handling bitfield.
        (special_handling_values): Add SH_XXPERMDI and SH_CONCAT.
        (rtx_is_swappable_p): Add handling for vec_select/vec_concat form
        that represents a general xxpermdi.
        (insn_is_swappable_p): Add handling for vec_concat of two
        doublewords, which maps to a specific xxpermdi.
        (adjust_xxpermdi): New function.
        (adjust_concat): Likewise.
        (handle_special_swappables): Call adjust_xxpermdi and
        adjust_concat.
        (dump_swap_insn_table): Handle SH_XXPERMDI and SH_CONCAT.

[gcc/testsuite]

2015-08-30  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

        * gcc.target/powerpc/swaps-p8-19.c: New test.


Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 227326)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -34986,7 +34986,7 @@ class swap_web_entry : public web_entry_base
   /* A nonzero value indicates what kind of special handling for this
      insn is required if doublewords are swapped.  Undefined if
      is_swappable is not set.  */
-  unsigned int special_handling : 3;
+  unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
   /* Set if this insn should be deleted.  */
@@ -35000,7 +35000,9 @@ enum special_handling_values {
   SH_NOSWAP_LD,
   SH_NOSWAP_ST,
   SH_EXTRACT,
-  SH_SPLAT
+  SH_SPLAT,
+  SH_XXPERMDI,
+  SH_CONCAT
 };
 
 /* Union INSN with all insns containing definitions that reach USE.
@@ -35192,6 +35194,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
          *special = SH_EXTRACT;
          return 1;
        }
+      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
+        XXPERMDI is a swap operation, it will be identified by
+        insn_is_swap_p and therefore we won't get here.  */
+      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+              && (GET_MODE (XEXP (op, 0)) == V4DFmode
+                  || GET_MODE (XEXP (op, 0)) == V4DImode)
+              && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+              && XVECLEN (parallel, 0) == 2
+              && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+              && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+       {
+         *special = SH_XXPERMDI;
+         return 1;
+       }
       else
        return 0;
 
@@ -35369,6 +35385,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, r
       return 1;
     }
 
+  /* A concatenation of two doublewords is ok if we reverse the
+     order of the inputs.  */
+  if (GET_CODE (body) == SET
+      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+      && (GET_MODE (SET_SRC (body)) == V2DFmode
+         || GET_MODE (SET_SRC (body)) == V2DImode))
+    {
+      *special = SH_CONCAT;
+      return 1;
+    }
+
   /* Otherwise check the operands for vector lane violations.  */
   return rtx_is_swappable_p (body, special);
 }
@@ -35658,6 +35685,49 @@ adjust_splat (rtx_insn *insn)
     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
 }
 
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+   swap), reverse the order of the source operands and adjust the indices
+   of the source lanes to account for doubleword reversal.  */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx select = XEXP (set, 1);
+  rtx concat = XEXP (select, 0);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  rtx parallel = XEXP (select, 1);
+  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+  int new_lane0 = 3 - lane1;
+  int new_lane1 = 3 - lane0;
+  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+   reverse the order of those inputs.  */
+static void
+adjust_concat (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx concat = XEXP (set, 1);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
 /* The insn described by INSN_ENTRY[I] can be swapped, but only
    with special handling.  Take care of that here.  */
 static void
@@ -35704,6 +35774,14 @@ handle_special_swappables (swap_web_entry *insn_en
       /* Change the lane on a direct-splat operation.  */
       adjust_splat (insn);
       break;
+    case SH_XXPERMDI:
+      /* Change the lanes on an XXPERMDI operation.  */
+      adjust_xxpermdi (insn);
+      break;
+    case SH_CONCAT:
+      /* Reverse the order of a concatenation operation.  */
+      adjust_concat (insn);
+      break;
     }
 }
 
@@ -35776,6 +35854,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
              fputs ("special:extract ", dump_file);
            else if (insn_entry[i].special_handling == SH_SPLAT)
              fputs ("special:splat ", dump_file);
+           else if (insn_entry[i].special_handling == SH_XXPERMDI)
+             fputs ("special:xxpermdi ", dump_file);
+           else if (insn_entry[i].special_handling == SH_CONCAT)
+             fputs ("special:concat ", dump_file);
          }
        if (insn_entry[i].web_not_optimizable)
          fputs ("unoptimizable ", dump_file);
Index: gcc/testsuite/gcc.target/powerpc/swaps-p8-19.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/swaps-p8-19.c      (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/swaps-p8-19.c      (working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -ftree-vectorize -mcpu=power8 -ffast-math -fvect-cost-model=unlimited" } */
+
+/* This tests special handling for various uses of xxpermdi, other than
+   to perform doubleword swaps.  */
+
+void foo (_Complex double *self, _Complex double *a, _Complex double *b,
+         int a1, int a2)
+{
+  int i, j;
+  for (i = 0; i < a1; ++i)
+    for (j = 0; j < a2; ++j)
+      self[i] = self[i] + a[i,j] * b[j];
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi .*,.*,.*,0" 1 } } */
+/* { dg-final { scan-assembler-times "xxpermdi .*,.*,.*,1" 1 } } */
+/* { dg-final { scan-assembler-times "xxpermdi .*,.*,.*,2" 1 } } */
+/* { dg-final { scan-assembler-times "xxpermdi .*,.*,.*,3" 1 } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH, rs6000] Improve swap optimization to modify general xxpermdi patterns
  2015-08-31  0:08 [PATCH, rs6000] Improve swap optimization to modify general xxpermdi patterns Bill Schmidt
@ 2015-08-31  0:23 ` David Edelsohn
  0 siblings, 0 replies; 2+ messages in thread
From: David Edelsohn @ 2015-08-31  0:23 UTC (permalink / raw)
  To: Bill Schmidt; +Cc: GCC Patches

On Sun, Aug 30, 2015 at 7:19 PM, Bill Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Hi,
>
> The VSX swap optimization currently misses opportunities to optimize
> loops when expressions corresponding to xxpermdi instructions are
> present (other than xxswapd instructions associated with loads and
> stores).  These occur commonly when interleaving vector double or vector
> unsigned long operands, or when concatenating two doubles or unsigned
> longs to make a V2DI or V2DF result.  This patch adds logic to recognize
> these insns and adjust them to account for swapped doublewords in the
> computation.
>
> Both opportunities arise in a simple test case that performs a reduction
> on complex multiplications, which I've added here.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Is this ok for trunk?
>
> Thanks,
> Bill
>
>
> [gcc]
>
> 2015-08-30  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * config/rs6000/rs6000.c (swap_web_entry): Enlarge
>         special_handling bitfield.
>         (special_handling_values): Add SH_XXPERMDI and SH_CONCAT.
>         (rtx_is_swappable_p): Add handling for vec_select/vec_concat form
>         that represents a general xxpermdi.
>         (insn_is_swappable_p): Add handling for vec_concat of two
>         doublewords, which maps to a specific xxpermdi.
>         (adjust_xxpermdi): New function.
>         (adjust_concat): Likewise.
>         (handle_special_swappables): Call adjust_xxpermdi and
>         adjust_concat.
>         (dump_swap_insn_table): Handle SH_XXPERMDI and SH_CONCAT.
>
> [gcc/testsuite]
>
> 2015-08-30  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * gcc.target/powerpc/swaps-p8-19.c: New test.

Okay.

Thanks, David

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-08-31  0:08 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-31  0:08 [PATCH, rs6000] Improve swap optimization to modify general xxpermdi patterns Bill Schmidt
2015-08-31  0:23 ` David Edelsohn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).