public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V3, rs6000] Disable generation of scalar modulo instructions
@ 2023-06-27 18:52 Pat Haugen
  2023-06-27 18:56 ` Pat Haugen
  0 siblings, 1 reply; 3+ messages in thread
From: Pat Haugen @ 2023-06-27 18:52 UTC (permalink / raw)
  To: GCC Patches, Segher Boessenkool, Kewen.Lin, Peter Bergner,
	David Edelsohn

Updated from prior version to address review comments (update
rs6000_rtx_costs, update scan strings of mod-1.c/mod-2.c).

Disable generation of scalar modulo instructions.

It was recently discovered that the scalar modulo instructions can suffer
noticeable performance issues for certain input values. This patch disables
their generation since the equivalent div/mul/sub sequence does not suffer
the same problem.

Bootstrapped and regression tested on powerpc64/powerpc64le.
Ok for master and backports after burn in?

-Pat


2023-06-27  Pat Haugen  <pthaugen@linux.ibm.com>

gcc/
	* config/rs6000/rs6000.cc (rs6000_rtx_costs): Check if disabling
	scalar modulo.
	* config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
	* config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
	(define_expand umod<mode>3): New.
	(define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
	(umodti3, modti3): Disable.

gcc/testsuite/
	* gcc.target/powerpc/clone1.c: Add xfails.
	* gcc.target/powerpc/clone3.c: Likewise.
	* gcc.target/powerpc/mod-1.c: Update scan strings and add xfails.
	* gcc.target/powerpc/mod-2.c: Likewise.
	* gcc.target/powerpc/p10-vdivq-vmodq.c: Add xfails.


diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 546c353029b..2dae217bf64 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -22127,7 +22127,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
  	    *total = rs6000_cost->divsi;
  	}
        /* Add in shift and subtract for MOD unless we have a mod 
instruction. */
-      if (!TARGET_MODULO && (code == MOD || code == UMOD))
+      if ((!TARGET_MODULO
+	   || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
+	 && (code == MOD || code == UMOD))
  	*total += COSTS_N_INSNS (2);
        return false;

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..22595f6ebd7 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2492,3 +2492,9 @@ while (0)
         rs6000_asm_output_opcode (STREAM);				\
      }									\
    while (0)
+
+/* Disable generation of scalar modulo instructions due to performance 
issues
+   with certain input values.  This can be removed in the future when the
+   issues have been resolved.  */
+#define RS6000_DISABLE_SCALAR_MODULO 1
+
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..6c2f237a539 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
  	FAIL;

        operands[2] = force_reg (<MODE>mode, operands[2]);
+
+      if (RS6000_DISABLE_SCALAR_MODULO)
+	{
+	  temp1 = gen_reg_rtx (<MODE>mode);
+	  temp2 = gen_reg_rtx (<MODE>mode);
+
+	  emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
+	  emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
+	  emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+	  DONE;
+	}
      }
    else
      {
@@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
          (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
  		 (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
    "mods<wd> %0,%1,%2"
    [(set_attr "type" "div")
     (set_attr "size" "<bits>")])

+;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
+;; removed.
+(define_expand "umod<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
+		  (match_operand:GPR 2 "gpc_reg_operand")))]
+  ""
+{
+  rtx temp1;
+  rtx temp2;
+
+  if (!TARGET_MODULO)
+	FAIL;

-(define_insn "umod<mode>3"
+  if (RS6000_DISABLE_SCALAR_MODULO)
+    {
+      temp1 = gen_reg_rtx (<MODE>mode);
+      temp2 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
+      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+      DONE;
+    }
+})
+
+(define_insn "*umod<mode>3"
    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
          (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
  		  (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
    "modu<wd> %0,%1,%2"
    [(set_attr "type" "div")
     (set_attr "size" "<bits>")])
@@ -3507,7 +3543,7 @@ (define_insn "umodti3"
    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
  	(umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
  		 (match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
    "vmoduq %0,%1,%2"
    [(set_attr "type" "vecdiv")
     (set_attr "size" "128")])
@@ -3516,7 +3552,7 @@ (define_insn "modti3"
    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
  	(mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
  		(match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
    "vmodsq %0,%1,%2"
    [(set_attr "type" "vecdiv")
     (set_attr "size" "128")])
diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
b/gcc/testsuite/gcc.target/powerpc/clone1.c
index c69fd2aa1b8..74323ca0e8c 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone1.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
@@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
    return mod_func (a, b) | c;
  }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c 
b/gcc/testsuite/gcc.target/powerpc/clone3.c
index 911b88b781d..d3eb4dd2378 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone3.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
@@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
    return mod_func (a, b) | c;
  }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
  /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c 
b/gcc/testsuite/gcc.target/powerpc/mod-1.c
index 861ba670af4..8720ffb3346 100644
--- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
@@ -7,13 +7,14 @@ long lsmod (long a, long b) { return a%b; }
  unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
  unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }

-/* { dg-final { scan-assembler-times "modsw " 1 } } */
-/* { dg-final { scan-assembler-times "modsd " 1 } } */
-/* { dg-final { scan-assembler-times "moduw " 1 } } */
-/* { dg-final { scan-assembler-times "modud " 1 } } */
-/* { dg-final { scan-assembler-not   "mullw "   } } */
-/* { dg-final { scan-assembler-not   "mulld "   } } */
-/* { dg-final { scan-assembler-not   "divw "    } } */
-/* { dg-final { scan-assembler-not   "divd "    } } */
-/* { dg-final { scan-assembler-not   "divwu "   } } */
-/* { dg-final { scan-assembler-not   "divdu "   } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mmodsw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmoduw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodud\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mmullw\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mmulld\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivw\M}    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivd\M}    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivwu\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivdu\M}   { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c 
b/gcc/testsuite/gcc.target/powerpc/mod-2.c
index 441ec5878f1..54bdca88607 100644
--- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
@@ -5,8 +5,9 @@
  int ismod (int a, int b) { return a%b; }
  unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }

-/* { dg-final { scan-assembler-times "modsw " 1 } } */
-/* { dg-final { scan-assembler-times "moduw " 1 } } */
-/* { dg-final { scan-assembler-not   "mullw "   } } */
-/* { dg-final { scan-assembler-not   "divw "    } } */
-/* { dg-final { scan-assembler-not   "divwu "   } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mmodsw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmoduw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mmullw\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivw\M}    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivwu\M}   { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c 
b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
index 84685e5ff43..148998c8c9d 100644
--- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
@@ -23,5 +23,6 @@ __int128 s_mod(__int128 a, __int128 b)

  /* { dg-final { scan-assembler {\mvdivsq\M} } } */
  /* { dg-final { scan-assembler {\mvdivuq\M} } } */
-/* { dg-final { scan-assembler {\mvmodsq\M} } } */
-/* { dg-final { scan-assembler {\mvmoduq\M} } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH V3, rs6000] Disable generation of scalar modulo instructions
  2023-06-27 18:52 [PATCH V3, rs6000] Disable generation of scalar modulo instructions Pat Haugen
@ 2023-06-27 18:56 ` Pat Haugen
  2023-06-28  8:18   ` Kewen.Lin
  0 siblings, 1 reply; 3+ messages in thread
From: Pat Haugen @ 2023-06-27 18:56 UTC (permalink / raw)
  To: GCC Patches, Segher Boessenkool, Kewen.Lin, Peter Bergner,
	David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 1490 bytes --]

On 6/27/23 1:52 PM, Pat Haugen via Gcc-patches wrote:
> Updated from prior version to address review comments (update
> rs6000_rtx_costs, update scan strings of mod-1.c/mod-2.c).
> 
> Disable generation of scalar modulo instructions.
> 
> It was recently discovered that the scalar modulo instructions can suffer
> noticeable performance issues for certain input values. This patch disables
> their generation since the equivalent div/mul/sub sequence does not suffer
> the same problem.
> 
> Bootstrapped and regression tested on powerpc64/powerpc64le.
> Ok for master and backports after burn in?
> 
> -Pat
> 
> 
> 2023-06-27  Pat Haugen  <pthaugen@linux.ibm.com>
> 
> gcc/
>      * config/rs6000/rs6000.cc (rs6000_rtx_costs): Check if disabling
>      scalar modulo.
>      * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
>      * config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
>      (define_expand umod<mode>3): New.
>      (define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
>      (umodti3, modti3): Disable.
> 
> gcc/testsuite/
>      * gcc.target/powerpc/clone1.c: Add xfails.
>      * gcc.target/powerpc/clone3.c: Likewise.
>      * gcc.target/powerpc/mod-1.c: Update scan strings and add xfails.
>      * gcc.target/powerpc/mod-2.c: Likewise.
>      * gcc.target/powerpc/p10-vdivq-vmodq.c: Add xfails.
> 

Attaching patch since my mailer apparently messed up some formatting again.

-Pat

[-- Attachment #2: no-modulo.diff --]
[-- Type: text/plain, Size: 9307 bytes --]

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 546c353029b..2dae217bf64 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -22127,7 +22127,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	    *total = rs6000_cost->divsi;
 	}
       /* Add in shift and subtract for MOD unless we have a mod instruction. */
-      if (!TARGET_MODULO && (code == MOD || code == UMOD))
+      if ((!TARGET_MODULO
+	   || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
+	 && (code == MOD || code == UMOD))
 	*total += COSTS_N_INSNS (2);
       return false;
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..22595f6ebd7 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2492,3 +2492,9 @@ while (0)
        rs6000_asm_output_opcode (STREAM);				\
     }									\
   while (0)
+
+/* Disable generation of scalar modulo instructions due to performance issues
+   with certain input values.  This can be removed in the future when the
+   issues have been resolved.  */
+#define RS6000_DISABLE_SCALAR_MODULO 1
+
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..6c2f237a539 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
 	FAIL;
 
       operands[2] = force_reg (<MODE>mode, operands[2]);
+
+      if (RS6000_DISABLE_SCALAR_MODULO)
+	{
+	  temp1 = gen_reg_rtx (<MODE>mode);
+	  temp2 = gen_reg_rtx (<MODE>mode);
+
+	  emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
+	  emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
+	  emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+	  DONE;
+	}
     }
   else
     {
@@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
         (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
 		 (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
   "mods<wd> %0,%1,%2"
   [(set_attr "type" "div")
    (set_attr "size" "<bits>")])
 
+;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
+;; removed.
+(define_expand "umod<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
+		  (match_operand:GPR 2 "gpc_reg_operand")))]
+  ""
+{
+  rtx temp1;
+  rtx temp2;
+
+  if (!TARGET_MODULO)
+	FAIL;
 
-(define_insn "umod<mode>3"
+  if (RS6000_DISABLE_SCALAR_MODULO)
+    {
+      temp1 = gen_reg_rtx (<MODE>mode);
+      temp2 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
+      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+      DONE;
+    }
+})
+
+(define_insn "*umod<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
         (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
 		  (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
   "modu<wd> %0,%1,%2"
   [(set_attr "type" "div")
    (set_attr "size" "<bits>")])
@@ -3507,7 +3543,7 @@ (define_insn "umodti3"
   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
 	(umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
 		 (match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
   "vmoduq %0,%1,%2"
   [(set_attr "type" "vecdiv")
    (set_attr "size" "128")])
@@ -3516,7 +3552,7 @@ (define_insn "modti3"
   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
 	(mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
 		(match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
   "vmodsq %0,%1,%2"
   [(set_attr "type" "vecdiv")
    (set_attr "size" "128")])
diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c b/gcc/testsuite/gcc.target/powerpc/clone1.c
index c69fd2aa1b8..74323ca0e8c 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone1.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
@@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
   return mod_func (a, b) | c;
 }
 
-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c b/gcc/testsuite/gcc.target/powerpc/clone3.c
index 911b88b781d..d3eb4dd2378 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone3.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
@@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
   return mod_func (a, b) | c;
 }
 
-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c b/gcc/testsuite/gcc.target/powerpc/mod-1.c
index 861ba670af4..8720ffb3346 100644
--- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
@@ -7,13 +7,14 @@ long lsmod (long a, long b) { return a%b; }
 unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
 unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
 
-/* { dg-final { scan-assembler-times "modsw " 1 } } */
-/* { dg-final { scan-assembler-times "modsd " 1 } } */
-/* { dg-final { scan-assembler-times "moduw " 1 } } */
-/* { dg-final { scan-assembler-times "modud " 1 } } */
-/* { dg-final { scan-assembler-not   "mullw "   } } */
-/* { dg-final { scan-assembler-not   "mulld "   } } */
-/* { dg-final { scan-assembler-not   "divw "    } } */
-/* { dg-final { scan-assembler-not   "divd "    } } */
-/* { dg-final { scan-assembler-not   "divwu "   } } */
-/* { dg-final { scan-assembler-not   "divdu "   } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mmodsw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmoduw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodud\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mmullw\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mmulld\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivw\M}    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivd\M}    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivwu\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivdu\M}   { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c b/gcc/testsuite/gcc.target/powerpc/mod-2.c
index 441ec5878f1..54bdca88607 100644
--- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
@@ -5,8 +5,9 @@
 int ismod (int a, int b) { return a%b; }
 unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
 
-/* { dg-final { scan-assembler-times "modsw " 1 } } */
-/* { dg-final { scan-assembler-times "moduw " 1 } } */
-/* { dg-final { scan-assembler-not   "mullw "   } } */
-/* { dg-final { scan-assembler-not   "divw "    } } */
-/* { dg-final { scan-assembler-not   "divwu "   } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler-times {\mmodsw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmoduw\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mmullw\M}   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivw\M}    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   {\mdivwu\M}   { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
index 84685e5ff43..148998c8c9d 100644
--- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
@@ -23,5 +23,6 @@ __int128 s_mod(__int128 a, __int128 b)
 
 /* { dg-final { scan-assembler {\mvdivsq\M} } } */
 /* { dg-final { scan-assembler {\mvdivuq\M} } } */
-/* { dg-final { scan-assembler {\mvmodsq\M} } } */
-/* { dg-final { scan-assembler {\mvmoduq\M} } } */
+/* Fail due to RS6000_DISABLE_SCALAR_MODULO.  */
+/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH V3, rs6000] Disable generation of scalar modulo instructions
  2023-06-27 18:56 ` Pat Haugen
@ 2023-06-28  8:18   ` Kewen.Lin
  0 siblings, 0 replies; 3+ messages in thread
From: Kewen.Lin @ 2023-06-28  8:18 UTC (permalink / raw)
  To: Pat Haugen; +Cc: GCC Patches, Segher Boessenkool, David Edelsohn, Peter Bergner

Hi Pat,

> On 6/27/23 1:52 PM, Pat Haugen via Gcc-patches wrote:
>> Updated from prior version to address review comments (update rs6000_rtx_costs,
>> update scan strings of mod-1.c/mod-2.c).
>>
>> Disable generation of scalar modulo instructions.
>>
>> It was recently discovered that the scalar modulo instructions can suffer
>> noticeable performance issues for certain input values. This patch disables
>> their generation since the equivalent div/mul/sub sequence does not suffer
>> the same problem.
>>
>> Bootstrapped and regression tested on powerpc64/powerpc64le.
>> Ok for master and backports after burn in?
>>
>> -Pat
>>
>>
>> 2023-06-27  Pat Haugen  <pthaugen@linux.ibm.com>
>>
>> gcc/
>>      * config/rs6000/rs6000.cc (rs6000_rtx_costs): Check if disabling
>>      scalar modulo.
>>      * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
>>      * config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
>>      (define_expand umod<mode>3): New.
>>      (define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
>>      (umodti3, modti3): Disable.
>>
>> gcc/testsuite/
>>      * gcc.target/powerpc/clone1.c: Add xfails.
>>      * gcc.target/powerpc/clone3.c: Likewise.
>>      * gcc.target/powerpc/mod-1.c: Update scan strings and add xfails.
>>      * gcc.target/powerpc/mod-2.c: Likewise.
>>      * gcc.target/powerpc/p10-vdivq-vmodq.c: Add xfails.
>>
> 
> Attaching patch since my mailer apparently messed up some formatting again.
> 
> -Pat
> 
> no-modulo.diff
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 546c353029b..2dae217bf64 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -22127,7 +22127,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
>  	    *total = rs6000_cost->divsi;
>  	}
>        /* Add in shift and subtract for MOD unless we have a mod instruction. */
> -      if (!TARGET_MODULO && (code == MOD || code == UMOD))
> +      if ((!TARGET_MODULO
> +	   || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
> +	 && (code == MOD || code == UMOD))
>  	*total += COSTS_N_INSNS (2);
>        return false;
>  
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..22595f6ebd7 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2492,3 +2492,9 @@ while (0)
>         rs6000_asm_output_opcode (STREAM);				\
>      }									\
>    while (0)
> +
> +/* Disable generation of scalar modulo instructions due to performance issues
> +   with certain input values.  This can be removed in the future when the
> +   issues have been resolved.  */
> +#define RS6000_DISABLE_SCALAR_MODULO 1
> +
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index b0db8ae508d..6c2f237a539 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
>  	FAIL;
>  
>        operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> +      if (RS6000_DISABLE_SCALAR_MODULO)
> +	{
> +	  temp1 = gen_reg_rtx (<MODE>mode);
> +	  temp2 = gen_reg_rtx (<MODE>mode);
> +
> +	  emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
> +	  emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +	  emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +	  DONE;
> +	}
>      }
>    else
>      {
> @@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
>    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>          (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>  		 (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>    "mods<wd> %0,%1,%2"
>    [(set_attr "type" "div")
>     (set_attr "size" "<bits>")])
>  
> +;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
> +;; removed.
> +(define_expand "umod<mode>3"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +	(umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
> +		  (match_operand:GPR 2 "gpc_reg_operand")))]
> +  ""
> +{
> +  rtx temp1;
> +  rtx temp2;
> +
> +  if (!TARGET_MODULO)
> +	FAIL;
>  

Nit: Sorry that I didn't catch this in the previous review, but it seems we
can just put TARGET_MODULO in the condition of the define_expand instead of
testing it in the body and calling FAIL.  Besides, it seems the declarations
of temp1 and temp2 can be merged into their assignments below.

The rest looks pretty good to me, thanks!  Since I know this patch is still on
Segher's review list, I'll defer the final approval to Segher.

BR,
Kewen

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-06-28  8:18 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-27 18:52 [PATCH V3, rs6000] Disable generation of scalar modulo instructions Pat Haugen
2023-06-27 18:56 ` Pat Haugen
2023-06-28  8:18   ` Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).