public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Correct some Power9 scheduling info
@ 2017-09-27 17:56 Pat Haugen
  2017-11-15 20:03 ` Pat Haugen
  0 siblings, 1 reply; 3+ messages in thread
From: Pat Haugen @ 2017-09-27 17:56 UTC (permalink / raw)
  To: GCC Patches; +Cc: Segher Boessenkool, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 628 bytes --]

The following patch corrects some Power9 resource requirements and
instruction latencies. Bootstrap/regtest on powerpc64le-linux with no
new regressions. Ok for trunk?

-Pat


2017-09-27  Pat Haugen  <pthaugen@us.ibm.com>

	* config/rs6000/power9.md (DU_C2_3_power9): Remove an incorrect
	combination.
	(power9-alu): Split out insert/shift types...
	(power9-rot): ... to here. Correct dispatch resources.
	(power9-cracked-alu): Correct dispatch resources.
	(power9-mul): Likewise.
	(power9-mul-compare): Likewise.
	(power9-fp): Correct latency.
	(power9-ddiv): Likewise.
	(power9-vecfdiv): Likewise.
	(power9-vecdiv): Likewise.

[-- Attachment #2: p9.diff --]
[-- Type: text/x-patch, Size: 3470 bytes --]

Index: gcc/config/rs6000/power9.md
===================================================================
--- gcc/config/rs6000/power9.md	(revision 252029)
+++ gcc/config/rs6000/power9.md	(working copy)
@@ -80,7 +80,6 @@ (define_reservation "DU_C2_power9" "x0_p
 ; 2-way cracked plus 3rd slot
 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
 				      x1_power9+x2_power9+xa0_power9|
-				      x1_power9+x2_power9+xb0_power9|
 				      x2_power9+x3_power9+xb0_power9")
 
 ; 3-way cracked (consumes whole decode/dispatch cycle)
@@ -243,21 +242,29 @@ (define_insn_reservation "power9-sync" 4
 
 ; Most ALU insns are simple 2 cycle, including record form
 (define_insn_reservation "power9-alu" 2
-  (and (ior (eq_attr "type" "add,exts,integer,logical,isel")
-	    (and (eq_attr "type" "insert,shift")
-		 (eq_attr "dot" "no")))
+  (and (eq_attr "type" "add,exts,integer,logical,isel")
        (eq_attr "cpu" "power9"))
   "DU_any_power9,VSU_power9")
 ; 5 cycle CR latency
 (define_bypass 5 "power9-alu"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
 
+; Rotate/shift prevent use of third slot
+(define_insn_reservation "power9-rot" 2
+  (and (eq_attr "type" "insert,shift")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power9"))
+  "DU_slice_3_power9,VSU_power9")
+; 5 cycle CR latency
+(define_bypass 5 "power9-rot"
+		 "power9-crlogical,power9-mfcr,power9-mfcrf")
+
 ; Record form rotate/shift are cracked
 (define_insn_reservation "power9-cracked-alu" 2
   (and (eq_attr "type" "insert,shift")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 7 cycle CR latency
 (define_bypass 7 "power9-cracked-alu"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -291,13 +298,13 @@ (define_insn_reservation "power9-mul" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "no")
        (eq_attr "cpu" "power9"))
-  "DU_any_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9")
 
 (define_insn_reservation "power9-mul-compare" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 10 cycle CR latency
 (define_bypass 10 "power9-mul-compare"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -349,7 +356,7 @@ (define_insn_reservation "power9-fpsimpl
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-fp" 7
+(define_insn_reservation "power9-fp" 5
   (and (eq_attr "type" "fp,dmul")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -366,7 +373,7 @@ (define_insn_reservation "power9-sdiv" 2
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-ddiv" 33
+(define_insn_reservation "power9-ddiv" 27
   (and (eq_attr "type" "ddiv")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -419,12 +426,12 @@ (define_insn_reservation "power9-veccomp
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecfdiv" 28
+(define_insn_reservation "power9-vecfdiv" 24
   (and (eq_attr "type" "vecfdiv")
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecdiv" 32
+(define_insn_reservation "power9-vecdiv" 27
   (and (eq_attr "type" "vecdiv")
        (eq_attr "size" "!128")
        (eq_attr "cpu" "power9"))

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, rs6000] Correct some Power9 scheduling info
  2017-09-27 17:56 [PATCH, rs6000] Correct some Power9 scheduling info Pat Haugen
@ 2017-11-15 20:03 ` Pat Haugen
  2017-11-16 10:23   ` Segher Boessenkool
  0 siblings, 1 reply; 3+ messages in thread
From: Pat Haugen @ 2017-11-15 20:03 UTC (permalink / raw)
  To: GCC Patches; +Cc: Segher Boessenkool, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 1036 bytes --]

On 09/27/2017 12:56 PM, Pat Haugen wrote:
> The following patch corrects some Power9 resource requirements and
> instruction latencies. Bootstrap/regtest on powerpc64le-linux with no
> new regressions. Ok for trunk?

Updated patch follows. Bootstrap/regtest on powerpc64le-linux (Power9)
with no regressions. Ok for trunk?

-Pat

2017-11-15  Pat Haugen  <pthaugen@us.ibm.com>

	* config/rs6000/power9.md (power9fpdiv): New automaton and cpu_unit
	defined for it.
	(DU_C2_3_power9): Correct reservation combinations.
	(FP_DIV_power9, VEC_DIV_power9): New.
	(power9-alu): Split out rotate/shift...
	(power9-rot): ...to here, correct dispatch resource.
	(power9-cracked-alu, power9-mul, power9-mul-compare): Correct dispatch
	resource.
	(power9-fp): Correct latency.
	(power9-sdiv): Add div/sqrt resource.
	(power9-ddiv): Correct latency, add div/sqrt resource.
	(power9-sqrt, power9-dsqrt): Add div/sqrt resource.
	(power9-vecfdiv, power9-vecdiv): Correct latency, add div/sqrt
	resource.
	(power9-qpdiv, power9-qpmul): Adjust resource usage.



[-- Attachment #2: p9_fp.diff --]
[-- Type: text/x-patch, Size: 6523 bytes --]

Index: gcc/config/rs6000/power9.md
===================================================================
--- gcc/config/rs6000/power9.md	(revision 254708)
+++ gcc/config/rs6000/power9.md	(working copy)
@@ -19,7 +19,7 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-(define_automaton "power9dsp,power9lsu,power9vsu,power9misc")
+(define_automaton "power9dsp,power9lsu,power9vsu,power9fpdiv,power9misc")
 
 (define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")
 (define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")
@@ -28,7 +28,11 @@
 ; Two fixed point divide units, not pipelined
 (define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
 (define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")
+; Create a false unit for use by non-pipelined FP div/sqrt
+(define_cpu_unit "fp_div0_power9,fp_div1_power9,fp_div2_power9,fp_div3_power9"
+		 "power9fpdiv")
 
+
 (define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,
 		  x2_power9,x3_power9,xb0_power9,xb1_power9,
 		  br0_power9,br1_power9" "power9dsp")
@@ -79,8 +83,7 @@
 
 ; 2-way cracked plus 3rd slot
 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
-				      x1_power9+x2_power9+xa0_power9|
-				      x1_power9+x2_power9+xb0_power9|
+				      x1_power9+x2_power9+xa1_power9|
 				      x2_power9+x3_power9+xb0_power9")
 
 ; 3-way cracked (consumes whole decode/dispatch cycle)
@@ -108,7 +111,19 @@
 
 (define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")
 
+; Define the reservation to be used by FP div/sqrt which allows other insns
+; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
+; Note that the number of cycles blocked varies depending on insn, but we
+; just use the same number for all in order to keep the number of DFA states
+; reasonable.
+(define_reservation "FP_DIV_power9"
+		    "fp_div0_power9*8|fp_div1_power9*8|fp_div2_power9*8|
+		     fp_div3_power9*8")
+(define_reservation "VEC_DIV_power9"
+		    "fp_div0_power9*8+fp_div1_power9*8|
+		     fp_div2_power9*8+fp_div3_power9*8")
 
+
 ; LS Unit
 (define_insn_reservation "power9-load" 4
   (and (eq_attr "type" "load")
@@ -243,9 +258,7 @@
 
 ; Most ALU insns are simple 2 cycle, including record form
 (define_insn_reservation "power9-alu" 2
-  (and (ior (eq_attr "type" "add,exts,integer,logical,isel")
-	    (and (eq_attr "type" "insert,shift")
-		 (eq_attr "dot" "no")))
+  (and (eq_attr "type" "add,exts,integer,logical,isel")
        (eq_attr "cpu" "power9"))
   "DU_any_power9,VSU_power9")
 ; 5 cycle CR latency
@@ -252,12 +265,19 @@
 (define_bypass 5 "power9-alu"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
 
+; Rotate/shift prevent use of third slot
+(define_insn_reservation "power9-rot" 2
+  (and (eq_attr "type" "insert,shift")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power9"))
+  "DU_slice_3_power9,VSU_power9")
+
 ; Record form rotate/shift are cracked
 (define_insn_reservation "power9-cracked-alu" 2
   (and (eq_attr "type" "insert,shift")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 7 cycle CR latency
 (define_bypass 7 "power9-cracked-alu"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -291,13 +311,13 @@
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "no")
        (eq_attr "cpu" "power9"))
-  "DU_any_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9")
 
 (define_insn_reservation "power9-mul-compare" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 10 cycle CR latency
 (define_bypass 10 "power9-mul-compare"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -349,7 +369,7 @@
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-fp" 7
+(define_insn_reservation "power9-fp" 5
   (and (eq_attr "type" "fp,dmul")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -360,26 +380,26 @@
   "DU_slice_3_power9,VSU_power9")
 
 ; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
-; divide insns, but for the most part do not block pipelined ops.
+; div/sqrt insns, but for the most part do not block pipelined ops.
 (define_insn_reservation "power9-sdiv" 22
   (and (eq_attr "type" "sdiv")
        (eq_attr "cpu" "power9"))
-  "DU_slice_3_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
 
-(define_insn_reservation "power9-ddiv" 33
+(define_insn_reservation "power9-ddiv" 27
   (and (eq_attr "type" "ddiv")
        (eq_attr "cpu" "power9"))
-  "DU_slice_3_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
 
 (define_insn_reservation "power9-sqrt" 26
   (and (eq_attr "type" "ssqrt")
        (eq_attr "cpu" "power9"))
-  "DU_slice_3_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
 
 (define_insn_reservation "power9-dsqrt" 36
   (and (eq_attr "type" "dsqrt")
        (eq_attr "cpu" "power9"))
-  "DU_slice_3_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
 
 (define_insn_reservation "power9-vec-2cyc" 2
   (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
@@ -419,28 +439,29 @@
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecfdiv" 28
+(define_insn_reservation "power9-vecfdiv" 24
   (and (eq_attr "type" "vecfdiv")
        (eq_attr "cpu" "power9"))
-  "DU_super_power9,VSU_super_power9")
+  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")
 
-(define_insn_reservation "power9-vecdiv" 32
+(define_insn_reservation "power9-vecdiv" 27
   (and (eq_attr "type" "vecdiv")
        (eq_attr "size" "!128")
        (eq_attr "cpu" "power9"))
-  "DU_super_power9,VSU_super_power9")
+  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")
 
+; Use 8 for DFU reservation on QP div/mul to limit DFA state size
 (define_insn_reservation "power9-qpdiv" 56
   (and (eq_attr "type" "vecdiv")
        (eq_attr "size" "128")
        (eq_attr "cpu" "power9"))
-  "DU_super_power9,dfu_power9*44")
+  "DU_super_power9,dfu_power9*8")
 
 (define_insn_reservation "power9-qpmul" 24
   (and (eq_attr "type" "qmul")
        (eq_attr "size" "128")
        (eq_attr "cpu" "power9"))
-  "DU_super_power9,dfu_power9*12")
+  "DU_super_power9,dfu_power9*8")
 
 (define_insn_reservation "power9-mffgpr" 2
   (and (eq_attr "type" "mffgpr")

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, rs6000] Correct some Power9 scheduling info
  2017-11-15 20:03 ` Pat Haugen
@ 2017-11-16 10:23   ` Segher Boessenkool
  0 siblings, 0 replies; 3+ messages in thread
From: Segher Boessenkool @ 2017-11-16 10:23 UTC (permalink / raw)
  To: Pat Haugen; +Cc: GCC Patches, David Edelsohn

Hi Pat,

That looks good, thanks!  Okay for trunk.


Segher

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-11-16 10:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-27 17:56 [PATCH, rs6000] Correct some Power9 scheduling info Pat Haugen
2017-11-15 20:03 ` Pat Haugen
2017-11-16 10:23   ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).