public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size
@ 2017-06-12 13:54 James Greenhalgh
  2017-06-12 13:54 ` [Patch AArch64 2/2] Fix memory sizes to load/store patterns James Greenhalgh
  2017-06-12 14:28 ` [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size Kyrill Tkachov
  0 siblings, 2 replies; 10+ messages in thread
From: James Greenhalgh @ 2017-06-12 13:54 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

[-- Attachment #1: Type: text/plain, Size: 2415 bytes --]


Hi,

In the AArch64 backend and scheduling models there is some confusion as to
what the load1/load2 etc. scheduling types refer to. This leads to us using
load1/load2 in two contexts - for a variety of 32-bit, 64-bit and 128-bit
loads in AArch32 and 128-bit loads in AArch64. That leads to an undesirable
confusion in scheduling.

Fixing it is easy, but mechanical and boring. Essentially,

  s/load1/load_4/
  s/load2/load_8/
  s/load3/load_12/
  s/load4/load_16/
  s/store1/store_4/
  s/store2/store_8/
  s/store3/store_12/
  s/store4/store_16/

Across all sorts of pipeline models, and the two backends.

I have intentionally not modified any of the patterns which now look obviously
incorrect. I'll be doing a second pass over the AArch64 back-end in patch
2/2 which will fix these bugs. The AArch32 back-end looked to me to get this
correct.

Bootstrapped on AArch64 and ARM without issue - there's no functional
change here.

OK?

Thanks,
James

---
gcc/

2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/arm/types.md (type): Rename load1/2/3/4 to load_4/8/12/16
	and store1/2/3/4 to store_4/8/12/16.
	* config/aarch64/aarch64.md: Update for rename.
	* config/arm/arm.md: Likewise.
	* config/arm/arm.c: Likewise.
	* config/arm/thumb1.md: Likewise.
	* config/arm/thumb2.md: Likewise.
	* config/arm/vfp.md: Likewise.
	* config/arm/arm-generic.md: Likewise.
	* config/arm/arm1020e.md: Likewise.
	* config/arm/arm1026ejs.md: Likewise.
	* config/arm/arm1136jfs.md: Likewise.
	* config/arm/arm926ejs.md: Likewise.
	* config/arm/cortex-a15.md: Likewise.
	* config/arm/cortex-a17.md: Likewise.
	* config/arm/cortex-a5.md: Likewise.
	* config/arm/cortex-a53.md: Likewise.
	* config/arm/cortex-a57.md: Likewise.
	* config/arm/cortex-a7.md: Likewise.
	* config/arm/cortex-a8.md: Likewise.
	* config/arm/cortex-a9.md: Likewise.
	* config/arm/cortex-m4.md: Likewise.
	* config/arm/cortex-m7.md: Likewise.
	* config/arm/cortex-r4.md: Likewise.
	* config/arm/exynos-m1.md: Likewise.
	* config/arm/fa526.md: Likewise.
	* config/arm/fa606te.md: Likewise.
	* config/arm/fa626te.md: Likewise.
	* config/arm/fa726te.md: Likewise.
	* config/arm/fmp626.md: Likewise.
	* config/arm/iwmmxt.md: Likewise.
	* config/arm/ldmstm.md: Likewise.
	* config/arm/marvell-pj4.md: Likewise.
	* config/arm/xgene1.md: Likewise.
	* config/aarch64/thunderx.md: Likewise.
	* config/aarch64/thunderx2t99.md: Likewise.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Mechanical-Patch-ARM-AArch64-1-2-Rename-load-store-s.patch --]
[-- Type: text/x-patch; name="0001-Mechanical-Patch-ARM-AArch64-1-2-Rename-load-store-s.patch", Size: 92550 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1a721bf..11295a6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -546,7 +546,7 @@
     operands[0] = gen_rtx_MEM (DImode, operands[0]);
     return pftype[INTVAL(operands[1])][locality];
   }
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_insn "trap"
@@ -890,7 +890,7 @@
        gcc_unreachable ();
      }
 }
-  [(set_attr "type" "mov_reg,mov_imm,neon_move,load1,load1,store1,store1,\
+  [(set_attr "type" "mov_reg,mov_imm,neon_move,load_4,load_4,store_4,store_4,\
                      neon_to_gp<q>,neon_from_gp<q>,neon_dup")
    (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")]
 )
@@ -946,7 +946,8 @@
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
     }"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
+                     load_4,load_4,store_4,store_4,\
                      adr,adr,f_mcr,f_mrc,fmov")
    (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )
@@ -979,7 +980,8 @@
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
     }"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
+                     load_4,load_4,store_4,store_4,\
                      adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
@@ -1024,7 +1026,7 @@
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
-		             load2,store2,store2,f_loadd,f_stored")
+		             load_8,store_8,store_8,f_loadd,f_stored")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
@@ -1077,7 +1079,7 @@
    strh\\t%w1, %0
    mov\\t%w0, %w1"
   [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
-                     f_loads,f_stores,load1,store1,mov_reg")
+                     f_loads,f_stores,load_4,store_4,mov_reg")
    (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
 )
 
@@ -1098,7 +1100,7 @@
    str\\t%w1, %0
    mov\\t%w0, %w1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
-                     f_loads,f_stores,load1,store1,mov_reg")
+                     f_loads,f_stores,load_4,store_4,mov_reg")
    (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
 )
 
@@ -1119,7 +1121,7 @@
    str\\t%x1, %0
    mov\\t%x0, %x1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
-                     f_loadd,f_stored,load1,store1,mov_reg")
+                     f_loadd,f_stored,load_4,store_4,mov_reg")
    (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
 )
 
@@ -1143,7 +1145,7 @@
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0"
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
-                     f_loadd,f_stored,load2,store2,store2")
+                     f_loadd,f_stored,load_8,store_8,store_8")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
    (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
 )
@@ -1191,7 +1193,7 @@
   "@
    ldp\\t%w0, %w2, %1
    ldp\\t%s0, %s2, %1"
-  [(set_attr "type" "load2,neon_load1_2reg")
+  [(set_attr "type" "load_8,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1207,7 +1209,7 @@
   "@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
-  [(set_attr "type" "load2,neon_load1_2reg")
+  [(set_attr "type" "load_8,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1226,7 +1228,7 @@
   "@
    stp\\t%w1, %w3, %0
    stp\\t%s1, %s3, %0"
-  [(set_attr "type" "store2,neon_store1_2reg")
+  [(set_attr "type" "store_8,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1242,7 +1244,7 @@
   "@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
-  [(set_attr "type" "store2,neon_store1_2reg")
+  [(set_attr "type" "store_8,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1260,7 +1262,7 @@
   "@
    ldp\\t%s0, %s2, %1
    ldp\\t%w0, %w2, %1"
-  [(set_attr "type" "neon_load1_2reg,load2")
+  [(set_attr "type" "neon_load1_2reg,load_8")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1276,7 +1278,7 @@
   "@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load2")
+  [(set_attr "type" "neon_load1_2reg,load_8")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1294,7 +1296,7 @@
   "@
    stp\\t%s1, %s3, %0
    stp\\t%w1, %w3, %0"
-  [(set_attr "type" "neon_store1_2reg,store2")
+  [(set_attr "type" "neon_store1_2reg,store_8")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1310,7 +1312,7 @@
   "@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store2")
+  [(set_attr "type" "neon_store1_2reg,store_8")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1328,7 +1330,7 @@
                    (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load2")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1361,7 +1363,7 @@
           (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store2")]
+  [(set_attr "type" "store_8")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -1397,7 +1399,7 @@
   "@
    sxtw\t%0, %w1
    ldrsw\t%0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load_4")]
 )
 
 (define_insn "*load_pair_extendsidi2_aarch64"
@@ -1410,7 +1412,7 @@
 			       XEXP (operands[1], 0),
 			       GET_MODE_SIZE (SImode)))"
   "ldpsw\\t%0, %2, %1"
-  [(set_attr "type" "load2")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "*zero_extendsidi2_aarch64"
@@ -1420,7 +1422,7 @@
   "@
    uxtw\t%0, %w1
    ldr\t%w0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load_4")]
 )
 
 (define_insn "*load_pair_zero_extendsidi2_aarch64"
@@ -1433,7 +1435,7 @@
 			       XEXP (operands[1], 0),
 			       GET_MODE_SIZE (SImode)))"
   "ldp\\t%w0, %w2, %1"
-  [(set_attr "type" "load2")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
@@ -1449,7 +1451,7 @@
   "@
    sxt<SHORT:size>\t%<GPI:w>0, %w1
    ldrs<SHORT:size>\t%<GPI:w>0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load_4")]
 )
 
 (define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
@@ -1460,7 +1462,7 @@
    and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
    ldr<SHORT:size>\t%w0, %1
    ldr\t%<SHORT:size>0, %1"
-  [(set_attr "type" "logic_imm,load1,load1")]
+  [(set_attr "type" "logic_imm,load_4,load_4")]
 )
 
 (define_expand "<optab>qihi2"
@@ -1476,7 +1478,7 @@
   "@
    sxtb\t%w0, %w1
    ldrsb\t%w0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load_4")]
 )
 
 (define_insn "*zero_extendqihi2_aarch64"
@@ -1486,7 +1488,7 @@
   "@
    and\t%w0, %w1, 255
    ldrb\t%w0, %1"
-  [(set_attr "type" "logic_imm,load1")]
+  [(set_attr "type" "logic_imm,load_4")]
 )
 
 ;; -------------------------------------------------------------------
@@ -5137,7 +5139,7 @@
 		    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_insn "ldr_got_small_sidi"
@@ -5149,7 +5151,7 @@
 		    UNSPEC_GOTSMALLPIC)))]
   "TARGET_ILP32"
   "ldr\\t%w0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_insn "ldr_got_small_28k_<mode>"
@@ -5160,7 +5162,7 @@
 		    UNSPEC_GOTSMALLPIC28K))]
   ""
   "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_insn "ldr_got_small_28k_sidi"
@@ -5172,7 +5174,7 @@
 		    UNSPEC_GOTSMALLPIC28K)))]
   "TARGET_ILP32"
   "ldr\\t%w0, [%1, #:gotpage_lo14:%a2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_insn "ldr_got_tiny"
@@ -5181,7 +5183,7 @@
 		   UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_insn "aarch64_load_tp_hard"
@@ -5222,7 +5224,7 @@
 		   UNSPEC_GOTSMALLTLS))]
   ""
   "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set_attr "length" "8")]
 )
 
@@ -5233,7 +5235,7 @@
 		      UNSPEC_GOTSMALLTLS)))]
   ""
   "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set_attr "length" "8")]
 )
 
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index b67671d..c18da2f 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -94,13 +94,13 @@
 ;; Stores take one cycle in pipe 0
 (define_insn_reservation "thunderx_store" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store_4"))
   "thunderx_pipe0")
 
 ;; Store pair are single issued
 (define_insn_reservation "thunderx_storepair" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "thunderx_pipe0 + thunderx_pipe1")
 
 ;; Prefetch are single issued
@@ -112,7 +112,7 @@
 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "load1, load2"))
+       (eq_attr "type" "load_4, load_8"))
   "thunderx_pipe0")
 
 (define_insn_reservation "thunderx_brj" 1
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index adb010c..632396f 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -123,22 +123,22 @@
 
 (define_insn_reservation "thunderx2t99_load_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load1"))
+       (eq_attr "type" "load_4"))
   "thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store_4"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 (define_insn_reservation "thunderx2t99_storepair_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 ;; FP data processing instructions.
diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md
index 52c8b2e..0a45608 100644
--- a/gcc/config/arm/arm-generic.md
+++ b/gcc/config/arm/arm-generic.md
@@ -47,69 +47,69 @@
 (define_insn_reservation "store_wbuf" 5
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "yes")
-       	    (eq_attr "type" "store1")))
+       	    (eq_attr "type" "store_4")))
   "core+write_buf*3+write_blockage*5")
 
 (define_insn_reservation "store2_wbuf" 7
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "yes")
-	    (eq_attr "type" "store2")))
+	    (eq_attr "type" "store_8")))
   "core+write_buf*4+write_blockage*7")
 
 (define_insn_reservation "store3_wbuf" 9
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "yes")
-	    (eq_attr "type" "store3")))
+	    (eq_attr "type" "store_12")))
   "core+write_buf*5+write_blockage*9")
 
 (define_insn_reservation "store4_wbuf" 11
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "yes")
-            (eq_attr "type" "store4")))
+            (eq_attr "type" "store_16")))
   "core+write_buf*6+write_blockage*11")
 
 (define_insn_reservation "store2" 3
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "no")
-            (eq_attr "type" "store2")))
+            (eq_attr "type" "store_8")))
   "core*3")
 
 (define_insn_reservation "store3" 4
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "no")
-            (eq_attr "type" "store3")))
+            (eq_attr "type" "store_12")))
   "core*4")
 
 (define_insn_reservation "store4" 5
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "model_wbuf" "no")
-	    (eq_attr "type" "store4")))
+	    (eq_attr "type" "store_16")))
   "core*5")
 
 (define_insn_reservation "store_ldsched" 1
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "ldsched" "yes") 
-	    (eq_attr "type" "store1")))
+	    (eq_attr "type" "store_4")))
   "core")
 
 (define_insn_reservation "load_ldsched_xscale" 3
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "ldsched" "yes") 
-	    (and (eq_attr "type" "load_byte,load1")
+	    (and (eq_attr "type" "load_byte,load_4")
 	         (eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
   "core")
 
 (define_insn_reservation "load_ldsched" 2
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "ldsched" "yes") 
-	    (and (eq_attr "type" "load_byte,load1")
+	    (and (eq_attr "type" "load_byte,load_4")
 	         (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
   "core")
 
 (define_insn_reservation "load_or_store" 2
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "ldsched" "!yes") 
-	    (eq_attr "type" "load_byte,load1,load2,load3,load4,store1")))
+	    (eq_attr "type" "load_byte,load_4,load_8,load_12,load_16,store_4")))
   "core*2")
 
 (define_insn_reservation "mult" 16
@@ -140,8 +140,8 @@
 (define_insn_reservation "multi_cycle" 32
   (and (eq_attr "generic_sched" "yes")
        (and (eq_attr "core_cycles" "multi")
-            (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\
-                                  store1,store2,store3,store4")
+            (and (eq_attr "type" "!load_byte,load_4,load_8,load_12,load_16,\
+                                  store_4,store_8,store_12,store_16")
 		 (not (ior (eq_attr "mul32" "yes")
 			   (eq_attr "mul64" "yes"))))))
   "core*32")
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 259597d..e503891 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11264,8 +11264,8 @@ cortexa7_older_only (rtx_insn *insn)
     case TYPE_SHIFT_IMM:
     case TYPE_SHIFT_REG:
     case TYPE_LOAD_BYTE:
-    case TYPE_LOAD1:
-    case TYPE_STORE1:
+    case TYPE_LOAD_4:
+    case TYPE_STORE_4:
     case TYPE_FFARITHS:
     case TYPE_FADDS:
     case TYPE_FFARITHD:
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index e6e1ac5..3e84af9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -314,7 +314,7 @@
 ; to stall the processor.  Used with model_wbuf above.
 (define_attr "write_conflict" "no,yes"
   (if_then_else (eq_attr "type"
-		 "block,call,load1")
+		 "block,call,load_4")
 		(const_string "yes")
 		(const_string "no")))
 
@@ -3770,7 +3770,7 @@
   ""
   [(set_attr "conds" "clob")
    (set_attr "length" "8,8,12")
-   (set_attr "type" "store1")]
+   (set_attr "type" "store_4")]
 )
 
 (define_expand "uminsi3"
@@ -3803,7 +3803,7 @@
   ""
   [(set_attr "conds" "clob")
    (set_attr "length" "8,8,12")
-   (set_attr "type" "store1")]
+   (set_attr "type" "store_4")]
 )
 
 (define_insn "*store_minmaxsi"
@@ -3828,7 +3828,7 @@
 	(if_then_else (eq_attr "is_thumb" "yes")
 		      (const_int 14)
 		      (const_int 12)))
-   (set_attr "type" "store1")]
+   (set_attr "type" "store_4")]
 )
 
 ; Reject the frame pointer in operand[1], since reloading this after
@@ -4495,7 +4495,7 @@
    (set_attr "length" "2,4")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no")
-   (set_attr "type" "load1")])
+   (set_attr "type" "load_4")])
 
 (define_insn "unaligned_loadhis"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r")
@@ -4533,7 +4533,7 @@
    (set_attr "length" "2,4")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no")
-   (set_attr "type" "store1")])
+   (set_attr "type" "store_4")])
 
 (define_insn "unaligned_storehi"
   [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
@@ -4545,7 +4545,7 @@
    (set_attr "length" "2,4")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no")
-   (set_attr "type" "store1")])
+   (set_attr "type" "store_4")])
 
 
 (define_insn "*extv_reg"
@@ -5862,7 +5862,7 @@
     }
   "
   [(set_attr "length" "8,12,16,8,8")
-   (set_attr "type" "multiple,multiple,multiple,load2,store2")
+   (set_attr "type" "multiple,multiple,multiple,load_8,store_8")
    (set_attr "arm_pool_range" "*,*,*,1020,*")
    (set_attr "arm_neg_pool_range" "*,*,*,1004,*")
    (set_attr "thumb2_pool_range" "*,*,*,4094,*")
@@ -6083,7 +6083,7 @@
    movw%?\\t%0, %1
    ldr%?\\t%0, %1
    str%?\\t%1, %0"
-  [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load1,store1")
+  [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load_4,store_4")
    (set_attr "predicable" "yes")
    (set_attr "arch" "*,*,*,v6t2,*,*")
    (set_attr "pool_range" "*,*,*,*,4096,*")
@@ -6213,7 +6213,7 @@
   (set (match_dup 0) (unspec:SI [(match_dup 0) (match_dup 3)
        		     		 (match_dup 2)] UNSPEC_PIC_BASE))]
  "operands[3] = TARGET_THUMB ? GEN_INT (4) : GEN_INT (8);"
- [(set_attr "type" "load1,load1,load1")
+ [(set_attr "type" "load_4,load_4,load_4")
   (set_attr "pool_range" "4096,4094,1022")
   (set_attr "neg_pool_range" "4084,0,0")
   (set_attr "arch"  "a,t2,t1")    
@@ -6229,7 +6229,7 @@
 	(unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
   "TARGET_32BIT && flag_pic"
   "ldr%?\\t%0, %1"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set (attr "pool_range")
 	(if_then_else (eq_attr "is_thumb" "no")
 		      (const_int 4096)
@@ -6245,7 +6245,7 @@
 	(unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
   "TARGET_THUMB1 && flag_pic"
   "ldr\\t%0, %1"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set (attr "pool_range") (const_int 1018))]
 )
 
@@ -6294,7 +6294,7 @@
     return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\";
   "
   [(set_attr "predicable" "yes")
-   (set_attr "type" "load1")]
+   (set_attr "type" "load_4")]
 )
 
 ;; PIC references to local variables can generate pic_add_dot_plus_eight
@@ -6325,7 +6325,7 @@
 				    UNSPEC_PIC_OFFSET))))]
   "TARGET_VXWORKS_RTP && TARGET_ARM && flag_pic"
   "ldr%?\\t%0, [%1,%2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 (define_expand "builtin_setjmp_receiver"
@@ -6741,8 +6741,8 @@
                                         (const_string "mov_reg"))
                           (const_string "mvn_imm")
                           (const_string "mov_imm")
-                          (const_string "store1")
-                          (const_string "load1")])]
+                          (const_string "store_4")
+                          (const_string "load_4")])]
 )
 
 (define_insn "*movhi_bytes"
@@ -6876,7 +6876,7 @@
    strb%?\\t%1, %0
    ldrb%?\\t%0, %1
    strb%?\\t%1, %0"
-  [(set_attr "type" "mov_reg,mov_reg,mov_imm,mov_imm,mvn_imm,load1,store1,load1,store1")
+  [(set_attr "type" "mov_reg,mov_reg,mov_imm,mov_imm,mvn_imm,load_4,store_4,load_4,store_4")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,yes,no,yes,no,no,no,no,no")
    (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any")
@@ -6943,7 +6943,7 @@
     }
   "
   [(set_attr "conds" "unconditional")
-   (set_attr "type" "load1,store1,mov_reg,multiple")
+   (set_attr "type" "load_4,store_4,mov_reg,multiple")
    (set_attr "length" "4,4,4,8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")]
@@ -7000,7 +7000,7 @@
    str%?\\t%1, %0\\t%@ float"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
-   (set_attr "type" "mov_reg,load1,store1")
+   (set_attr "type" "mov_reg,load_4,store_4")
    (set_attr "arm_pool_range" "*,4096,*")
    (set_attr "thumb2_pool_range" "*,4094,*")
    (set_attr "arm_neg_pool_range" "*,4084,*")
@@ -7089,7 +7089,7 @@
     }
   "
   [(set_attr "length" "8,12,16,8,8")
-   (set_attr "type" "multiple,multiple,multiple,load2,store2")
+   (set_attr "type" "multiple,multiple,multiple,load_8,store_8")
    (set_attr "arm_pool_range" "*,*,*,1020,*")
    (set_attr "thumb2_pool_range" "*,*,*,1018,*")
    (set_attr "arm_neg_pool_range" "*,*,*,1004,*")
@@ -8416,7 +8416,7 @@
       }
     return output_return_instruction (const_true_rtx, true, false, false);
   }"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set_attr "length" "12")
    (set_attr "predicable" "yes")]
 )
@@ -8440,7 +8440,7 @@
   }"
   [(set_attr "conds" "use")
    (set_attr "length" "12")
-   (set_attr "type" "load1")]
+   (set_attr "type" "load_4")]
 )
 
 (define_insn "*cond_<return_str>return_inverted"
@@ -8462,7 +8462,7 @@
   }"
   [(set_attr "conds" "use")
    (set_attr "length" "12")
-   (set_attr "type" "load1")]
+   (set_attr "type" "load_4")]
 )
 
 (define_insn "*arm_simple_return"
@@ -8645,7 +8645,7 @@
         (unspec:SI [(const_int 0)] UNSPEC_PROBE_STACK))]
   "TARGET_32BIT"
   "str%?\\tr0, %0"
-  [(set_attr "type" "store1")
+  [(set_attr "type" "store_4")
    (set_attr "predicable" "yes")]
 )
 
@@ -8755,7 +8755,7 @@
 	(match_operand:SI 0 "memory_operand" "m"))]
   "TARGET_ARM"
   "ldr%?\\t%|pc, %0\\t%@ indirect memory jump"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set_attr "pool_range" "4096")
    (set_attr "neg_pool_range" "4084")
    (set_attr "predicable" "yes")]
@@ -10666,7 +10666,7 @@
   }"
   [(set_attr "length" "12")
    (set_attr "predicable" "yes")
-   (set_attr "type" "load1")]
+   (set_attr "type" "load_4")]
 )
 
 ; This pattern is never tried by combine, so do it as a peephole
@@ -11013,7 +11013,7 @@
 
     return \"\";
   }"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set (attr "length")
 	(symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))]
 )
@@ -11049,7 +11049,7 @@
     return \"\";
   }
   "
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set (attr "length")
 	(symbol_ref "arm_attr_length_pop_multi (operands,
@@ -11082,7 +11082,7 @@
     return \"\";
   }
   "
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set (attr "length")
 	(symbol_ref "arm_attr_length_pop_multi (operands, /*return_pc=*/true,
@@ -11105,7 +11105,7 @@
     return \"\";
   }
   "
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set (attr "length")
 	(symbol_ref "arm_attr_length_pop_multi (operands, /*return_pc=*/true,
@@ -11119,7 +11119,7 @@
         (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+rk"))))]
   "TARGET_32BIT && (reload_in_progress || reload_completed)"
   "ldr%?\t%|pc, [%0], #4"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set_attr "predicable" "yes")]
 )
 ;; Pop for floating point registers (as used in epilogue RTL)
@@ -11152,7 +11152,7 @@
     return \"\";
   }
   "
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "conds" "unconditional")
    (set_attr "predicable" "no")]
 )
@@ -11345,7 +11345,7 @@
 	     (match_operand:SI 2 "" ""))]
   "TARGET_32BIT && arm_arch5e"
   "pld\\t%a0"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load_4")]
 )
 
 ;; General predication pattern
@@ -11732,7 +11732,7 @@
                                   operands[1], INTVAL (operands[2]),
                                   false, true))"
   "ldrd%?\t%0, %3, [%1, %2]"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -11746,7 +11746,7 @@
      && (operands_ok_ldrd_strd (operands[0], operands[2],
                                   operands[1], 0, false, true))"
   "ldrd%?\t%0, %2, [%1]"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -11760,7 +11760,7 @@
      && (operands_ok_ldrd_strd (operands[0], operands[2],
                                   operands[1], -4, false, true))"
   "ldrd%?\t%0, %2, [%1, #-4]"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -11777,7 +11777,7 @@
                                   operands[0], INTVAL (operands[1]),
                                   false, false))"
   "strd%?\t%2, %4, [%0, %1]"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -11791,7 +11791,7 @@
      && (operands_ok_ldrd_strd (operands[1], operands[2],
                                   operands[0], 0, false, false))"
   "strd%?\t%1, %2, [%0]"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -11805,7 +11805,7 @@
      && (operands_ok_ldrd_strd (operands[1], operands[2],
                                   operands[0], -4, false, false))"
   "strd%?\t%1, %2, [%0, #-4]"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md
index bae76e5..d91241f 100644
--- a/gcc/config/arm/arm1020e.md
+++ b/gcc/config/arm/arm1020e.md
@@ -176,12 +176,12 @@
 
 (define_insn_reservation "1020load1_op" 2
  (and (eq_attr "tune" "arm1020e,arm1022e")
-      (eq_attr "type" "load_byte,load1"))
+      (eq_attr "type" "load_byte,load_4"))
  "1020a_e+1020l_e,1020l_m,1020l_w")
 
 (define_insn_reservation "1020store1_op" 0
  (and (eq_attr "tune" "arm1020e,arm1022e")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "1020a_e+1020l_e,1020l_m,1020l_w")
 
 ;; A load's result can be stored by an immediately following store
@@ -211,22 +211,22 @@
 
 (define_insn_reservation "1020load2_op" 2
  (and (eq_attr "tune" "arm1020e,arm1022e")
-      (eq_attr "type" "load2"))
+      (eq_attr "type" "load_8"))
  "1020a_e+1020l_e,1020l_m,1020l_w")
 
 (define_insn_reservation "1020store2_op" 0
  (and (eq_attr "tune" "arm1020e,arm1022e")
-      (eq_attr "type" "store2"))
+      (eq_attr "type" "store_8"))
  "1020a_e+1020l_e,1020l_m,1020l_w")
 
 (define_insn_reservation "1020load34_op" 3
  (and (eq_attr "tune" "arm1020e,arm1022e")
-      (eq_attr "type" "load3,load4"))
+      (eq_attr "type" "load_12,load_16"))
  "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w")
 
 (define_insn_reservation "1020store34_op" 0
  (and (eq_attr "tune" "arm1020e,arm1022e")
-      (eq_attr "type" "store3,store4"))
+      (eq_attr "type" "store_12,store_16"))
  "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md
index 2fe6248..951c2a4 100644
--- a/gcc/config/arm/arm1026ejs.md
+++ b/gcc/config/arm/arm1026ejs.md
@@ -176,12 +176,12 @@
 
 (define_insn_reservation "load1_op" 2
  (and (eq_attr "tune" "arm1026ejs")
-      (eq_attr "type" "load_byte,load1"))
+      (eq_attr "type" "load_byte,load_4"))
  "a_e+l_e,l_m,a_w+l_w")
 
 (define_insn_reservation "store1_op" 0
  (and (eq_attr "tune" "arm1026ejs")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "a_e+l_e,l_m,a_w+l_w")
 
 ;; A load's result can be stored by an immediately following store
@@ -206,22 +206,22 @@
 
 (define_insn_reservation "load2_op" 2
  (and (eq_attr "tune" "arm1026ejs")
-      (eq_attr "type" "load2"))
+      (eq_attr "type" "load_8"))
  "a_e+l_e,l_m,a_w+l_w")
 
 (define_insn_reservation "store2_op" 0
  (and (eq_attr "tune" "arm1026ejs")
-      (eq_attr "type" "store2"))
+      (eq_attr "type" "store_8"))
  "a_e+l_e,l_m,a_w+l_w")
 
 (define_insn_reservation "load34_op" 3
  (and (eq_attr "tune" "arm1026ejs")
-      (eq_attr "type" "load3,load4"))
+      (eq_attr "type" "load_12,load_16"))
  "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w")
 
 (define_insn_reservation "store34_op" 0
  (and (eq_attr "tune" "arm1026ejs")
-      (eq_attr "type" "store3,store4"))
+      (eq_attr "type" "store_12,store_16"))
  "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md
index d086e83..e434c36 100644
--- a/gcc/config/arm/arm1136jfs.md
+++ b/gcc/config/arm/arm1136jfs.md
@@ -294,7 +294,7 @@
 
 (define_insn_reservation "11_load1" 3
  (and (eq_attr "tune" "arm1136js,arm1136jfs")
-      (eq_attr "type" "load1"))
+      (eq_attr "type" "load_4"))
  "l_a+e_1,l_dc1,l_dc2,l_wb")
 
 ;; Load byte results are not available until the writeback stage, where
@@ -307,7 +307,7 @@
 
 (define_insn_reservation "11_store1" 0
  (and (eq_attr "tune" "arm1136js,arm1136jfs")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "l_a+e_1,l_dc1,l_dc2,l_wb")
 
 ;; Load/store double words into adjacent registers.  The timing and
@@ -315,12 +315,12 @@
 ;; aligned.  This model assumes that it is.
 (define_insn_reservation "11_load2" 3
  (and (eq_attr "tune" "arm1136js,arm1136jfs")
-      (eq_attr "type" "load2"))
+      (eq_attr "type" "load_8"))
  "l_a+e_1,l_dc1,l_dc2,l_wb")
 
 (define_insn_reservation "11_store2" 0
  (and (eq_attr "tune" "arm1136js,arm1136jfs")
-      (eq_attr "type" "store2"))
+      (eq_attr "type" "store_8"))
  "l_a+e_1,l_dc1,l_dc2,l_wb")
 
 ;; Load/store multiple registers.  Two registers are stored per cycle.
@@ -328,12 +328,12 @@
 ;; optimistically schedule a low latency.
 (define_insn_reservation "11_load34" 4
  (and (eq_attr "tune" "arm1136js,arm1136jfs")
-      (eq_attr "type" "load3,load4"))
+      (eq_attr "type" "load_12,load_16"))
  "l_a+e_1,l_dc1*2,l_dc2,l_wb")
 
 (define_insn_reservation "11_store34" 0
  (and (eq_attr "tune" "arm1136js,arm1136jfs")
-      (eq_attr "type" "store3,store4"))
+      (eq_attr "type" "store_12,store_16"))
  "l_a+e_1,l_dc1*2,l_dc2,l_wb")
 
 ;; A store can start immediately after an alu op, if that alu op does
diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md
index 61e9379..cc5dbee 100644
--- a/gcc/config/arm/arm926ejs.md
+++ b/gcc/config/arm/arm926ejs.md
@@ -133,43 +133,43 @@
 ;; most common and (b) the pessimistic assumption will lead to fewer stalls.
 (define_insn_reservation "9_load1_op" 3
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "load1,load_byte"))
+      (eq_attr "type" "load_4,load_byte"))
  "e*2,m,w")
 
 (define_insn_reservation "9_store1_op" 0
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "e,m,w")
 
 ;; multiple word loads and stores
 (define_insn_reservation "9_load2_op" 3
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "load2"))
+      (eq_attr "type" "load_8"))
  "e,m*2,w")
 
 (define_insn_reservation "9_load3_op" 4
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "load3"))
+      (eq_attr "type" "load_12"))
  "e,m*3,w")
 
 (define_insn_reservation "9_load4_op" 5
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "load4"))
+      (eq_attr "type" "load_16"))
  "e,m*4,w")
 
 (define_insn_reservation "9_store2_op" 0
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "store2"))
+      (eq_attr "type" "store_8"))
  "e,m*2,w")
 
 (define_insn_reservation "9_store3_op" 0
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "store3"))
+      (eq_attr "type" "store_12"))
  "e,m*3,w")
 
 (define_insn_reservation "9_store4_op" 0
  (and (eq_attr "tune" "arm926ejs")
-      (eq_attr "type" "store4"))
+      (eq_attr "type" "store_16"))
  "e,m*4,w")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md
index c4853c7..78b5e3a 100644
--- a/gcc/config/arm/cortex-a15.md
+++ b/gcc/config/arm/cortex-a15.md
@@ -139,25 +139,25 @@
 ;; Loads of up to two words.
 (define_insn_reservation "cortex_a15_load1" 4
   (and (eq_attr "tune" "cortexa15")
-       (eq_attr "type" "load_byte,load1,load2"))
+       (eq_attr "type" "load_byte,load_4,load_8"))
   "ca15_issue1,ca15_ls,ca15_ldr,nothing")
 
 ;; Loads of three or four words.
 (define_insn_reservation "cortex_a15_load3" 5
   (and (eq_attr "tune" "cortexa15")
-       (eq_attr "type" "load3,load4"))
+       (eq_attr "type" "load_12,load_16"))
   "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing")
 
 ;; Stores of up to two words.
 (define_insn_reservation "cortex_a15_store1" 0
   (and (eq_attr "tune" "cortexa15")
-       (eq_attr "type" "store1,store2"))
+       (eq_attr "type" "store_4,store_8"))
   "ca15_issue1,ca15_ls,ca15_str")
 
 ;; Stores of three or four words.
 (define_insn_reservation "cortex_a15_store3" 0
   (and (eq_attr "tune" "cortexa15")
-       (eq_attr "type" "store3,store4"))
+       (eq_attr "type" "store_12,store_16"))
   "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str")
 
 ;; We include Neon.md here to ensure that the branch can block the Neon units.
diff --git a/gcc/config/arm/cortex-a17.md b/gcc/config/arm/cortex-a17.md
index 1190e33..d83cca1 100644
--- a/gcc/config/arm/cortex-a17.md
+++ b/gcc/config/arm/cortex-a17.md
@@ -127,37 +127,37 @@
 ;; Loads of up to two words.
 (define_insn_reservation "cortex_a17_load1" 4
   (and (eq_attr "tune" "cortexa17")
-       (eq_attr "type" "load_byte,load1,load2"))
+       (eq_attr "type" "load_byte,load_4,load_8"))
   "ca17_ls0|ca17_ls1")
 
 ;; Loads of three words.
 (define_insn_reservation "cortex_a17_load3" 4
   (and (eq_attr "tune" "cortexa17")
-       (eq_attr "type" "load3"))
+       (eq_attr "type" "load_12"))
   "ca17_ls0+ca17_ls1")
 
 ;; Loads of four words.
 (define_insn_reservation "cortex_a17_load4" 4
   (and (eq_attr "tune" "cortexa17")
-       (eq_attr "type" "load4"))
+       (eq_attr "type" "load_16"))
   "ca17_ls0+ca17_ls1")
 
 ;; Stores of up to two words.
 (define_insn_reservation "cortex_a17_store1" 0
   (and (eq_attr "tune" "cortexa17")
-       (eq_attr "type" "store1,store2"))
+       (eq_attr "type" "store_4,store_8"))
   "ca17_ls0|ca17_ls1")
 
 ;; Stores of three words
 (define_insn_reservation "cortex_a17_store3" 0
   (and (eq_attr "tune" "cortexa17")
-       (eq_attr "type" "store3"))
+       (eq_attr "type" "store_12"))
   "ca17_ls0+ca17_ls1")
 
 ;; Stores of four words.
 (define_insn_reservation "cortex_a17_store4" 0
   (and (eq_attr "tune" "cortexa17")
-       (eq_attr "type" "store4"))
+       (eq_attr "type" "store_16"))
   "ca17_ls0+ca17_ls1")
 
 (define_insn_reservation "cortex_a17_call" 0
diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md
index da54601..a7a521f 100644
--- a/gcc/config/arm/cortex-a5.md
+++ b/gcc/config/arm/cortex-a5.md
@@ -106,45 +106,45 @@
 
 (define_insn_reservation "cortex_a5_load1" 2
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "load_byte,load1"))
+       (eq_attr "type" "load_byte,load_4"))
   "cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_store1" 0
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store_4"))
   "cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_load2" 3
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
   "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_store2" 0
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_load3" 4
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "load3"))
+       (eq_attr "type" "load_12"))
   "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
    cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_store3" 0
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "store3"))
+       (eq_attr "type" "store_12"))
   "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
    cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_load4" 5
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "load3"))
+       (eq_attr "type" "load_12"))
   "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
    cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
 
 (define_insn_reservation "cortex_a5_store4" 0
   (and (eq_attr "tune" "cortexa5")
-       (eq_attr "type" "store3"))
+       (eq_attr "type" "store_12"))
   "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
    cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
 
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index b7e0c92..73f2b90 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -141,13 +141,13 @@
 
 (define_insn_reservation "cortex_a53_load1" 4
   (and (eq_attr "tune" "cortexa53")
-       (eq_attr "type" "load_byte,load1,load_acq"))
+       (eq_attr "type" "load_byte,load_4,load_acq"))
   "cortex_a53_slot_any+cortex_a53_ls_agen,
    cortex_a53_load")
 
 (define_insn_reservation "cortex_a53_store1" 2
   (and (eq_attr "tune" "cortexa53")
-       (eq_attr "type" "store1,store_rel"))
+       (eq_attr "type" "store_4,store_rel"))
   "cortex_a53_slot_any+cortex_a53_ls_agen,
    cortex_a53_store")
 
@@ -155,14 +155,14 @@
 
 (define_insn_reservation "cortex_a53_load2" 4
   (and (eq_attr "tune" "cortexa53")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
   "cortex_a53_single_issue+cortex_a53_ls_agen,
    cortex_a53_load+cortex_a53_slot0,
    cortex_a53_load")
 
 (define_insn_reservation "cortex_a53_store2" 2
   (and (eq_attr "tune" "cortexa53")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "cortex_a53_slot_any+cortex_a53_ls_agen,
    cortex_a53_store")
 
@@ -170,14 +170,14 @@
 
 (define_insn_reservation "cortex_a53_load3plus" 6
   (and (eq_attr "tune" "cortexa53")
-       (eq_attr "type" "load3,load4"))
+       (eq_attr "type" "load_12,load_16"))
   "cortex_a53_single_issue+cortex_a53_ls_agen,
    cortex_a53_load+cortex_a53_slot0,
    cortex_a53_load")
 
 (define_insn_reservation "cortex_a53_store3plus" 2
   (and (eq_attr "tune" "cortexa53")
-       (eq_attr "type" "store3,store4"))
+       (eq_attr "type" "store_12,store_16"))
   "cortex_a53_slot_any+cortex_a53_ls_agen,
    cortex_a53_store+cortex_a53_slot0,
    cortex_a53_store")
diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md
index fd30758..59919f9 100644
--- a/gcc/config/arm/cortex-a57.md
+++ b/gcc/config/arm/cortex-a57.md
@@ -357,25 +357,25 @@
 ;; Loads of up to two words.
 (define_insn_reservation "cortex_a57_load1" 5
   (and (eq_attr "tune" "cortexa57")
-       (eq_attr "type" "load_byte,load1,load2"))
+       (eq_attr "type" "load_byte,load_4,load_8"))
   "ca57_load_model")
 
 ;; Loads of three or four words.
 (define_insn_reservation "cortex_a57_load3" 5
   (and (eq_attr "tune" "cortexa57")
-       (eq_attr "type" "load3,load4"))
+       (eq_attr "type" "load_12,load_16"))
   "ca57_ls_issue*2,ca57_load_model")
 
 ;; Stores of up to two words.
 (define_insn_reservation "cortex_a57_store1" 0
   (and (eq_attr "tune" "cortexa57")
-       (eq_attr "type" "store1,store2"))
+       (eq_attr "type" "store_4,store_8"))
   "ca57_store_model")
 
 ;; Stores of three or four words.
 (define_insn_reservation "cortex_a57_store3" 0
   (and (eq_attr "tune" "cortexa57")
-       (eq_attr "type" "store3,store4"))
+       (eq_attr "type" "store_12,store_16"))
   "ca57_ls_issue*2,ca57_store_model")
 
 ;; Advanced SIMD Unit - Integer Arithmetic Instructions.
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md
index acb9f1a..0dbe0a7 100644
--- a/gcc/config/arm/cortex-a7.md
+++ b/gcc/config/arm/cortex-a7.md
@@ -196,42 +196,42 @@
 
 (define_insn_reservation "cortex_a7_load1" 2
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "load_byte,load1"))
+       (eq_attr "type" "load_byte,load_4"))
   "cortex_a7_ex1")
 
 (define_insn_reservation "cortex_a7_store1" 0
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store_4"))
   "cortex_a7_ex1")
 
 (define_insn_reservation "cortex_a7_load2" 2
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
   "cortex_a7_both")
 
 (define_insn_reservation "cortex_a7_store2" 0
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "cortex_a7_both")
 
 (define_insn_reservation "cortex_a7_load3" 3
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "load3"))
+       (eq_attr "type" "load_12"))
   "cortex_a7_both, cortex_a7_ex1")
 
 (define_insn_reservation "cortex_a7_store3" 0
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "store4"))
+       (eq_attr "type" "store_16"))
   "cortex_a7_both, cortex_a7_ex1")
 
 (define_insn_reservation "cortex_a7_load4" 3
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "load4"))
+       (eq_attr "type" "load_16"))
   "cortex_a7_both, cortex_a7_both")
 
 (define_insn_reservation "cortex_a7_store4" 0
   (and (eq_attr "tune" "cortexa7")
-       (eq_attr "type" "store3"))
+       (eq_attr "type" "store_12"))
   "cortex_a7_both, cortex_a7_both")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md
index 1fabb3c..ceec5be 100644
--- a/gcc/config/arm/cortex-a8.md
+++ b/gcc/config/arm/cortex-a8.md
@@ -198,7 +198,7 @@
 ;; We assume 64-bit alignment for doubleword loads.
 (define_insn_reservation "cortex_a8_load1_2" 3
   (and (eq_attr "tune" "cortexa8")
-       (eq_attr "type" "load1,load2,load_byte"))
+       (eq_attr "type" "load_4,load_8,load_byte"))
   "cortex_a8_load_store_1")
 
 (define_bypass 2 "cortex_a8_load1_2"
@@ -221,7 +221,7 @@
 ;; issued as two micro-ops.
 (define_insn_reservation "cortex_a8_load3_4" 5
   (and (eq_attr "tune" "cortexa8")
-       (eq_attr "type" "load3,load4"))
+       (eq_attr "type" "load_12,load_16"))
   "cortex_a8_load_store_2")
 
 (define_bypass 4 "cortex_a8_load3_4"
@@ -238,12 +238,12 @@
 
 (define_insn_reservation "cortex_a8_store1_2" 0
   (and (eq_attr "tune" "cortexa8")
-       (eq_attr "type" "store1,store2"))
+       (eq_attr "type" "store_4,store_8"))
   "cortex_a8_load_store_1")
 
 (define_insn_reservation "cortex_a8_store3_4" 0
   (and (eq_attr "tune" "cortexa8")
-       (eq_attr "type" "store3,store4"))
+       (eq_attr "type" "store_12,store_16"))
   "cortex_a8_load_store_2")
 
 ;; An ALU instruction acting as a producer for a store instruction
diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md
index 592a2bf..1140e18 100644
--- a/gcc/config/arm/cortex-a9.md
+++ b/gcc/config/arm/cortex-a9.md
@@ -107,7 +107,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
 
 (define_insn_reservation "cortex_a9_load1_2" 4
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))
+       (eq_attr "type" "load_4, load_8, load_byte, f_loads, f_loadd"))
   "cortex_a9_ls")
 
 ;; Loads multiples and store multiples can't be issued for 2 cycles in a
@@ -116,12 +116,12 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
 
 (define_insn_reservation "cortex_a9_load3_4" 5
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "type" "load3, load4"))
+       (eq_attr "type" "load_12, load_16"))
   "cortex_a9_ls, cortex_a9_ls")
 
 (define_insn_reservation "cortex_a9_store1_2" 0
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "type" "store1, store2, f_stores, f_stored"))
+       (eq_attr "type" "store_4, store_8, f_stores, f_stored"))
   "cortex_a9_ls")
 
 ;; Almost all our store multiples use an auto-increment
@@ -130,7 +130,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
 
 (define_insn_reservation "cortex_a9_store3_4" 0
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "type" "store3, store4"))
+       (eq_attr "type" "store_12, store_16"))
   "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls")
 
 ;; We get 16*16 multiply / mac results in 3 cycles.
diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md
index 9084729..123a2a0 100644
--- a/gcc/config/arm/cortex-m4.md
+++ b/gcc/config/arm/cortex-m4.md
@@ -50,51 +50,51 @@
 ;; Byte, half-word and word load is two cycles.
 (define_insn_reservation "cortex_m4_load1" 2
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "load_byte,load1"))
+       (eq_attr "type" "load_byte,load_4"))
   "cortex_m4_a, cortex_m4_b")
 
 ;; str rx, [ry, #imm] is always one cycle.
 (define_insn_reservation "cortex_m4_store1_1" 1
   (and (and (eq_attr "tune" "cortexm4")
-	    (eq_attr "type" "store1"))
+	    (eq_attr "type" "store_4"))
        (match_test "arm_address_offset_is_imm (insn)"))
   "cortex_m4_a")
 
 ;; Other byte, half-word and word load is two cycles.
 (define_insn_reservation "cortex_m4_store1_2" 2
   (and (and (eq_attr "tune" "cortexm4")
-	    (eq_attr "type" "store1"))
+	    (eq_attr "type" "store_4"))
        (not (match_test "arm_address_offset_is_imm (insn)")))
   "cortex_m4_a*2")
 
 (define_insn_reservation "cortex_m4_load2" 3
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
   "cortex_m4_ex*3")
 
 (define_insn_reservation "cortex_m4_store2" 3
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "cortex_m4_ex*3")
 
 (define_insn_reservation "cortex_m4_load3" 4
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "load3"))
+       (eq_attr "type" "load_12"))
   "cortex_m4_ex*4")
 
 (define_insn_reservation "cortex_m4_store3" 4
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "store3"))
+       (eq_attr "type" "store_12"))
   "cortex_m4_ex*4")
 
 (define_insn_reservation "cortex_m4_load4" 5
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "load4"))
+       (eq_attr "type" "load_16"))
   "cortex_m4_ex*5")
 
 (define_insn_reservation "cortex_m4_store4" 5
   (and (eq_attr "tune" "cortexm4")
-       (eq_attr "type" "store4"))
+       (eq_attr "type" "store_16"))
   "cortex_m4_ex*5")
 
 (define_bypass 1 "cortex_m4_load1"
diff --git a/gcc/config/arm/cortex-m7.md b/gcc/config/arm/cortex-m7.md
index 218b26f..e07d7cd 100644
--- a/gcc/config/arm/cortex-m7.md
+++ b/gcc/config/arm/cortex-m7.md
@@ -93,33 +93,33 @@
 ;; The load instructions.
 (define_insn_reservation "cortex_m7_load1" 2
    (and (eq_attr "tune" "cortexm7")
-        (eq_attr "type" "load_byte, load1"))
+        (eq_attr "type" "load_byte, load_4"))
    "cm7_i0|cm7_i1,cm7_lsu")
 
 (define_insn_reservation "cortex_m7_load2" 2
    (and (eq_attr "tune" "cortexm7")
-        (eq_attr "type" "load2"))
+        (eq_attr "type" "load_8"))
    "cm7_all_units")
 
 (define_insn_reservation "cortex_m7_loadm" 2
    (and (eq_attr "tune" "cortexm7")
-        (eq_attr "type" "load3,load4"))
+        (eq_attr "type" "load_12,load_16"))
    "cm7_all_units*2")
 
 ;; The store instructions.
 (define_insn_reservation "cortex_m7_store1" 0
    (and (eq_attr "tune" "cortexm7")
-        (eq_attr "type" "store1"))
+        (eq_attr "type" "store_4"))
    "cm7_i0|cm7_i1,cm7_lsu+cm7_wb")
 
 (define_insn_reservation "cortex_m7_store2" 0
    (and (eq_attr "tune" "cortexm7")
-        (eq_attr "type" "store2"))
+        (eq_attr "type" "store_8"))
    "cm7_all_units")
 
 (define_insn_reservation "cortex_m7_storem" 0
    (and (eq_attr "tune" "cortexm7")
-        (eq_attr "type" "store3,store4"))
+        (eq_attr "type" "store_12,store_16"))
    "cm7_all_units*2")
 
 ;; The FPU instructions.
diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md
index 29ddcc1..bc65689 100644
--- a/gcc/config/arm/cortex-r4.md
+++ b/gcc/config/arm/cortex-r4.md
@@ -234,12 +234,12 @@
 
 (define_insn_reservation "cortex_r4_load_1_2" 3
   (and (eq_attr "tune_cortexr4" "yes")
-       (eq_attr "type" "load1,load2"))
+       (eq_attr "type" "load_4,load_8"))
   "cortex_r4_load_store")
 
 (define_insn_reservation "cortex_r4_load_3_4" 4
   (and (eq_attr "tune_cortexr4" "yes")
-       (eq_attr "type" "load3,load4"))
+       (eq_attr "type" "load_12,load_16"))
   "cortex_r4_load_store_2")
 
 ;; If a producing load is followed by an instruction consuming only
@@ -289,11 +289,11 @@
 
 (define_insn_reservation "cortex_r4_store_1_2" 0
   (and (eq_attr "tune_cortexr4" "yes")
-       (eq_attr "type" "store1,store2"))
+       (eq_attr "type" "store_4,store_8"))
   "cortex_r4_load_store")
 
 (define_insn_reservation "cortex_r4_store_3_4" 0
   (and (eq_attr "tune_cortexr4" "yes")
-       (eq_attr "type" "store3,store4"))
+       (eq_attr "type" "store_12,store_16"))
   "cortex_r4_load_store_2")
 
diff --git a/gcc/config/arm/exynos-m1.md b/gcc/config/arm/exynos-m1.md
index 5d397cc..71622a1 100644
--- a/gcc/config/arm/exynos-m1.md
+++ b/gcc/config/arm/exynos-m1.md
@@ -432,25 +432,25 @@
 ;; Loads of up to 2 words.
 (define_insn_reservation "exynos_m1_load" 4
   (and (eq_attr "tune" "exynosm1")
-       (eq_attr "type" "load_byte, load1, load2"))
+       (eq_attr "type" "load_byte, load_4, load_8"))
   "em1_ld")
 
 ;; Loads of 3 or 4 words.
 (define_insn_reservation "exynos_m1_loadm" 6
   (and (eq_attr "tune" "exynosm1")
-       (eq_attr "type" "load3, load4"))
+       (eq_attr "type" "load_12, load_16"))
   "(em1_ld * 3)")
 
 ;; Stores of up to 2 words.
 (define_insn_reservation "exynos_m1_store" 1
   (and (eq_attr "tune" "exynosm1")
-       (eq_attr "type" "store1, store2"))
+       (eq_attr "type" "store_4, store_8"))
   "em1_st")
 
 ;; Stores of 3 or 4 words.
 (define_insn_reservation "exynos_m1_storem" 3
   (and (eq_attr "tune" "exynosm1")
-       (eq_attr "type" "store3, store4"))
+       (eq_attr "type" "store_12, store_16"))
   "(em1_st * 3)")
 
 ;; Advanced SIMD Unit
diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md
index 86f78e7..1cf63d6 100644
--- a/gcc/config/arm/fa526.md
+++ b/gcc/config/arm/fa526.md
@@ -108,42 +108,42 @@
 
 (define_insn_reservation "526_load1_op" 3
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "load1,load_byte"))
+      (eq_attr "type" "load_4,load_byte"))
  "fa526_core")
 
 (define_insn_reservation "526_load2_op" 4
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "load2"))
+      (eq_attr "type" "load_8"))
  "fa526_core*2")
 
 (define_insn_reservation "526_load3_op" 5
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "load3"))
+      (eq_attr "type" "load_12"))
  "fa526_core*3")
 
 (define_insn_reservation "526_load4_op" 6
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "load4"))
+      (eq_attr "type" "load_16"))
  "fa526_core*4")
 
 (define_insn_reservation "526_store1_op" 0
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "fa526_core")
 
 (define_insn_reservation "526_store2_op" 1
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "store2"))
+      (eq_attr "type" "store_8"))
  "fa526_core*2")
 
 (define_insn_reservation "526_store3_op" 2
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "store3"))
+      (eq_attr "type" "store_12"))
  "fa526_core*3")
 
 (define_insn_reservation "526_store4_op" 3
  (and (eq_attr "tune" "fa526")
-      (eq_attr "type" "store4"))
+      (eq_attr "type" "store_16"))
  "fa526_core*4")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md
index 3fadbe1..d4b3e25 100644
--- a/gcc/config/arm/fa606te.md
+++ b/gcc/config/arm/fa606te.md
@@ -111,48 +111,48 @@
 
 (define_insn_reservation "606te_load1_op" 2
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "load1,load_byte"))
+      (eq_attr "type" "load_4,load_byte"))
  "fa606te_core")
 
 (define_insn_reservation "606te_load2_op" 3
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "load2"))
+      (eq_attr "type" "load_8"))
  "fa606te_core*2")
 
 (define_insn_reservation "606te_load3_op" 4
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "load3"))
+      (eq_attr "type" "load_12"))
  "fa606te_core*3")
 
 (define_insn_reservation "606te_load4_op" 5
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "load4"))
+      (eq_attr "type" "load_16"))
  "fa606te_core*4")
 
 (define_insn_reservation "606te_store1_op" 0
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "fa606te_core")
 
 (define_insn_reservation "606te_store2_op" 1
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "store2"))
+      (eq_attr "type" "store_8"))
  "fa606te_core*2")
 
 (define_insn_reservation "606te_store3_op" 2
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "store3"))
+      (eq_attr "type" "store_12"))
  "fa606te_core*3")
 
 (define_insn_reservation "606te_store4_op" 3
  (and (eq_attr "tune" "fa606te")
-      (eq_attr "type" "store4"))
+      (eq_attr "type" "store_16"))
  "fa606te_core*4")
 
 
 ;;(define_insn_reservation "606te_ldm_op" 9
 ;; (and (eq_attr "tune" "fa606te")
-;;      (eq_attr "type" "load2,load3,load4,store2,store3,store4"))
+;;      (eq_attr "type" "load_8,load_12,load_16,store_8,store_12,store_16"))
 ;; "fa606te_core*7")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md
index 5fa895a..ca6096d 100644
--- a/gcc/config/arm/fa626te.md
+++ b/gcc/config/arm/fa626te.md
@@ -123,32 +123,32 @@
 
 (define_insn_reservation "626te_load1_op" 3
  (and (eq_attr "tune" "fa626,fa626te")
-      (eq_attr "type" "load1,load_byte"))
+      (eq_attr "type" "load_4,load_byte"))
  "fa626te_core")
 
 (define_insn_reservation "626te_load2_op" 4
  (and (eq_attr "tune" "fa626,fa626te")
-      (eq_attr "type" "load2,load3"))
+      (eq_attr "type" "load_8,load_12"))
  "fa626te_core*2")
 
 (define_insn_reservation "626te_load3_op" 5
  (and (eq_attr "tune" "fa626,fa626te")
-      (eq_attr "type" "load4"))
+      (eq_attr "type" "load_16"))
  "fa626te_core*3")
 
 (define_insn_reservation "626te_store1_op" 0
  (and (eq_attr "tune" "fa626,fa626te")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "fa626te_core")
 
 (define_insn_reservation "626te_store2_op" 1
  (and (eq_attr "tune" "fa626,fa626te")
-      (eq_attr "type" "store2,store3"))
+      (eq_attr "type" "store_8,store_12"))
  "fa626te_core*2")
 
 (define_insn_reservation "626te_store3_op" 2
  (and (eq_attr "tune" "fa626,fa626te")
-      (eq_attr "type" "store4"))
+      (eq_attr "type" "store_16"))
  "fa626te_core*3")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md
index 7437280..b06a6f5 100644
--- a/gcc/config/arm/fa726te.md
+++ b/gcc/config/arm/fa726te.md
@@ -144,13 +144,13 @@
 
 (define_insn_reservation "726te_load1_op" 3
  (and (eq_attr "tune" "fa726te")
-      (eq_attr "type" "load1,load_byte"))
+      (eq_attr "type" "load_4,load_byte"))
  "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
   | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
 
 (define_insn_reservation "726te_store1_op" 1
  (and (eq_attr "tune" "fa726te")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "fa726te_blockage*2")
 
 ;; Load/Store Multiple blocks all pipelines in EX stages until WB.
@@ -161,22 +161,22 @@
 ;; the pipe 1 is stalled.
 (define_insn_reservation "726te_ldm2_op" 4
  (and (eq_attr "tune" "fa726te")
-      (eq_attr "type" "load2,load3"))
+      (eq_attr "type" "load_8,load_12"))
  "fa726te_blockage*4")
 
 (define_insn_reservation "726te_ldm3_op" 5
  (and (eq_attr "tune" "fa726te")
-      (eq_attr "type" "load4"))
+      (eq_attr "type" "load_16"))
  "fa726te_blockage*5")
 
 (define_insn_reservation "726te_stm2_op" 2
  (and (eq_attr "tune" "fa726te")
-      (eq_attr "type" "store2,store3"))
+      (eq_attr "type" "store_8,store_12"))
  "fa726te_blockage*3")
 
 (define_insn_reservation "726te_stm3_op" 3
  (and (eq_attr "tune" "fa726te")
-      (eq_attr "type" "store4"))
+      (eq_attr "type" "store_16"))
  "fa726te_blockage*4")
 
 (define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\
diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md
index 4225a46..2833920 100644
--- a/gcc/config/arm/fmp626.md
+++ b/gcc/config/arm/fmp626.md
@@ -115,32 +115,32 @@
 
 (define_insn_reservation "mp626_load1_op" 5
  (and (eq_attr "tune" "fmp626")
-      (eq_attr "type" "load1,load_byte"))
+      (eq_attr "type" "load_4,load_byte"))
  "fmp626_core")
 
 (define_insn_reservation "mp626_load2_op" 6
  (and (eq_attr "tune" "fmp626")
-      (eq_attr "type" "load2,load3"))
+      (eq_attr "type" "load_8,load_12"))
  "fmp626_core*2")
 
 (define_insn_reservation "mp626_load3_op" 7
  (and (eq_attr "tune" "fmp626")
-      (eq_attr "type" "load4"))
+      (eq_attr "type" "load_16"))
  "fmp626_core*3")
 
 (define_insn_reservation "mp626_store1_op" 0
  (and (eq_attr "tune" "fmp626")
-      (eq_attr "type" "store1"))
+      (eq_attr "type" "store_4"))
  "fmp626_core")
 
 (define_insn_reservation "mp626_store2_op" 1
  (and (eq_attr "tune" "fmp626")
-      (eq_attr "type" "store2,store3"))
+      (eq_attr "type" "store_8,store_12"))
  "fmp626_core*2")
 
 (define_insn_reservation "mp626_store3_op" 2
  (and (eq_attr "tune" "fmp626")
-      (eq_attr "type" "store4"))
+      (eq_attr "type" "store_16"))
  "fmp626_core*3")
 
 (define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op"
diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md
index e25b819..ab5ae65 100644
--- a/gcc/config/arm/iwmmxt.md
+++ b/gcc/config/arm/iwmmxt.md
@@ -155,7 +155,7 @@
                                  (const_int 8)
                                  (const_int 4))]
                               (const_int 4)))
-   (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,f_mcrr,f_mrrc,\
+   (set_attr "type" "*,*,*,load_8,store_8,*,*,*,*,*,f_mcrr,f_mrrc,\
                      ffarithd,f_loadd,f_stored")
    (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*")
    (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")]
@@ -188,7 +188,7 @@
      default:
        gcc_unreachable ();
      }"
-  [(set_attr "type"           "*,*,*,*,load1,store1,*,*,*,*,f_mcr,f_mrc,\
+  [(set_attr "type"           "*,*,*,*,load_4,store_4,*,*,*,*,f_mcr,f_mrc,\
                                fmov,f_loads,f_stores")
    (set_attr "length"         "*,*,*,*,*,        *,*,*,  16,     *,*,*,*,*,*")
    (set_attr "pool_range"     "*,*,*,*,4096,     *,*,*,1024,     *,*,*,*,1020,*")
@@ -227,7 +227,7 @@
    case 4: return \"tmcr%?\\t%0, %1\";
    default: return \"tmrc%?\\t%0, %1\";
   }"
-  [(set_attr "type"           "*,*,load1,store1,*,*")
+  [(set_attr "type"           "*,*,load_4,store_4,*,*")
    (set_attr "pool_range"     "*,*,4096,     *,*,*")
    (set_attr "neg_pool_range" "*,*,4084,     *,*,*")]
 )
@@ -249,7 +249,7 @@
    }"
   [(set_attr "predicable" "yes")
    (set_attr "length"         "4,     4,   4,4,4,8,   8,8")
-   (set_attr "type"           "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load1,store1")
+   (set_attr "type"           "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load_4,store_4")
    (set_attr "pool_range"     "*,     *, 256,*,*,*, 256,*")
    (set_attr "neg_pool_range" "*,     *, 244,*,*,*, 244,*")]
 )
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 303303e..01fbb55 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -36,7 +36,7 @@
                   (const_int 12))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldm%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -55,7 +55,7 @@
                   (const_int 12))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
   "ldmia\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")])
+  [(set_attr "type" "load_16")])
 
 (define_insn "*ldm4_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -74,7 +74,7 @@
                   (const_int 12))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "ldmia%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -95,7 +95,7 @@
                   (const_int 12))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
   "ldmia\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")])
+  [(set_attr "type" "load_16")])
 
 (define_insn "*stm4_"
   [(match_parallel 0 "store_multiple_operation"
@@ -109,7 +109,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stm%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -127,7 +127,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "stmia%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -145,7 +145,7 @@
           (match_operand:SI 4 "low_register_operand" ""))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
   "stmia\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")])
+  [(set_attr "type" "store_16")])
 
 (define_insn "*ldm4_ib"
   [(match_parallel 0 "load_multiple_operation"
@@ -163,7 +163,7 @@
                   (const_int 16))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "ldmib%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm4_ib_update"
@@ -184,7 +184,7 @@
                   (const_int 16))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
   "ldmib%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm4_ib"
@@ -199,7 +199,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "stmib%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm4_ib_update"
@@ -216,7 +216,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
   "stmib%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm4_da"
@@ -234,7 +234,7 @@
           (mem:SI (match_dup 5)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "ldmda%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm4_da_update"
@@ -254,7 +254,7 @@
           (mem:SI (match_dup 5)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
   "ldmda%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm4_da"
@@ -269,7 +269,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "stmda%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm4_da_update"
@@ -286,7 +286,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
   "stmda%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm4_db"
@@ -305,7 +305,7 @@
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldmdb%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -327,7 +327,7 @@
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "ldmdb%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")
+  [(set_attr "type" "load_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -343,7 +343,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stmdb%?\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -361,7 +361,7 @@
           (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "stmdb%?\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")
+  [(set_attr "type" "store_16")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -484,7 +484,7 @@
                   (const_int 8))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldm%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -500,7 +500,7 @@
                   (const_int 8))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
   "ldmia\t%4, {%1, %2, %3}"
-  [(set_attr "type" "load3")])
+  [(set_attr "type" "load_12")])
 
 (define_insn "*ldm3_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -516,7 +516,7 @@
                   (const_int 8))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldmia%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -534,7 +534,7 @@
                   (const_int 8))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
   "ldmia\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "load3")])
+  [(set_attr "type" "load_12")])
 
 (define_insn "*stm3_"
   [(match_parallel 0 "store_multiple_operation"
@@ -546,7 +546,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stm%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -562,7 +562,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stmia%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -578,7 +578,7 @@
           (match_operand:SI 3 "low_register_operand" ""))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
   "stmia\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "store3")])
+  [(set_attr "type" "store_12")])
 
 (define_insn "*ldm3_ib"
   [(match_parallel 0 "load_multiple_operation"
@@ -593,7 +593,7 @@
                   (const_int 12))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "ldmib%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm3_ib_update"
@@ -611,7 +611,7 @@
                   (const_int 12))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "ldmib%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm3_ib"
@@ -624,7 +624,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "stmib%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm3_ib_update"
@@ -639,7 +639,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "stmib%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm3_da"
@@ -654,7 +654,7 @@
           (mem:SI (match_dup 4)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "ldmda%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm3_da_update"
@@ -671,7 +671,7 @@
           (mem:SI (match_dup 4)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "ldmda%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm3_da"
@@ -684,7 +684,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "stmda%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm3_da_update"
@@ -699,7 +699,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
   "stmda%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm3_db"
@@ -715,7 +715,7 @@
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldmdb%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -734,7 +734,7 @@
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldmdb%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "load3")
+  [(set_attr "type" "load_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -748,7 +748,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stmdb%?\t%4, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -764,7 +764,7 @@
           (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stmdb%?\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "store3")
+  [(set_attr "type" "store_12")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -870,7 +870,7 @@
                   (const_int 4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "ldm%?\t%3, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -883,7 +883,7 @@
                   (const_int 4))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2"
   "ldmia\t%3, {%1, %2}"
-  [(set_attr "type" "load2")])
+  [(set_attr "type" "load_8")])
 
 (define_insn "*ldm2_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -896,7 +896,7 @@
                   (const_int 4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldmia%?\t%3!, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -911,7 +911,7 @@
                   (const_int 4))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
   "ldmia\t%3!, {%1, %2}"
-  [(set_attr "type" "load2")])
+  [(set_attr "type" "load_8")])
 
 (define_insn "*stm2_"
   [(match_parallel 0 "store_multiple_operation"
@@ -921,7 +921,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "stm%?\t%3, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -935,7 +935,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stmia%?\t%3!, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -949,7 +949,7 @@
           (match_operand:SI 2 "low_register_operand" ""))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
   "stmia\t%3!, {%1, %2}"
-  [(set_attr "type" "store2")])
+  [(set_attr "type" "store_8")])
 
 (define_insn "*ldm2_ib"
   [(match_parallel 0 "load_multiple_operation"
@@ -961,7 +961,7 @@
                   (const_int 8))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "ldmib%?\t%3, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm2_ib_update"
@@ -976,7 +976,7 @@
                   (const_int 8))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "ldmib%?\t%3!, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm2_ib"
@@ -987,7 +987,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "stmib%?\t%3, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm2_ib_update"
@@ -1000,7 +1000,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "stmib%?\t%3!, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm2_da"
@@ -1012,7 +1012,7 @@
           (mem:SI (match_dup 3)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "ldmda%?\t%3, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm2_da_update"
@@ -1026,7 +1026,7 @@
           (mem:SI (match_dup 3)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "ldmda%?\t%3!, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm2_da"
@@ -1037,7 +1037,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "stmda%?\t%3, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*stm2_da_update"
@@ -1050,7 +1050,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
   "stmda%?\t%3!, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")])
 
 (define_insn "*ldm2_db"
@@ -1063,7 +1063,7 @@
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "ldmdb%?\t%3, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -1079,7 +1079,7 @@
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldmdb%?\t%3!, {%1, %2}"
-  [(set_attr "type" "load2")
+  [(set_attr "type" "load_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -1091,7 +1091,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "stmdb%?\t%3, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
@@ -1105,7 +1105,7 @@
           (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stmdb%?\t%3!, {%1, %2}"
-  [(set_attr "type" "store2")
+  [(set_attr "type" "store_8")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")])
 
diff --git a/gcc/config/arm/marvell-pj4.md b/gcc/config/arm/marvell-pj4.md
index bed10f7..33e3432 100644
--- a/gcc/config/arm/marvell-pj4.md
+++ b/gcc/config/arm/marvell-pj4.md
@@ -138,31 +138,31 @@
 
 (define_insn_reservation "pj4_ldr"  3
   (and (eq_attr "tune" "marvell_pj4")
-       (eq_attr "type" "load_byte,load1"))
+       (eq_attr "type" "load_byte,load_4"))
                        "pj4_is,pj4_alu1,nothing*2,pj4_cp")
 
 (define_insn_reservation "pj4_ldrd" 3
   (and (eq_attr "tune" "marvell_pj4")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
                        "pj4_is,pj4_alu1,nothing*2,pj4_cpb")
 
 (define_insn_reservation "pj4_str"  1
   (and (eq_attr "tune" "marvell_pj4")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store_4"))
                        "pj4_is,pj4_alu1,nothing*2,pj4_cp")
 
 (define_insn_reservation "pj4_strd" 1
   (and (eq_attr "tune" "marvell_pj4")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
                        "pj4_is,pj4_alu1,nothing*2,pj4_cpb")
 
 (define_insn_reservation "pj4_ldm" 4
   (and (eq_attr "tune" "marvell_pj4")
-       (eq_attr "type" "load3,load4")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp")
+       (eq_attr "type" "load_12,load_16")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp")
 
 (define_insn_reservation "pj4_stm" 2
   (and (eq_attr "tune" "marvell_pj4")
-       (eq_attr "type" "store3,store4")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp")
+       (eq_attr "type" "store_12,store_16")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp")
 
 ;; Loads forward at WR-stage to ALU pipes
 (define_bypass 2 "pj4_ldr,pj4_ldrd" "pj4_alu")
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index f162fc7..5d196a6 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -650,7 +650,7 @@
     }
   }"
   [(set_attr "length" "4,4,6,6,2,2,6,4,4")
-   (set_attr "type" "multiple,multiple,multiple,multiple,load2,store2,load2,store2,multiple")
+   (set_attr "type" "multiple,multiple,multiple,multiple,load_8,store_8,load_8,store_8,multiple")
    (set_attr "arch" "t1,t1,t1,v8mb,t1,t1,t1,t1,t1")
    (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*")]
 )
@@ -673,7 +673,7 @@
    str\\t%1, %0
    mov\\t%0, %1"
   [(set_attr "length" "2,2,4,4,4,2,2,2,2,2")
-   (set_attr "type" "mov_reg,mov_imm,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg")
+   (set_attr "type" "mov_reg,mov_imm,mov_imm,multiple,multiple,load_4,store_4,load_4,store_4,mov_reg")
    (set_attr "pool_range" "*,*,*,*,*,*,*,1018,*,*")
    (set_attr "arch" "t1,t1,v8mb,t1,t1,t1,t1,t1,t1,t1")
    (set_attr "conds" "set,clob,nocond,*,*,nocond,nocond,nocond,nocond,nocond")])
@@ -789,7 +789,7 @@
       return \"ldrh	%0, %1\";
     }"
   [(set_attr "length" "2,4,2,2,2,2,4")
-   (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm,mov_imm")
+   (set_attr "type" "alus_imm,load_4,store_4,mov_reg,mov_reg,mov_imm,mov_imm")
    (set_attr "arch" "t1,t1,t1,t1,t1,t1,v8mb")
    (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob,nocond")])
 
@@ -824,7 +824,7 @@
    mov\\t%0, %1
    movs\\t%0, %1"
   [(set_attr "length" "2")
-   (set_attr "type" "alu_imm,load1,store1,mov_reg,mov_imm,mov_imm")
+   (set_attr "type" "alu_imm,load_4,store_4,mov_reg,mov_imm,mov_imm")
    (set_attr "pool_range" "*,32,*,*,*,*")
    (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")])
 
@@ -860,7 +860,7 @@
     }
   "
   [(set_attr "length" "2")
-   (set_attr "type" "mov_reg,load1,store1,mov_reg,mov_reg")
+   (set_attr "type" "mov_reg,load_4,store_4,mov_reg,mov_reg")
    (set_attr "pool_range" "*,1018,*,*,*")
    (set_attr "conds" "clob,nocond,nocond,nocond,nocond")])
 ;;; ??? This should have alternatives for constants.
@@ -879,7 +879,7 @@
    mov\\t%0, %1
    mov\\t%0, %1"
   [(set_attr "length" "2")
-   (set_attr "type" "alus_imm,load1,store1,load1,store1,mov_reg,mov_reg")
+   (set_attr "type" "alus_imm,load_4,store_4,load_4,store_4,mov_reg,mov_reg")
    (set_attr "pool_range" "*,*,*,1018,*,*,*")
    (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")]
 )
@@ -921,7 +921,7 @@
     }
   "
   [(set_attr "length" "4,2,2,6,4,4")
-   (set_attr "type" "multiple,load2,store2,load2,store2,multiple")
+   (set_attr "type" "multiple,load_8,store_8,load_8,store_8,multiple")
    (set_attr "pool_range" "*,*,*,1018,*,*")]
 )
 \f
@@ -947,7 +947,7 @@
   [(set_attr "length" "4")
    ; This isn't entirely accurate...  It loads as well, but in terms of
    ; scheduling the following insn it is better to consider it as a store
-   (set_attr "type" "store3")]
+   (set_attr "type" "store_12")]
 )
 
 (define_insn "movmem8b"
@@ -966,7 +966,7 @@
   [(set_attr "length" "4")
    ; This isn't entirely accurate...  It loads as well, but in terms of
    ; scheduling the following insn it is better to consider it as a store
-   (set_attr "type" "store2")]
+   (set_attr "type" "store_8")]
 )
 
 \f
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 2e7580f..b78c3d2 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -239,7 +239,7 @@
         (mem:SI (post_inc:SI (reg:SI SP_REGNUM))))]
   "TARGET_THUMB2 && (reload_in_progress || reload_completed)"
   "pop\t{%0}"
-  [(set_attr "type" "load1")
+  [(set_attr "type" "load_4")
    (set_attr "length" "2")
    (set_attr "predicable" "yes")]
 )
@@ -265,7 +265,7 @@
    ldr%?\\t%0, %1
    str%?\\t%1, %0
    str%?\\t%1, %0"
-  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1")
+  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,load_4,store_4,store_4")
    (set_attr "length" "2,4,2,4,4,4,4,4,4")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
@@ -305,7 +305,7 @@
    movw%?\\t%0, %L1\\t%@ movhi
    strh%?\\t%1, %0\\t%@ movhi
    ldrh%?\\t%0, %1\\t%@ movhi"
-  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1")
+  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store_4,load_4")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no,yes,no,no,no")
    (set_attr "length" "2,4,2,4,4,4")
@@ -325,7 +325,7 @@
   "TARGET_THUMB2
    && INTVAL (operands[5]) == INTVAL (operands[2]) + 4"
   "strd\\t%3, %4, [%0, %2]!"
-  [(set_attr "type" "store2")]
+  [(set_attr "type" "store_8")]
 )
 
 (define_insn "*thumb2_cmpsi_neg_shiftsi"
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index b0b375c..c9f0835 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -84,11 +84,11 @@
 ; fmul[d,s]          double/single floating point multiply.
 ; fsqrt[d,s]         double/single precision floating point square root.
 ; load_acq           load-acquire.
-; load_byte          load byte(s) from memory to arm registers.
-; load1              load 1 word from memory to arm registers.
-; load2              load 2 words from memory to arm registers.
-; load3              load 3 words from memory to arm registers.
-; load4              load 4 words from memory to arm registers.
+; load_byte          load 1 byte from memory.
+; load_4             load 4 bytes from memory.
+; load_8             load 8 bytes from memory.
+; load_12            load 12 bytes from memory.
+; load_16            load 16 bytes from memory.
 ; logic_imm          any logical instruction that doesn't have a shifted
 ;                    operand and has an immediate operand.
 ; logic_reg          any logical instruction that doesn't have a shifted
@@ -152,10 +152,10 @@
 ; smusd              signed dual multiply subtract.
 ; smusdx             signed dual multiply subtract reverse.
 ; store_rel          store-release.
-; store1             store 1 word to memory from arm registers.
-; store2             store 2 words to memory from arm registers.
-; store3             store 3 words to memory from arm registers.
-; store4             store 4 (or more) words to memory from arm registers.
+; store_4            store 4 bytes to memory.
+; store_8            store 8 bytes to memory.
+; store_12           store 12 bytes to memory.
+; store_16           store 16 bytes (or more) to memory.
 ; trap               cause a trap in the kernel.
 ; udiv               unsigned division.
 ; umaal              unsigned multiply accumulate accumulate long.
@@ -612,10 +612,10 @@
   fsqrtd,\
   load_acq,\
   load_byte,\
-  load1,\
-  load2,\
-  load3,\
-  load4,\
+  load_4,\
+  load_8,\
+  load_12,\
+  load_16,\
   logic_imm,\
   logic_reg,\
   logic_shift_imm,\
@@ -669,10 +669,10 @@
   smusd,\
   smusdx,\
   store_rel,\
-  store1,\
-  store2,\
-  store3,\
-  store4,\
+  store_4,\
+  store_8,\
+  store_12,\
+  store_16,\
   trap,\
   udiv,\
   umaal,\
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index d8f77e2..cb352d2 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -60,8 +60,8 @@
      (const_string "mov_reg"))
     (const_string "mvn_imm")
     (const_string "mov_imm")
-    (const_string "store1")
-    (const_string "load1")
+    (const_string "store_4")
+    (const_string "load_4")
     (const_string "f_mcr")
     (const_string "f_mrc")
     (const_string "fmov")])
@@ -107,7 +107,7 @@
   (set_attr "predicable_short_it"
    "yes, no, yes, no, no, no, no, no, no")
   (set_attr "type"
-   "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\
+   "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
     f_mcr, f_mrc, fmov")
   (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
   (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
@@ -156,8 +156,8 @@
      (const_string "mov_reg"))
     (const_string "mvn_imm")
     (const_string "mov_imm")
-    (const_string "store1")
-    (const_string "load1")
+    (const_string "store_4")
+    (const_string "load_4")
     (const_string "f_mcr")
     (const_string "f_mrc")
     (const_string "fmov")])
@@ -203,7 +203,7 @@
   (set_attr "predicable_short_it"
    "yes, no, yes, no, no, no, no, no, no")
   (set_attr "type"
-   "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\
+   "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
     f_mcr, f_mrc, fmov")
   (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
   (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
@@ -246,7 +246,7 @@
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,
+   (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load_4,store_4,
 		     f_mcr,f_mrc,fmov,f_loads,f_stores")
    (set_attr "pool_range"     "*,*,*,*,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
@@ -294,7 +294,7 @@
   "
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
-   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores")
+   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
    (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
    (set_attr "pool_range"     "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
    (set_attr "neg_pool_range" "*,*,*,*,*,   0,   0,*,*,*,*,*,1008,*")]
@@ -338,7 +338,7 @@
       gcc_unreachable ();
     }
   "
-  [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
+  [(set_attr "type" "multiple,multiple,multiple,multiple,load_8,load_8,store_8,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
    (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8)
                               (eq_attr "alternative" "2") (const_int 12)
                               (eq_attr "alternative" "3") (const_int 16)
@@ -386,7 +386,7 @@
       gcc_unreachable ();
     }
   "
-  [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
+  [(set_attr "type" "multiple,multiple,multiple,multiple,load_8,load_8,store_8,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
    (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
                                (eq_attr "alternative" "2") (const_int 12)
                                (eq_attr "alternative" "3") (const_int 16)
@@ -461,7 +461,7 @@
 				    no, no, no, no,\
 				    no, no")
    (set_attr_alternative "type"
-    [(const_string "load1") (const_string "store1")
+    [(const_string "load_4") (const_string "store_4")
      (const_string "fmov") (const_string "mov_reg")
      (const_string "f_mcr") (const_string "f_mrc")
      (const_string "fconsts") (const_string "neon_load1_1reg")
@@ -531,7 +531,7 @@
   "
   [(set_attr "conds" "unconditional")
    (set_attr "type" "neon_load1_1reg,neon_store1_1reg,\
-                     load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple")
+                     load_4,store_4,fmov,mov_reg,f_mcr,f_mrc,multiple")
    (set_attr "length" "4,4,4,4,4,4,4,4,8")]
 )
 
@@ -583,7 +583,7 @@
     }
   "
   [(set_attr "conds" "unconditional")
-   (set_attr "type" "load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple")
+   (set_attr "type" "load_4,store_4,fmov,mov_reg,f_mcr,f_mrc,multiple")
    (set_attr "length" "4,4,4,4,4,4,8")]
 )
 
@@ -623,7 +623,7 @@
   "
   [(set_attr "predicable" "yes")
    (set_attr "type"
-     "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg")
+     "f_mcr,f_mrc,fconsts,f_loads,f_stores,load_4,store_4,fmov,mov_reg")
    (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
 )
@@ -660,7 +660,7 @@
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type"
-     "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg")
+     "f_mcr,f_mrc,fconsts,f_loads,f_stores,load_4,store_4,fmov,mov_reg")
    (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
 )
@@ -704,7 +704,7 @@
     }
   "
   [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
-                     load2,store2,ffarithd,multiple")
+                     load_8,store_8,ffarithd,multiple")
    (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
 			       (eq_attr "alternative" "8")
 				(if_then_else
@@ -753,7 +753,7 @@
     }
   "
   [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
-                     f_stored,load2,store2,ffarithd,multiple")
+                     f_stored,load_8,store_8,ffarithd,multiple")
    (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
 			       (eq_attr "alternative" "8")
 				(if_then_else
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index 62a0732..7e70408 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -92,28 +92,28 @@
 
 (define_insn_reservation "xgene1_load_pair" 6
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load2"))
+       (eq_attr "type" "load_8"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_store_pair" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store_8"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_fp_load1" 10
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load1")
+       (eq_attr "type" "load_4")
        (eq_attr "fp" "yes"))
   "xgene1_decode1op")
 
 (define_insn_reservation "xgene1_load1" 5
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load1"))
+       (eq_attr "type" "load_4"))
   "xgene1_decode1op")
 
 (define_insn_reservation "xgene1_store1" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store_4"))
   "xgene1_decode2op")
 
 (define_insn_reservation "xgene1_move" 1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Patch AArch64 2/2] Fix memory sizes to load/store patterns
  2017-06-12 13:54 [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size James Greenhalgh
@ 2017-06-12 13:54 ` James Greenhalgh
  2017-06-21 10:50   ` James Greenhalgh
  2017-06-12 14:28 ` [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size Kyrill Tkachov
  1 sibling, 1 reply; 10+ messages in thread
From: James Greenhalgh @ 2017-06-12 13:54 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

[-- Attachment #1: Type: text/plain, Size: 1582 bytes --]


Hi,

There seems to be a partial misconception in the AArch64 backend that
load1/load2 referred to the number of registers to load, rather than the
number of words to load. This patch fixes that using the new "number of
bytes" types added in the previous patch.

That means using the load_16 and store_16 types that were defined in the
previous patch for the first time in the AArch64 backend. To ensure
continuity for scheduling models, I've just split this out from load_8.
Please update your models if this is very wrong!

Bootstrapped on aarch64-none-linux-gnu with no issue.

OK?

Thanks,
James

---
2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
	types correctly.
	(movti_aarch64): Likewise.
	(movdf_aarch64): Likewise.
	(movtf_aarch64): Likewise.
	(load_pairdi): Likewise.
	(store_pairdi): Likewise.
	(load_pairdf): Likewise.
	(store_pairdf): Likewise.
	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
	(ldr_got_small_<mode>): Likewise.
	(ldr_got_small_28k_<mode>): Likewise.
	(ldr_got_tiny): Likewise.
	* config/aarch64/iterators.md (ldst_sz): New.
	(ldpstp_sz): Likewise.
	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
	to store_16.
	(thunderx_load): Split load_8 to load_16.
	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
	load_8 to load_16.
	(thunderx2t99_storepair_basic): Split store_8 to store_16.
	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
	(xgene1_store_pair): Split store_8 to store_16.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0002-Patch-AArch64-2-2-Fix-memory-sizes-to-load-store-pat.patch --]
[-- Type: text/x-patch; name="0002-Patch-AArch64-2-2-Fix-memory-sizes-to-load-store-pat.patch", Size: 6545 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 11295a6..a1385e3 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -981,7 +981,7 @@
        DONE;
     }"
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
-                     load_4,load_4,store_4,store_4,\
+                     load_8,load_8,store_8,store_8,\
                      adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
@@ -1026,7 +1026,8 @@
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
-		             load_8,store_8,store_8,f_loadd,f_stored")
+		             load_16,store_16,store_16,\
+                             load_16,store_16")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
@@ -1121,7 +1122,7 @@
    str\\t%x1, %0
    mov\\t%x0, %x1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
-                     f_loadd,f_stored,load_4,store_4,mov_reg")
+                     f_loadd,f_stored,load_8,store_8,mov_reg")
    (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
 )
 
@@ -1145,7 +1146,7 @@
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0"
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
-                     f_loadd,f_stored,load_8,store_8,store_8")
+                     f_loadd,f_stored,load_16,store_16,store_16")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
    (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
 )
@@ -1209,7 +1210,7 @@
   "@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
-  [(set_attr "type" "load_8,neon_load1_2reg")
+  [(set_attr "type" "load_16,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1244,7 +1245,7 @@
   "@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
-  [(set_attr "type" "store_8,neon_store1_2reg")
+  [(set_attr "type" "store_16,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1278,7 +1279,7 @@
   "@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load_8")
+  [(set_attr "type" "neon_load1_2reg,load_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1312,7 +1313,7 @@
   "@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store_8")
+  [(set_attr "type" "neon_store1_2reg,store_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1330,7 +1331,7 @@
                    (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load_8")]
+  [(set_attr "type" "load_<ldpstp_sz>")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1363,7 +1364,7 @@
           (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store_8")]
+  [(set_attr "type" "store_<ldpstp_sz>")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -5139,7 +5140,7 @@
 		    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_sidi"
@@ -5162,7 +5163,7 @@
 		    UNSPEC_GOTSMALLPIC28K))]
   ""
   "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_28k_sidi"
@@ -5183,7 +5184,7 @@
 		   UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "aarch64_load_tp_hard"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43be7fd..a65c3aa 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -384,6 +384,11 @@
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; The size of access, in bytes.
+(define_mode_attr ldst_sz [(SI "4") (DI "8")])
+;; Likewise for load/store pair.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
 (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index c18da2f..84ac6cd 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -100,7 +100,7 @@
 ;; Store pair are single issued
 (define_insn_reservation "thunderx_storepair" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx_pipe0 + thunderx_pipe1")
 
 ;; Prefetch are single issued
@@ -112,7 +112,7 @@
 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "load_4, load_8"))
+       (eq_attr "type" "load_4, load_8, load_16"))
   "thunderx_pipe0")
 
 (define_insn_reservation "thunderx_brj" 1
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 632396f..4e39610 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -128,7 +128,7 @@
 
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8,load_16"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
@@ -138,7 +138,7 @@
 
 (define_insn_reservation "thunderx2t99_storepair_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 ;; FP data processing instructions.
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index 7e70408..0b457ee 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -92,12 +92,12 @@
 
 (define_insn_reservation "xgene1_load_pair" 6
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8, load_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_store_pair" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8, store_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_fp_load1" 10

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size
  2017-06-12 13:54 [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size James Greenhalgh
  2017-06-12 13:54 ` [Patch AArch64 2/2] Fix memory sizes to load/store patterns James Greenhalgh
@ 2017-06-12 14:28 ` Kyrill Tkachov
  2017-06-21 10:50   ` James Greenhalgh
  1 sibling, 1 reply; 10+ messages in thread
From: Kyrill Tkachov @ 2017-06-12 14:28 UTC (permalink / raw)
  To: James Greenhalgh, gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft


On 12/06/17 14:53, James Greenhalgh wrote:
> Hi,
>
> In the AArch64 backend and scheduling models there is some confusion as to
> what the load1/load2 etc. scheduling types refer to. This leads to us using
> load1/load2 in two contexts - for a variety of 32-bit, 64-bit and 128-bit
> loads in AArch32 and 128-bit loads in AArch64. That leads to an undesirable
> confusion in scheduling.
>
> Fixing it is easy, but mechanical and boring. Essentially,
>
>    s/load1/load_4/
>    s/load2/load_8/
>    s/load3/load_12/
>    s/load4/load_16/
>    s/store1/store_4/
>    s/store2/store_8/
>    s/store3/store_12/
>    s/store4/store_16/

So the number now is the number of bytes being loaded?

> Across all sorts of pipeline models, and the two backends.
>
> I have intentionally not modified any of the patterns which now look obviously
> incorrect. I'll be doing a second pass over the AArch64 back-end in patch
> 2/2 which will fix these bugs. The AArch32 back-end looked to me to get this
> correct.
>
> Bootstrapped on AArch64 and ARM without issue - there's no functional
> change here.
>
> OK?

Ok from an arm perspective.

Kyrill

>
> Thanks,
> James
>
> ---
> gcc/
>
> 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
>
> 	* config/arm/types.md (type): Rename load1/2/3/4 to load_4/8/12/16
> 	and store1/2/3/4 to store_4/8/12/16.
> 	* config/aarch64/aarch64.md: Update for rename.
> 	* config/arm/arm.md: Likewise.
> 	* config/arm/arm.c: Likewise.
> 	* config/arm/thumb1.md: Likewise.
> 	* config/arm/thumb2.md: Likewise.
> 	* config/arm/vfp.md: Likewise.
> 	* config/arm/arm-generic.md: Likewise.
> 	* config/arm/arm1020e.md: Likewise.
> 	* config/arm/arm1026ejs.md: Likewise.
> 	* config/arm/arm1136jfs.md: Likewise.
> 	* config/arm/arm926ejs.md: Likewise.
> 	* config/arm/cortex-a15.md: Likewise.
> 	* config/arm/cortex-a17.md: Likewise.
> 	* config/arm/cortex-a5.md: Likewise.
> 	* config/arm/cortex-a53.md: Likewise.
> 	* config/arm/cortex-a57.md: Likewise.
> 	* config/arm/cortex-a7.md: Likewise.
> 	* config/arm/cortex-a8.md: Likewise.
> 	* config/arm/cortex-a9.md: Likewise.
> 	* config/arm/cortex-m4.md: Likewise.
> 	* config/arm/cortex-m7.md: Likewise.
> 	* config/arm/cortex-r4.md: Likewise.
> 	* config/arm/exynos-m1.md: Likewise.
> 	* config/arm/fa526.md: Likewise.
> 	* config/arm/fa606te.md: Likewise.
> 	* config/arm/fa626te.md: Likewise.
> 	* config/arm/fa726te.md: Likewise.
> 	* config/arm/fmp626.md: Likewise.
> 	* config/arm/iwmmxt.md: Likewise.
> 	* config/arm/ldmstm.md: Likewise.
> 	* config/arm/marvell-pj4.md: Likewise.
> 	* config/arm/xgene1.md: Likewise.
> 	* config/aarch64/thunderx.md: Likewise.
> 	* config/aarch64/thunderx2t99.md: Likewise.
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size
  2017-06-12 14:28 ` [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size Kyrill Tkachov
@ 2017-06-21 10:50   ` James Greenhalgh
  2017-07-27 18:09     ` James Greenhalgh
  0 siblings, 1 reply; 10+ messages in thread
From: James Greenhalgh @ 2017-06-21 10:50 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: gcc-patches, nd, richard.earnshaw, marcus.shawcroft

On Mon, Jun 12, 2017 at 03:28:52PM +0100, Kyrill Tkachov wrote:
> 
> On 12/06/17 14:53, James Greenhalgh wrote:
> >Hi,
> >
> >In the AArch64 backend and scheduling models there is some confusion as to
> >what the load1/load2 etc. scheduling types refer to. This leads to us using
> >load1/load2 in two contexts - for a variety of 32-bit, 64-bit and 128-bit
> >loads in AArch32 and 128-bit loads in AArch64. That leads to an undesirable
> >confusion in scheduling.
> >
> >Fixing it is easy, but mechanical and boring. Essentially,
> >
> >   s/load1/load_4/
> >   s/load2/load_8/
> >   s/load3/load_12/
> >   s/load4/load_16/
> >   s/store1/store_4/
> >   s/store2/store_8/
> >   s/store3/store_12/
> >   s/store4/store_16/
> 
> So the number now is the number of bytes being loaded?
> 
> >Across all sorts of pipeline models, and the two backends.
> >
> >I have intentionally not modified any of the patterns which now look obviously
> >incorrect. I'll be doing a second pass over the AArch64 back-end in patch
> >2/2 which will fix these bugs. The AArch32 back-end looked to me to get this
> >correct.
> >
> >Bootstrapped on AArch64 and ARM without issue - there's no functional
> >change here.
> >
> >OK?
> 
> Ok from an arm perspective.

*Ping* for the AArch64 maintainers.

Thanks,
James

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns
  2017-06-12 13:54 ` [Patch AArch64 2/2] Fix memory sizes to load/store patterns James Greenhalgh
@ 2017-06-21 10:50   ` James Greenhalgh
  2017-07-03 10:47     ` James Greenhalgh
  2017-07-03 10:47     ` James Greenhalgh
  0 siblings, 2 replies; 10+ messages in thread
From: James Greenhalgh @ 2017-06-21 10:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

*ping*

Thanks,
James

On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> 
> Hi,
> 
> There seems to be a partial misconception in the AArch64 backend that
> load1/load2 referred to the number of registers to load, rather than the
> number of words to load. This patch fixes that using the new "number of
> bytes" types added in the previous patch.
> 
> That means using the load_16 and store_16 types that were defined in the
> previous patch for the first time in the AArch64 backend. To ensure
> continuity for scheduling models, I've just split this out from load_8.
> Please update your models if this is very wrong!
> 
> Bootstrapped on aarch64-none-linux-gnu with no issue.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> 	types correctly.
> 	(movti_aarch64): Likewise.
> 	(movdf_aarch64): Likewise.
> 	(movtf_aarch64): Likewise.
> 	(load_pairdi): Likewise.
> 	(store_pairdi): Likewise.
> 	(load_pairdf): Likewise.
> 	(store_pairdf): Likewise.
> 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> 	(ldr_got_small_<mode>): Likewise.
> 	(ldr_got_small_28k_<mode>): Likewise.
> 	(ldr_got_tiny): Likewise.
> 	* config/aarch64/iterators.md (ldst_sz): New.
> 	(ldpstp_sz): Likewise.
> 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> 	to store_16.
> 	(thunderx_load): Split load_8 to load_16.
> 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> 	load_8 to load_16.
> 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> 	(xgene1_store_pair): Split store_8 to store_16.
> 

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 11295a6..a1385e3 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -981,7 +981,7 @@
>         DONE;
>      }"
>    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> -                     load_4,load_4,store_4,store_4,\
> +                     load_8,load_8,store_8,store_8,\
>                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
>     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
>     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> @@ -1026,7 +1026,8 @@
>     ldr\\t%q0, %1
>     str\\t%q1, %0"
>    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> -		             load_8,store_8,store_8,f_loadd,f_stored")
> +		             load_16,store_16,store_16,\
> +                             load_16,store_16")
>     (set_attr "length" "8,8,8,4,4,4,4,4,4")
>     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
>     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> @@ -1121,7 +1122,7 @@
>     str\\t%x1, %0
>     mov\\t%x0, %x1"
>    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> +                     f_loadd,f_stored,load_8,store_8,mov_reg")
>     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
>  )
>  
> @@ -1145,7 +1146,7 @@
>     stp\\t%1, %H1, %0
>     stp\\txzr, xzr, %0"
>    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> -                     f_loadd,f_stored,load_8,store_8,store_8")
> +                     f_loadd,f_stored,load_16,store_16,store_16")
>     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
>     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
>  )
> @@ -1209,7 +1210,7 @@
>    "@
>     ldp\\t%x0, %x2, %1
>     ldp\\t%d0, %d2, %1"
> -  [(set_attr "type" "load_8,neon_load1_2reg")
> +  [(set_attr "type" "load_16,neon_load1_2reg")
>     (set_attr "fp" "*,yes")]
>  )
>  
> @@ -1244,7 +1245,7 @@
>    "@
>     stp\\t%x1, %x3, %0
>     stp\\t%d1, %d3, %0"
> -  [(set_attr "type" "store_8,neon_store1_2reg")
> +  [(set_attr "type" "store_16,neon_store1_2reg")
>     (set_attr "fp" "*,yes")]
>  )
>  
> @@ -1278,7 +1279,7 @@
>    "@
>     ldp\\t%d0, %d2, %1
>     ldp\\t%x0, %x2, %1"
> -  [(set_attr "type" "neon_load1_2reg,load_8")
> +  [(set_attr "type" "neon_load1_2reg,load_16")
>     (set_attr "fp" "yes,*")]
>  )
>  
> @@ -1312,7 +1313,7 @@
>    "@
>     stp\\t%d1, %d3, %0
>     stp\\t%x1, %x3, %0"
> -  [(set_attr "type" "neon_store1_2reg,store_8")
> +  [(set_attr "type" "neon_store1_2reg,store_16")
>     (set_attr "fp" "yes,*")]
>  )
>  
> @@ -1330,7 +1331,7 @@
>                     (match_operand:P 5 "const_int_operand" "n"))))])]
>    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
>    "ldp\\t%<w>2, %<w>3, [%1], %4"
> -  [(set_attr "type" "load_8")]
> +  [(set_attr "type" "load_<ldpstp_sz>")]
>  )
>  
>  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> @@ -1363,7 +1364,7 @@
>            (match_operand:GPI 3 "register_operand" "r"))])]
>    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
>    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> -  [(set_attr "type" "store_8")]
> +  [(set_attr "type" "store_<ldpstp_sz>")]
>  )
>  
>  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> @@ -5139,7 +5140,7 @@
>  		    UNSPEC_GOTSMALLPIC))]
>    ""
>    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_<ldst_sz>")]
>  )
>  
>  (define_insn "ldr_got_small_sidi"
> @@ -5162,7 +5163,7 @@
>  		    UNSPEC_GOTSMALLPIC28K))]
>    ""
>    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_<ldst_sz>")]
>  )
>  
>  (define_insn "ldr_got_small_28k_sidi"
> @@ -5183,7 +5184,7 @@
>  		   UNSPEC_GOTTINYPIC))]
>    ""
>    "ldr\\t%0, %L1"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_8")]
>  )
>  
>  (define_insn "aarch64_load_tp_hard"
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 43be7fd..a65c3aa 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -384,6 +384,11 @@
>  ;; 32-bit version and "%x0" in the 64-bit version.
>  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
>  
> +;; The size of access, in bytes.
> +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> +;; Likewise for load/store pair.
> +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> +
>  ;; For inequal width int to float conversion
>  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
>  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> index c18da2f..84ac6cd 100644
> --- a/gcc/config/aarch64/thunderx.md
> +++ b/gcc/config/aarch64/thunderx.md
> @@ -100,7 +100,7 @@
>  ;; Store pair are single issued
>  (define_insn_reservation "thunderx_storepair" 1
>    (and (eq_attr "tune" "thunderx")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8,store_16"))
>    "thunderx_pipe0 + thunderx_pipe1")
>  
>  ;; Prefetch are single issued
> @@ -112,7 +112,7 @@
>  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
>  (define_insn_reservation "thunderx_load" 3
>    (and (eq_attr "tune" "thunderx")
> -       (eq_attr "type" "load_4, load_8"))
> +       (eq_attr "type" "load_4, load_8, load_16"))
>    "thunderx_pipe0")
>  
>  (define_insn_reservation "thunderx_brj" 1
> diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> index 632396f..4e39610 100644
> --- a/gcc/config/aarch64/thunderx2t99.md
> +++ b/gcc/config/aarch64/thunderx2t99.md
> @@ -128,7 +128,7 @@
>  
>  (define_insn_reservation "thunderx2t99_loadpair" 5
>    (and (eq_attr "tune" "thunderx2t99")
> -       (eq_attr "type" "load_8"))
> +       (eq_attr "type" "load_8,load_16"))
>    "thunderx2t99_i012,thunderx2t99_ls01")
>  
>  (define_insn_reservation "thunderx2t99_store_basic" 1
> @@ -138,7 +138,7 @@
>  
>  (define_insn_reservation "thunderx2t99_storepair_basic" 1
>    (and (eq_attr "tune" "thunderx2t99")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8,store_16"))
>    "thunderx2t99_ls01,thunderx2t99_sd")
>  
>  ;; FP data processing instructions.
> diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> index 7e70408..0b457ee 100644
> --- a/gcc/config/arm/xgene1.md
> +++ b/gcc/config/arm/xgene1.md
> @@ -92,12 +92,12 @@
>  
>  (define_insn_reservation "xgene1_load_pair" 6
>    (and (eq_attr "tune" "xgene1")
> -       (eq_attr "type" "load_8"))
> +       (eq_attr "type" "load_8, load_16"))
>    "xgene1_decodeIsolated")
>  
>  (define_insn_reservation "xgene1_store_pair" 2
>    (and (eq_attr "tune" "xgene1")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8, store_16"))
>    "xgene1_decodeIsolated")
>  
>  (define_insn_reservation "xgene1_fp_load1" 10

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns
  2017-06-21 10:50   ` James Greenhalgh
@ 2017-07-03 10:47     ` James Greenhalgh
  2017-07-03 10:47     ` James Greenhalgh
  1 sibling, 0 replies; 10+ messages in thread
From: James Greenhalgh @ 2017-07-03 10:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> *ping*

*ping*x2

Thanks,
James

> On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > 
> > Hi,
> > 
> > There seems to be a partial misconception in the AArch64 backend that
> > load1/load2 referred to the number of registers to load, rather than the
> > number of words to load. This patch fixes that using the new "number of
> > bytes" types added in the previous patch.
> > 
> > That means using the load_16 and store_16 types that were defined in the
> > previous patch for the first time in the AArch64 backend. To ensure
> > continuity for scheduling models, I've just split this out from load_8.
> > Please update your models if this is very wrong!
> > 
> > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > 
> > OK?
> > 
> > Thanks,
> > James
> > 
> > ---
> > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > 
> > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > 	types correctly.
> > 	(movti_aarch64): Likewise.
> > 	(movdf_aarch64): Likewise.
> > 	(movtf_aarch64): Likewise.
> > 	(load_pairdi): Likewise.
> > 	(store_pairdi): Likewise.
> > 	(load_pairdf): Likewise.
> > 	(store_pairdf): Likewise.
> > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(ldr_got_small_<mode>): Likewise.
> > 	(ldr_got_small_28k_<mode>): Likewise.
> > 	(ldr_got_tiny): Likewise.
> > 	* config/aarch64/iterators.md (ldst_sz): New.
> > 	(ldpstp_sz): Likewise.
> > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > 	to store_16.
> > 	(thunderx_load): Split load_8 to load_16.
> > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > 	load_8 to load_16.
> > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > 	(xgene1_store_pair): Split store_8 to store_16.
> > 
> 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 11295a6..a1385e3 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -981,7 +981,7 @@
> >         DONE;
> >      }"
> >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > -                     load_4,load_4,store_4,store_4,\
> > +                     load_8,load_8,store_8,store_8,\
> >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > @@ -1026,7 +1026,8 @@
> >     ldr\\t%q0, %1
> >     str\\t%q1, %0"
> >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > +		             load_16,store_16,store_16,\
> > +                             load_16,store_16")
> >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > @@ -1121,7 +1122,7 @@
> >     str\\t%x1, %0
> >     mov\\t%x0, %x1"
> >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> >  )
> >  
> > @@ -1145,7 +1146,7 @@
> >     stp\\t%1, %H1, %0
> >     stp\\txzr, xzr, %0"
> >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > +                     f_loadd,f_stored,load_16,store_16,store_16")
> >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> >  )
> > @@ -1209,7 +1210,7 @@
> >    "@
> >     ldp\\t%x0, %x2, %1
> >     ldp\\t%d0, %d2, %1"
> > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > +  [(set_attr "type" "load_16,neon_load1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1244,7 +1245,7 @@
> >    "@
> >     stp\\t%x1, %x3, %0
> >     stp\\t%d1, %d3, %0"
> > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > +  [(set_attr "type" "store_16,neon_store1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1278,7 +1279,7 @@
> >    "@
> >     ldp\\t%d0, %d2, %1
> >     ldp\\t%x0, %x2, %1"
> > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > +  [(set_attr "type" "neon_load1_2reg,load_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1312,7 +1313,7 @@
> >    "@
> >     stp\\t%d1, %d3, %0
> >     stp\\t%x1, %x3, %0"
> > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > +  [(set_attr "type" "neon_store1_2reg,store_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1330,7 +1331,7 @@
> >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > -  [(set_attr "type" "load_8")]
> > +  [(set_attr "type" "load_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > @@ -1363,7 +1364,7 @@
> >            (match_operand:GPI 3 "register_operand" "r"))])]
> >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > -  [(set_attr "type" "store_8")]
> > +  [(set_attr "type" "store_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > @@ -5139,7 +5140,7 @@
> >  		    UNSPEC_GOTSMALLPIC))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_sidi"
> > @@ -5162,7 +5163,7 @@
> >  		    UNSPEC_GOTSMALLPIC28K))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_28k_sidi"
> > @@ -5183,7 +5184,7 @@
> >  		   UNSPEC_GOTTINYPIC))]
> >    ""
> >    "ldr\\t%0, %L1"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_8")]
> >  )
> >  
> >  (define_insn "aarch64_load_tp_hard"
> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > index 43be7fd..a65c3aa 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -384,6 +384,11 @@
> >  ;; 32-bit version and "%x0" in the 64-bit version.
> >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> >  
> > +;; The size of access, in bytes.
> > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > +;; Likewise for load/store pair.
> > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > +
> >  ;; For inequal width int to float conversion
> >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > index c18da2f..84ac6cd 100644
> > --- a/gcc/config/aarch64/thunderx.md
> > +++ b/gcc/config/aarch64/thunderx.md
> > @@ -100,7 +100,7 @@
> >  ;; Store pair are single issued
> >  (define_insn_reservation "thunderx_storepair" 1
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx_pipe0 + thunderx_pipe1")
> >  
> >  ;; Prefetch are single issued
> > @@ -112,7 +112,7 @@
> >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> >  (define_insn_reservation "thunderx_load" 3
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "load_4, load_8"))
> > +       (eq_attr "type" "load_4, load_8, load_16"))
> >    "thunderx_pipe0")
> >  
> >  (define_insn_reservation "thunderx_brj" 1
> > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > index 632396f..4e39610 100644
> > --- a/gcc/config/aarch64/thunderx2t99.md
> > +++ b/gcc/config/aarch64/thunderx2t99.md
> > @@ -128,7 +128,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_loadpair" 5
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8,load_16"))
> >    "thunderx2t99_i012,thunderx2t99_ls01")
> >  
> >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > @@ -138,7 +138,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx2t99_ls01,thunderx2t99_sd")
> >  
> >  ;; FP data processing instructions.
> > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > index 7e70408..0b457ee 100644
> > --- a/gcc/config/arm/xgene1.md
> > +++ b/gcc/config/arm/xgene1.md
> > @@ -92,12 +92,12 @@
> >  
> >  (define_insn_reservation "xgene1_load_pair" 6
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8, load_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_store_pair" 2
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8, store_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_fp_load1" 10
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns
  2017-06-21 10:50   ` James Greenhalgh
  2017-07-03 10:47     ` James Greenhalgh
@ 2017-07-03 10:47     ` James Greenhalgh
  2017-07-27 18:10       ` James Greenhalgh
  1 sibling, 1 reply; 10+ messages in thread
From: James Greenhalgh @ 2017-07-03 10:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> *ping*

Ping*2

Thanks,
James

> On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > 
> > Hi,
> > 
> > There seems to be a partial misconception in the AArch64 backend that
> > load1/load2 referred to the number of registers to load, rather than the
> > number of words to load. This patch fixes that using the new "number of
> > > bytes" types added in the previous patch.
> > 
> > That means using the load_16 and store_16 types that were defined in the
> > previous patch for the first time in the AArch64 backend. To ensure
> > continuity for scheduling models, I've just split this out from load_8.
> > Please update your models if this is very wrong!
> > 
> > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > 
> > OK?
> > 
> > Thanks,
> > James
> > 
> > ---
> > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > 
> > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > 	types correctly.
> > 	(movti_aarch64): Likewise.
> > 	(movdf_aarch64): Likewise.
> > 	(movtf_aarch64): Likewise.
> > 	(load_pairdi): Likewise.
> > 	(store_pairdi): Likewise.
> > 	(load_pairdf): Likewise.
> > 	(store_pairdf): Likewise.
> > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(ldr_got_small_<mode>): Likewise.
> > 	(ldr_got_small_28k_<mode>): Likewise.
> > 	(ldr_got_tiny): Likewise.
> > 	* config/aarch64/iterators.md (ldst_sz): New.
> > 	(ldpstp_sz): Likewise.
> > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > 	to store_16.
> > 	(thunderx_load): Split load_8 to load_16.
> > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > 	load_8 to load_16.
> > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > 	(xgene1_store_pair): Split store_8 to store_16.
> > 
> 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 11295a6..a1385e3 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -981,7 +981,7 @@
> >         DONE;
> >      }"
> >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > -                     load_4,load_4,store_4,store_4,\
> > +                     load_8,load_8,store_8,store_8,\
> >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > @@ -1026,7 +1026,8 @@
> >     ldr\\t%q0, %1
> >     str\\t%q1, %0"
> >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > +		             load_16,store_16,store_16,\
> > +                             load_16,store_16")
> >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > @@ -1121,7 +1122,7 @@
> >     str\\t%x1, %0
> >     mov\\t%x0, %x1"
> >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> >  )
> >  
> > @@ -1145,7 +1146,7 @@
> >     stp\\t%1, %H1, %0
> >     stp\\txzr, xzr, %0"
> >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > +                     f_loadd,f_stored,load_16,store_16,store_16")
> >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> >  )
> > @@ -1209,7 +1210,7 @@
> >    "@
> >     ldp\\t%x0, %x2, %1
> >     ldp\\t%d0, %d2, %1"
> > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > +  [(set_attr "type" "load_16,neon_load1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1244,7 +1245,7 @@
> >    "@
> >     stp\\t%x1, %x3, %0
> >     stp\\t%d1, %d3, %0"
> > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > +  [(set_attr "type" "store_16,neon_store1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1278,7 +1279,7 @@
> >    "@
> >     ldp\\t%d0, %d2, %1
> >     ldp\\t%x0, %x2, %1"
> > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > +  [(set_attr "type" "neon_load1_2reg,load_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1312,7 +1313,7 @@
> >    "@
> >     stp\\t%d1, %d3, %0
> >     stp\\t%x1, %x3, %0"
> > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > +  [(set_attr "type" "neon_store1_2reg,store_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1330,7 +1331,7 @@
> >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > -  [(set_attr "type" "load_8")]
> > +  [(set_attr "type" "load_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > @@ -1363,7 +1364,7 @@
> >            (match_operand:GPI 3 "register_operand" "r"))])]
> >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > -  [(set_attr "type" "store_8")]
> > +  [(set_attr "type" "store_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > @@ -5139,7 +5140,7 @@
> >  		    UNSPEC_GOTSMALLPIC))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_sidi"
> > @@ -5162,7 +5163,7 @@
> >  		    UNSPEC_GOTSMALLPIC28K))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_28k_sidi"
> > @@ -5183,7 +5184,7 @@
> >  		   UNSPEC_GOTTINYPIC))]
> >    ""
> >    "ldr\\t%0, %L1"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_8")]
> >  )
> >  
> >  (define_insn "aarch64_load_tp_hard"
> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > index 43be7fd..a65c3aa 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -384,6 +384,11 @@
> >  ;; 32-bit version and "%x0" in the 64-bit version.
> >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> >  
> > +;; The size of access, in bytes.
> > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > +;; Likewise for load/store pair.
> > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > +
> >  ;; For inequal width int to float conversion
> >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > index c18da2f..84ac6cd 100644
> > --- a/gcc/config/aarch64/thunderx.md
> > +++ b/gcc/config/aarch64/thunderx.md
> > @@ -100,7 +100,7 @@
> >  ;; Store pair are single issued
> >  (define_insn_reservation "thunderx_storepair" 1
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx_pipe0 + thunderx_pipe1")
> >  
> >  ;; Prefetch are single issued
> > @@ -112,7 +112,7 @@
> >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> >  (define_insn_reservation "thunderx_load" 3
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "load_4, load_8"))
> > +       (eq_attr "type" "load_4, load_8, load_16"))
> >    "thunderx_pipe0")
> >  
> >  (define_insn_reservation "thunderx_brj" 1
> > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > index 632396f..4e39610 100644
> > --- a/gcc/config/aarch64/thunderx2t99.md
> > +++ b/gcc/config/aarch64/thunderx2t99.md
> > @@ -128,7 +128,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_loadpair" 5
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8,load_16"))
> >    "thunderx2t99_i012,thunderx2t99_ls01")
> >  
> >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > @@ -138,7 +138,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx2t99_ls01,thunderx2t99_sd")
> >  
> >  ;; FP data processing instructions.
> > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > index 7e70408..0b457ee 100644
> > --- a/gcc/config/arm/xgene1.md
> > +++ b/gcc/config/arm/xgene1.md
> > @@ -92,12 +92,12 @@
> >  
> >  (define_insn_reservation "xgene1_load_pair" 6
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8, load_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_store_pair" 2
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8, store_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_fp_load1" 10
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size
  2017-06-21 10:50   ` James Greenhalgh
@ 2017-07-27 18:09     ` James Greenhalgh
  0 siblings, 0 replies; 10+ messages in thread
From: James Greenhalgh @ 2017-07-27 18:09 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: gcc-patches, nd, richard.earnshaw, marcus.shawcroft

On Wed, Jun 21, 2017 at 11:49:47AM +0100, James Greenhalgh wrote:
> On Mon, Jun 12, 2017 at 03:28:52PM +0100, Kyrill Tkachov wrote:

*ping ^2*

Thanks,
James


> > 
> > On 12/06/17 14:53, James Greenhalgh wrote:
> > >Hi,
> > >
> > >In the AArch64 backend and scheduling models there is some confusion as to
> > >what the load1/load2 etc. scheduling types refer to. This leads to us using
> > >load1/load2 in two contexts - for a variety of 32-bit, 64-bit and 128-bit
> > >loads in AArch32 and 128-bit loads in AArch64. That leads to an undesirable
> > >confusion in scheduling.
> > >
> > >Fixing it is easy, but mechanical and boring. Essentially,
> > >
> > >   s/load1/load_4/
> > >   s/load2/load_8/
> > >   s/load3/load_12/
> > >   s/load4/load_16/
> > >   s/store1/store_4/
> > >   s/store2/store_8/
> > >   s/store3/store_12/
> > >   s/store4/store_16/
> > 
> > So the number now is the number of bytes being loaded?
> > 
> > >Across all sorts of pipeline models, and the two backends.
> > >
> > >I have intentionally not modified any of the patterns which now look obviously
> > >incorrect. I'll be doing a second pass over the AArch64 back-end in patch
> > >2/2 which will fix these bugs. The AArch32 back-end looked to me to get this
> > >correct.
> > >
> > >Bootstrapped on AArch64 and ARM without issue - there's no functional
> > >change here.
> > >
> > >OK?
> > 
> > Ok from an arm perspective.
> 
> *Ping* for the AArch64 maintainers.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns
  2017-07-03 10:47     ` James Greenhalgh
@ 2017-07-27 18:10       ` James Greenhalgh
  2017-09-12 15:00         ` James Greenhalgh
  0 siblings, 1 reply; 10+ messages in thread
From: James Greenhalgh @ 2017-07-27 18:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

On Mon, Jul 03, 2017 at 11:46:58AM +0100, James Greenhalgh wrote:
> On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> > *ping*
> 
> Ping*2

Ping*3

Thanks,
James

> 
> Thanks,
> James
> 
> > On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > > 
> > > Hi,
> > > 
> > > There seems to be a partial misconception in the AArch64 backend that
> > > load1/load2 referred to the number of registers to load, rather than the
> > > number of words to load. This patch fixes that using the new "number of
> > > > bytes" types added in the previous patch.
> > > 
> > > That means using the load_16 and store_16 types that were defined in the
> > > previous patch for the first time in the AArch64 backend. To ensure
> > > continuity for scheduling models, I've just split this out from load_8.
> > > Please update your models if this is very wrong!
> > > 
> > > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > > 
> > > OK?
> > > 
> > > Thanks,
> > > James
> > > 
> > > ---
> > > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > > 
> > > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > > 	types correctly.
> > > 	(movti_aarch64): Likewise.
> > > 	(movdf_aarch64): Likewise.
> > > 	(movtf_aarch64): Likewise.
> > > 	(load_pairdi): Likewise.
> > > 	(store_pairdi): Likewise.
> > > 	(load_pairdf): Likewise.
> > > 	(store_pairdf): Likewise.
> > > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > > 	(ldr_got_small_<mode>): Likewise.
> > > 	(ldr_got_small_28k_<mode>): Likewise.
> > > 	(ldr_got_tiny): Likewise.
> > > 	* config/aarch64/iterators.md (ldst_sz): New.
> > > 	(ldpstp_sz): Likewise.
> > > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > > 	to store_16.
> > > 	(thunderx_load): Split load_8 to load_16.
> > > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > > 	load_8 to load_16.
> > > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > > 	(xgene1_store_pair): Split store_8 to store_16.
> > > 
> > 
> > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > > index 11295a6..a1385e3 100644
> > > --- a/gcc/config/aarch64/aarch64.md
> > > +++ b/gcc/config/aarch64/aarch64.md
> > > @@ -981,7 +981,7 @@
> > >         DONE;
> > >      }"
> > >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > > -                     load_4,load_4,store_4,store_4,\
> > > +                     load_8,load_8,store_8,store_8,\
> > >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> > >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> > >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > > @@ -1026,7 +1026,8 @@
> > >     ldr\\t%q0, %1
> > >     str\\t%q1, %0"
> > >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > > +		             load_16,store_16,store_16,\
> > > +                             load_16,store_16")
> > >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> > >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> > >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > > @@ -1121,7 +1122,7 @@
> > >     str\\t%x1, %0
> > >     mov\\t%x0, %x1"
> > >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> > >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> > >  )
> > >  
> > > @@ -1145,7 +1146,7 @@
> > >     stp\\t%1, %H1, %0
> > >     stp\\txzr, xzr, %0"
> > >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > > +                     f_loadd,f_stored,load_16,store_16,store_16")
> > >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> > >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> > >  )
> > > @@ -1209,7 +1210,7 @@
> > >    "@
> > >     ldp\\t%x0, %x2, %1
> > >     ldp\\t%d0, %d2, %1"
> > > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > > +  [(set_attr "type" "load_16,neon_load1_2reg")
> > >     (set_attr "fp" "*,yes")]
> > >  )
> > >  
> > > @@ -1244,7 +1245,7 @@
> > >    "@
> > >     stp\\t%x1, %x3, %0
> > >     stp\\t%d1, %d3, %0"
> > > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > > +  [(set_attr "type" "store_16,neon_store1_2reg")
> > >     (set_attr "fp" "*,yes")]
> > >  )
> > >  
> > > @@ -1278,7 +1279,7 @@
> > >    "@
> > >     ldp\\t%d0, %d2, %1
> > >     ldp\\t%x0, %x2, %1"
> > > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > > +  [(set_attr "type" "neon_load1_2reg,load_16")
> > >     (set_attr "fp" "yes,*")]
> > >  )
> > >  
> > > @@ -1312,7 +1313,7 @@
> > >    "@
> > >     stp\\t%d1, %d3, %0
> > >     stp\\t%x1, %x3, %0"
> > > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > > +  [(set_attr "type" "neon_store1_2reg,store_16")
> > >     (set_attr "fp" "yes,*")]
> > >  )
> > >  
> > > @@ -1330,7 +1331,7 @@
> > >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> > >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> > >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > > -  [(set_attr "type" "load_8")]
> > > +  [(set_attr "type" "load_<ldpstp_sz>")]
> > >  )
> > >  
> > >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > > @@ -1363,7 +1364,7 @@
> > >            (match_operand:GPI 3 "register_operand" "r"))])]
> > >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> > >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > > -  [(set_attr "type" "store_8")]
> > > +  [(set_attr "type" "store_<ldpstp_sz>")]
> > >  )
> > >  
> > >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > > @@ -5139,7 +5140,7 @@
> > >  		    UNSPEC_GOTSMALLPIC))]
> > >    ""
> > >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_<ldst_sz>")]
> > >  )
> > >  
> > >  (define_insn "ldr_got_small_sidi"
> > > @@ -5162,7 +5163,7 @@
> > >  		    UNSPEC_GOTSMALLPIC28K))]
> > >    ""
> > >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_<ldst_sz>")]
> > >  )
> > >  
> > >  (define_insn "ldr_got_small_28k_sidi"
> > > @@ -5183,7 +5184,7 @@
> > >  		   UNSPEC_GOTTINYPIC))]
> > >    ""
> > >    "ldr\\t%0, %L1"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_8")]
> > >  )
> > >  
> > >  (define_insn "aarch64_load_tp_hard"
> > > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > > index 43be7fd..a65c3aa 100644
> > > --- a/gcc/config/aarch64/iterators.md
> > > +++ b/gcc/config/aarch64/iterators.md
> > > @@ -384,6 +384,11 @@
> > >  ;; 32-bit version and "%x0" in the 64-bit version.
> > >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> > >  
> > > +;; The size of access, in bytes.
> > > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > > +;; Likewise for load/store pair.
> > > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > > +
> > >  ;; For inequal width int to float conversion
> > >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> > >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > > index c18da2f..84ac6cd 100644
> > > --- a/gcc/config/aarch64/thunderx.md
> > > +++ b/gcc/config/aarch64/thunderx.md
> > > @@ -100,7 +100,7 @@
> > >  ;; Store pair are single issued
> > >  (define_insn_reservation "thunderx_storepair" 1
> > >    (and (eq_attr "tune" "thunderx")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8,store_16"))
> > >    "thunderx_pipe0 + thunderx_pipe1")
> > >  
> > >  ;; Prefetch are single issued
> > > @@ -112,7 +112,7 @@
> > >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> > >  (define_insn_reservation "thunderx_load" 3
> > >    (and (eq_attr "tune" "thunderx")
> > > -       (eq_attr "type" "load_4, load_8"))
> > > +       (eq_attr "type" "load_4, load_8, load_16"))
> > >    "thunderx_pipe0")
> > >  
> > >  (define_insn_reservation "thunderx_brj" 1
> > > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > > index 632396f..4e39610 100644
> > > --- a/gcc/config/aarch64/thunderx2t99.md
> > > +++ b/gcc/config/aarch64/thunderx2t99.md
> > > @@ -128,7 +128,7 @@
> > >  
> > >  (define_insn_reservation "thunderx2t99_loadpair" 5
> > >    (and (eq_attr "tune" "thunderx2t99")
> > > -       (eq_attr "type" "load_8"))
> > > +       (eq_attr "type" "load_8,load_16"))
> > >    "thunderx2t99_i012,thunderx2t99_ls01")
> > >  
> > >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > > @@ -138,7 +138,7 @@
> > >  
> > >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> > >    (and (eq_attr "tune" "thunderx2t99")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8,store_16"))
> > >    "thunderx2t99_ls01,thunderx2t99_sd")
> > >  
> > >  ;; FP data processing instructions.
> > > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > > index 7e70408..0b457ee 100644
> > > --- a/gcc/config/arm/xgene1.md
> > > +++ b/gcc/config/arm/xgene1.md
> > > @@ -92,12 +92,12 @@
> > >  
> > >  (define_insn_reservation "xgene1_load_pair" 6
> > >    (and (eq_attr "tune" "xgene1")
> > > -       (eq_attr "type" "load_8"))
> > > +       (eq_attr "type" "load_8, load_16"))
> > >    "xgene1_decodeIsolated")
> > >  
> > >  (define_insn_reservation "xgene1_store_pair" 2
> > >    (and (eq_attr "tune" "xgene1")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8, store_16"))
> > >    "xgene1_decodeIsolated")
> > >  
> > >  (define_insn_reservation "xgene1_fp_load1" 10
> > 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Patch AArch64 2/2] Fix memory sizes to load/store patterns
  2017-07-27 18:10       ` James Greenhalgh
@ 2017-09-12 15:00         ` James Greenhalgh
  0 siblings, 0 replies; 10+ messages in thread
From: James Greenhalgh @ 2017-09-12 15:00 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.earnshaw, marcus.shawcroft

[-- Attachment #1: Type: text/plain, Size: 1816 bytes --]


On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
>
> Hi,
>
> There seems to be a partial misconception in the AArch64 backend that
> load1/load2 referred to the number of registers to load, rather than the
> number of words to load. This patch fixes that using the new "number of
> bytes" types added in the previous patch.
>
> That means using the load_16 and store_16 types that were defined in the
> previous patch for the first time in the AArch64 backend. To ensure
> continuity for scheduling models, I've just split this out from load_8.
> Please update your models if this is very wrong!

I've updated this patch for current trunk, rechecked it, and committed it
as r252026.

Thanks,
James

---
2017-09-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
	types correctly.
	(movti_aarch64): Likewise.
	(movdf_aarch64): Likewise.
	(movtf_aarch64): Likewise.
	(load_pairdi): Likewise.
	(store_pairdi): Likewise.
	(load_pairdf): Likewise.
	(store_pairdf): Likewise.
	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
	(ldr_got_small_<mode>): Likewise.
	(ldr_got_small_28k_<mode>): Likewise.
	(ldr_got_tiny): Likewise.
	* config/aarch64/iterators.md (ldst_sz): New.
	(ldpstp_sz): Likewise.
	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
	to store_16.
	(thunderx_load): Split load_8 to load_16.
	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
	load_8 to load_16.
	(thunderx2t99_storepair_basic): Split store_8 to store_16.
	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
	(xgene1_store_pair): Split store_8 to store_16.
	* config/aarch64/falkor.md (falkor_ld_3_ld): Split load_8 to load_16.
	(falkor_st_0_st_sd): Split store_8 to store_16.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Patch-AArch64-2-2-Fix-memory-sizes-to-load-store-pat.patch --]
[-- Type: text/x-patch; name="0001-Patch-AArch64-2-2-Fix-memory-sizes-to-load-store-pat.patch", Size: 7395 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7cbb458..e85376c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -994,8 +994,8 @@
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
     }"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,\
-                     load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_8,\
+                     load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
@@ -1039,7 +1039,8 @@
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
-		             load_8,store_8,store_8,f_loadd,f_stored")
+		             load_16,store_16,store_16,\
+                             load_16,store_16")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
@@ -1142,7 +1143,7 @@
    mov\\t%x0, %x1
    mov\\t%x0, %1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
-		     f_loadd,f_stored,load_4,store_4,mov_reg,\
+		     f_loadd,f_stored,load_8,store_8,mov_reg,\
 		     fconstd")
    (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
@@ -1187,7 +1188,7 @@
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0"
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
-                     f_loadd,f_stored,load_8,store_8,store_8")
+                     f_loadd,f_stored,load_16,store_16,store_16")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
    (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
 )
@@ -1251,7 +1252,7 @@
   "@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
-  [(set_attr "type" "load_8,neon_load1_2reg")
+  [(set_attr "type" "load_16,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1286,7 +1287,7 @@
   "@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
-  [(set_attr "type" "store_8,neon_store1_2reg")
+  [(set_attr "type" "store_16,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1320,7 +1321,7 @@
   "@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load_8")
+  [(set_attr "type" "neon_load1_2reg,load_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1354,7 +1355,7 @@
   "@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store_8")
+  [(set_attr "type" "neon_store1_2reg,store_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1372,7 +1373,7 @@
                    (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load_8")]
+  [(set_attr "type" "load_<ldpstp_sz>")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1405,7 +1406,7 @@
           (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store_8")]
+  [(set_attr "type" "store_<ldpstp_sz>")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -5355,7 +5356,7 @@
 		    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_sidi"
@@ -5378,7 +5379,7 @@
 		    UNSPEC_GOTSMALLPIC28K))]
   ""
   "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_28k_sidi"
@@ -5399,7 +5400,7 @@
 		   UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "aarch64_load_tp_hard"
diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md
index 66efc8c..83971ce 100644
--- a/gcc/config/aarch64/falkor.md
+++ b/gcc/config/aarch64/falkor.md
@@ -581,7 +581,7 @@
 
 (define_insn_reservation "falkor_ld_3_ld" 3
   (and (eq_attr "tune" "falkor")
-       (eq_attr "type" "load_4,load_8"))
+       (eq_attr "type" "load_4,load_8,load_16"))
   "falkor_ld")
 
 ;; Miscellaneous Data-Processing Instructions
@@ -663,7 +663,7 @@
 
 (define_insn_reservation "falkor_st_0_st_sd" 0
   (and (eq_attr "tune" "falkor")
-       (eq_attr "type" "store_4,store_8"))
+       (eq_attr "type" "store_4,store_8,store_16"))
   "falkor_st+falkor_sd")
 \f
 ;; Muliply bypasses.
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3e38767..477dc35 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -387,6 +387,11 @@
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; The size of access, in bytes.
+(define_mode_attr ldst_sz [(SI "4") (DI "8")])
+;; Likewise for load/store pair.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
 (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index c18da2f..84ac6cd 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -100,7 +100,7 @@
 ;; Store pair are single issued
 (define_insn_reservation "thunderx_storepair" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx_pipe0 + thunderx_pipe1")
 
 ;; Prefetch are single issued
@@ -112,7 +112,7 @@
 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "load_4, load_8"))
+       (eq_attr "type" "load_4, load_8, load_16"))
   "thunderx_pipe0")
 
 (define_insn_reservation "thunderx_brj" 1
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 41a45ca..5bcf4ff 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -128,7 +128,7 @@
 
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8,load_16"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
@@ -138,7 +138,7 @@
 
 (define_insn_reservation "thunderx2t99_storepair_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 ;; FP data processing instructions.
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index d0b17ab..c4b3773 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -92,12 +92,12 @@
 
 (define_insn_reservation "xgene1_load_pair" 6
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8, load_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_store_pair" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8, store_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_fp_load1" 10

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-09-12 15:00 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-12 13:54 [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size James Greenhalgh
2017-06-12 13:54 ` [Patch AArch64 2/2] Fix memory sizes to load/store patterns James Greenhalgh
2017-06-21 10:50   ` James Greenhalgh
2017-07-03 10:47     ` James Greenhalgh
2017-07-03 10:47     ` James Greenhalgh
2017-07-27 18:10       ` James Greenhalgh
2017-09-12 15:00         ` James Greenhalgh
2017-06-12 14:28 ` [Mechanical Patch ARM/AArch64 1/2] Rename load/store scheduling types to encode data size Kyrill Tkachov
2017-06-21 10:50   ` James Greenhalgh
2017-07-27 18:09     ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).