New patch: [AArch64] [BE] [1/2] Make large opaque integer modes endianness-safe.

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: "David Sherwood" <david.sherwood@arm.com>
To: <gcc-patches@gcc.gnu.org>
Subject: New patch: [AArch64] [BE] [1/2] Make large opaque integer modes endianness-safe.
Date: Thu, 13 Nov 2014 10:11:00 -0000	[thread overview]
Message-ID: <000701cfff29$ea4fbcf0$beef36d0$@arm.com> (raw)
In-Reply-To: 

[-- Attachment #1: Type: text/plain, Size: 1795 bytes --]

Hi All,

I have successfully rebased this and tested in conjunction with a patch from
Alan Hayward ([AArch64] [BE] Fix vector load/stores to not use ld1/st1), who
should be submitting a new version shortly. Built and tested on:

aarch64-none-elf
aarch64_be-none-elf
x86_64-linux-gnu

Regards,
David Sherwood.

-----Original Message-----
From: David Sherwood [mailto:david.sherwood@arm.com] 
Sent: 28 October 2014 08:55
To: 'gcc-patches@gcc.gnu.org'
Subject: RE: [AArch64] [BE] [1/2] Make large opaque integer modes endianness-safe.

Hi,

Sorry to bother you again. Could someone take a look at this change
please if they have time?

Thanks!
David.

-----Original Message-----
From: David Sherwood [mailto:david.sherwood@arm.com] 
Sent: 10 October 2014 15:48
To: gcc-patches@gcc.gnu.org
Subject: [AArch64] [BE] [1/2] Make large opaque integer modes endianness-safe.

Hi,

I have a fix (originally written by Tejas Belagod) for the following bug:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59810

Could someone take a look please?

Thanks!
David Sherwood.

ChangeLog:

    gcc/:
    2014-11-13  David Sherwood  <david.sherwood@arm.com>

        * config/aarch64/aarch64-protos.h (aarch64_simd_attr_length_rglist,
        aarch64_reverse_mask): New decls.
        * config/aarch64/iterators.md (UNSPEC_REV_REGLIST): New enum.
        * config/aarch64/iterators.md (insn_count): New mode_attr.
        * config/aarch64/aarch64-simd.md (vec_store_lanes(o/c/x)i,
        vec_load_lanes(o/c/x)i): Fixed to work for Big Endian.
        * config/aarch64/aarch64-simd.md (aarch64_rev_reglist,
        aarch64_simd_(ld/st)(2/3/4)): Added.
        * config/aarch64/aarch64.c (aarch64_simd_attr_length_rglist,
        aarch64_reverse_mask): Added.

[-- Attachment #2: ccmc_v1_rebase.patch --]
[-- Type: application/octet-stream, Size: 9995 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 470b9eb..494a1ae 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -196,6 +196,8 @@ bool aarch64_modes_tieable_p (machine_mode mode1,
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
 			    machine_mode);
+int aarch64_simd_attr_length_rglist (enum machine_mode);
+rtx aarch64_reverse_mask (enum machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
 char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef196e4..d3aed80 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4035,7 +4035,7 @@
 
 ;; Patterns for vector struct loads and stores.
 
-(define_insn "vec_load_lanesoi<mode>"
+(define_insn "aarch64_simd_ld2<mode>"
   [(set (match_operand:OI 0 "register_operand" "=w")
 	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
 		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4067,7 +4067,26 @@
   [(set_attr "type" "neon_load2_one_lane")]
 )
 
-(define_insn "vec_store_lanesoi<mode>"
+(define_expand "vec_load_lanesoi<mode>"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD2))]
+  "TARGET_SIMD"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      rtx tmp = gen_reg_rtx (OImode);
+      rtx mask = aarch64_reverse_mask (<MODE>mode);
+      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
+      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
+    }
+  else
+    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_simd_st2<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4088,7 +4107,26 @@
   [(set_attr "type" "neon_store3_one_lane<q>")]
 )
 
-(define_insn "vec_load_lanesci<mode>"
+(define_expand "vec_store_lanesoi<mode>"
+  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_ST2))]
+  "TARGET_SIMD"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      rtx tmp = gen_reg_rtx (OImode);
+      rtx mask = aarch64_reverse_mask (<MODE>mode);
+      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
+      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
+    }
+  else
+    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_simd_ld3<mode>"
   [(set (match_operand:CI 0 "register_operand" "=w")
 	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
 		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4120,7 +4158,26 @@
   [(set_attr "type" "neon_load3_one_lane")]
 )
 
-(define_insn "vec_store_lanesci<mode>"
+(define_expand "vec_load_lanesci<mode>"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD3))]
+  "TARGET_SIMD"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      rtx tmp = gen_reg_rtx (CImode);
+      rtx mask = aarch64_reverse_mask (<MODE>mode);
+      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
+      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
+    }
+  else
+    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_simd_st3<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4141,7 +4198,26 @@
   [(set_attr "type" "neon_store3_one_lane<q>")]
 )
 
-(define_insn "vec_load_lanesxi<mode>"
+(define_expand "vec_store_lanesci<mode>"
+  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_ST3))]
+  "TARGET_SIMD"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      rtx tmp = gen_reg_rtx (CImode);
+      rtx mask = aarch64_reverse_mask (<MODE>mode);
+      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
+      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
+    }
+  else
+    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_simd_ld4<mode>"
   [(set (match_operand:XI 0 "register_operand" "=w")
 	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
 		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4173,7 +4249,26 @@
   [(set_attr "type" "neon_load4_one_lane")]
 )
 
-(define_insn "vec_store_lanesxi<mode>"
+(define_expand "vec_load_lanesxi<mode>"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD4))]
+  "TARGET_SIMD"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      rtx tmp = gen_reg_rtx (XImode);
+      rtx mask = aarch64_reverse_mask (<MODE>mode);
+      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
+      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
+    }
+  else
+    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_simd_st4<mode>"
   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4194,6 +4289,50 @@
   [(set_attr "type" "neon_store4_one_lane<q>")]
 )
 
+(define_expand "vec_store_lanesxi<mode>"
+  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_ST4))]
+  "TARGET_SIMD"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      rtx tmp = gen_reg_rtx (XImode);
+      rtx mask = aarch64_reverse_mask (<MODE>mode);
+      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
+      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
+    }
+  else
+    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn_and_split "aarch64_rev_reglist<mode>"
+[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
+	(unspec:VSTRUCT
+	           [(match_operand:VSTRUCT 1 "register_operand" "w")
+		    (match_operand:V16QI 2 "register_operand" "w")]
+                   UNSPEC_REV_REGLIST))]
+  "TARGET_SIMD"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  int i;
+  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
+  for (i = 0; i < nregs; i++)
+    {
+      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
+      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
+      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "neon_tbl1_q")
+   (set_attr "length" "<insn_count>")]
+)
+
 ;; Reload patterns for AdvSIMD register list operands.
 
 (define_expand "mov<mode>"
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0429d96..dde9690 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8239,6 +8239,14 @@ aarch64_simd_attr_length_move (rtx_insn *insn)
   return 4;
 }
 
+/* Compute and return the length of aarch64_rev_reglist<mode>, where <mode> is
+   one of VSTRUCT modes: OI, CI, or XI.  */
+int
+aarch64_simd_attr_length_rglist (enum machine_mode mode)
+{
+  return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
+}
+
 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
    alignment of a vector to 128 bits.  */
 static HOST_WIDE_INT
@@ -9767,6 +9775,27 @@ aarch64_cannot_change_mode_class (machine_mode from,
   return true;
 }
 
+rtx
+aarch64_reverse_mask (enum machine_mode mode)
+{
+  /* We have to reverse each vector because we don't have
+     a permuted load that can reverse-load according to ABI rules.  */
+  rtx mask;
+  rtvec v = rtvec_alloc (16);
+  int i, j;
+  int nunits = GET_MODE_NUNITS (mode);
+  int usize = GET_MODE_UNIT_SIZE (mode);
+
+  gcc_assert (BYTES_BIG_ENDIAN);
+  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
+
+  for (i = 0; i < nunits; i++)
+    for (j = 0; j < usize; j++)
+      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
+  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
+  return force_reg (V16QImode, mask);
+}
+
 /* Implement MODES_TIEABLE_P.  */
 
 bool
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 9935167..1772157 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -298,6 +298,7 @@
     UNSPEC_SHA256SU1    ; Used in aarch64-simd.md.
     UNSPEC_PMULL        ; Used in aarch64-simd.md.
     UNSPEC_PMULL2       ; Used in aarch64-simd.md.
+    UNSPEC_REV_REGLIST  ; Used in aarch64-simd.md.
 ])
 
 ;; -------------------------------------------------------------------
@@ -670,6 +671,8 @@
 		      (V2DI  "p") (V2DF  "p")
 		      (V2SF "p") (V4SF  "v")])
 
+(define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------

next             reply	other threads:[~2014-11-13 10:09 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-13 10:11 David Sherwood [this message]
2014-11-13 14:23 ` Christophe Lyon
2014-11-13 14:33   ` David Sherwood
2014-11-17 21:18     ` Christophe Lyon
2014-11-18  9:17       ` David Sherwood
2014-11-18 10:33         ` Christophe Lyon
2014-11-27 15:03           ` David Sherwood
2014-12-11 10:16           ` David Sherwood
2014-12-11 13:46             ` Christophe Lyon
2014-12-15  9:58               ` David Sherwood
2014-12-15 13:48                 ` Christophe Lyon
2014-11-20 18:41 ` Marcus Shawcroft
2014-12-17 15:23   ` Tejas Belagod
2014-12-17 16:54     ` Marcus Shawcroft
2014-12-17 17:05       ` Tejas Belagod
2015-01-25 23:07         ` Christophe Lyon
2015-08-14 15:06           ` Christophe Lyon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='000701cfff29$ea4fbcf0$beef36d0$@arm.com' \
    --to=david.sherwood@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).