public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [ARC] Add basic support for double load and store instructions
@ 2016-01-15 12:41 Claudiu Zissulescu
  2016-01-17  6:21 ` Joern Wolfgang Rennecke
  0 siblings, 1 reply; 8+ messages in thread
From: Claudiu Zissulescu @ 2016-01-15 12:41 UTC (permalink / raw)
  To: gcc-patches, gnu; +Cc: Francois Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 798 bytes --]

Please find attached a patch that adds basic support for the double load/store instructions present in ARC HS processors.

Ok to apply?

//Claudiu

gcc/
2015-01-15  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity mll64 option.
	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.

[-- Attachment #2: 0001-ARC-Add-basic-support-for-double-load-and-store-inst.patch --]
[-- Type: application/octet-stream, Size: 11603 bytes --]

From cef269543eb26ed0b3e20d443c4beb12c5cbd910 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 15 Jan 2016 13:34:23 +0100
Subject: [PATCH 1/1] [ARC] Add basic support for double load and store
 instructions

gcc/
2015-01-15  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity mll64 option.
	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
---
 gcc/config/arc/arc.c         |  78 +++++++++++++++++++++++++++++---
 gcc/config/arc/arc.h         |   4 ++
 gcc/config/arc/arc.md        | 103 ++++++++++++++++++++++---------------------
 gcc/config/arc/arc.opt       |   4 ++
 gcc/config/arc/predicates.md |  13 +++++-
 5 files changed, 146 insertions(+), 56 deletions(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index f636534..7244628 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -420,6 +420,9 @@ static void arc_finalize_pic (void);
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
+
 /* Try to keep the (mov:DF _, reg) as early as possible so
    that the d<add/sub/mul>h-lr insns appear together and can
    use the peephole2 pattern.  */
@@ -736,6 +739,10 @@ arc_init (void)
   if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
     error ("-matomic is only supported for ARC700 or ARC HS cores");
 
+  /* ll64 ops only available for HS. */
+  if (TARGET_LL64 && !TARGET_HS)
+    error ("-mll64 is only supported for ARC HS cores");
+
   arc_init_reg_tables ();
 
   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
@@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg,
 
       for (regno = 0; regno <= 31; regno++)
 	{
-	  if ((gmask & (1L << regno)) != 0)
+	  enum machine_mode mode = SImode;
+	  bool found = false;
+
+	  if (TARGET_LL64
+	      && (regno % 2 == 0)
+	      && ((gmask & (1L << regno)) != 0)
+	      && ((gmask & (1L << (regno+1))) != 0))
 	    {
-	      rtx reg = gen_rtx_REG (SImode, regno);
+	      found = true;
+	      mode  = DImode;
+	    }
+	  else if ((gmask & (1L << regno)) != 0)
+	    {
+	      found = true;
+	      mode  = SImode;
+	    }
+
+	  if (found)
+	    {
+	      rtx reg = gen_rtx_REG (mode, regno);
 	      rtx addr, mem;
 	      int cfa_adjust = *first_offset;
 
@@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg,
 		  gcc_assert (SMALL_INT (offset));
 		  addr = plus_constant (Pmode, base_reg, offset);
 		}
-	      mem = gen_frame_mem (SImode, addr);
+	      mem = gen_frame_mem (mode, addr);
 	      if (epilogue_p)
 		{
 		  rtx insn =
@@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg,
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
+	      if (mode == DImode)
+		{
+		  offset += UNITS_PER_WORD;
+		  ++regno;
+		}
 	    } /* if */
 	} /* for */
     }/* if */
@@ -7009,14 +7038,21 @@ arc_expand_movmem (rtx *operands)
   size = INTVAL (operands[2]);
   /* move_by_pieces_ninsns is static, so we can't use it.  */
   if (align >= 4)
-    n_pieces = (size + 2) / 4U + (size & 1);
+    {
+      if (TARGET_LL64)
+	n_pieces = (size + 2) / 8U + (size & 1);
+      else
+	n_pieces = (size + 2) / 4U + (size & 1);
+    }
   else if (align == 2)
     n_pieces = (size + 1) / 2U;
   else
     n_pieces = size;
   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
     return false;
-  if (piece > 4)
+  if (TARGET_LL64 && (piece != 8) && (align >= 4))
+    piece = 8;
+  else if (piece > 4)
     piece = 4;
   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
@@ -8556,6 +8592,16 @@ arc_split_move (rtx *operands)
       return val;
   }
 
+  if (TARGET_LL64
+      && ((memory_operand (operands[0], mode)
+	   && even_register_operand (operands[1], mode))
+	  || (memory_operand (operands[1], mode)
+	      && even_register_operand (operands[0], mode))))
+    {
+      rtx val = gen_rtx_SET (operands[0], operands[1]);
+      return val;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
@@ -9329,6 +9375,28 @@ arc_no_speculation_in_delay_slots_p ()
   return true;
 }
 
+/* Return a parallel of registers to represent where to find the
+   register pieces if required, otherwise NULL_RTX.  */
+
+static rtx
+arc_dwarf_register_span (rtx rtl)
+{
+   enum machine_mode mode = GET_MODE (rtl);
+   unsigned regno;
+   rtx p;
+
+   if (GET_MODE_SIZE (mode) != 8)
+     return NULL_RTX;
+
+   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+   regno = REGNO (rtl);
+   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
+   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
+
+   return p;
+}
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 70a2b1d..27665b0 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -97,6 +97,10 @@ along with GCC; see the file COPYING3.  If not see
 	builtin_define ("__ARC_NORM__");\
 	builtin_define ("__Xnorm");	\
       }					\
+    if (TARGET_LL64)			\
+      {					\
+	builtin_define ("__ARC_LL64__");\
+      }					\
     if (TARGET_MUL64_SET)		\
       builtin_define ("__ARC_MUL64__");\
     if (TARGET_MULMAC_32BY16_SET)	\
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 80f1daa..07685ac 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -984,7 +984,7 @@
 }")
 
 (define_insn_and_split "*movdi_insn"
-  [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
+  [(set (match_operand:DI 0 "move_dest_operand"      "=w, w,r,m")
 	(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
   "register_operand (operands[0], DImode)
    || register_operand (operands[1], DImode)"
@@ -993,50 +993,33 @@
   switch (which_alternative)
     {
     default:
-    case 0 :
-      /* We normally copy the low-numbered register first.  However, if
-	 the first register operand 0 is the same as the second register of
-	 operand 1, we must copy in the opposite order.  */
-      if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
-	return \"mov%? %R0,%R1\;mov%? %0,%1\";
-      else
-      return \"mov%? %0,%1\;mov%? %R0,%R1\";
-    case 1 :
-      return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
-    case 2 :
-      /* If the low-address word is used in the address, we must load it
-	 last.  Otherwise, load it first.  Note that we cannot have
-	 auto-increment in that case since the address register is known to be
-	 dead.  */
-      if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
-	return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
-      else switch (GET_CODE (XEXP(operands[1], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-	  return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-	  return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
-	default:
-	  return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
-	}
-    case 3 :
-      switch (GET_CODE (XEXP(operands[0], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-     	  return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-     	  return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
-	default:
-     	  return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
-	}
+      return \"#\";
+
+    case 2:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 3:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
     }
 }"
-  "&& reload_completed && optimize"
-  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
-  "arc_split_move (operands);"
+  "reload_completed"
+  [(match_dup 2)]
+  "operands[2] = arc_split_move (operands);"
   [(set_attr "type" "move,move,load,store")
    ;; ??? The ld/st values could be 4 if it's [reg,bignum].
-   (set_attr "length" "8,16,16,16")])
+   (set_attr "length" "8,16,*,*")])
 
 
 ;; Floating point move insns.
@@ -1066,23 +1049,43 @@
   ""
   "if (prepare_move_operands (operands, DFmode)) DONE;")
 
-(define_insn "*movdf_insn"
+(define_insn_and_split "*movdf_insn"
   [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,c,c,r,m")
 	(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
   "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
-  "#"
+  "*
+{
+ switch (which_alternative)
+   {
+    default:
+      return \"#\";
+    case 4:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+	        && even_register_operand (operands[1], DFmode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 5:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+		&& even_register_operand (operands[1], DFmode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
+   }
+}"
+  "reload_completed"
+  [(match_dup 2)]
+  "operands[2] = arc_split_move (operands);"
   [(set_attr "type" "move,move,move,move,load,store")
    (set_attr "predicable" "no,no,yes,yes,no,no")
    ;; ??? The ld/st values could be 16 if it's [reg,bignum].
    (set_attr "length" "4,16,8,16,16,16")])
 
-(define_split
-  [(set (match_operand:DF 0 "move_dest_operand" "")
-	(match_operand:DF 1 "move_double_src_operand" ""))]
-  "reload_completed"
-  [(match_dup 2)]
-  "operands[2] = arc_split_move (operands);")
-
 (define_insn_and_split "*movdf_insn_nolrsr"
   [(set (match_operand:DF 0 "register_operand"       "=r")
 	(match_operand:DF 1 "arc_double_register_operand" "D"))
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 79113a5..00b98d5 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -409,3 +409,7 @@ Target Joined
 matomic
 Target Report Mask(ATOMIC)
 Enable atomic instructions.
+
+mll64
+Target Report Mask(LL64)
+Enable double load/store instructions for ARC HS.
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index fba878b..52ac2ac 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -783,4 +783,15 @@
        (match_code "reg" "0")))
 
 (define_predicate "any_mem_operand"
-  (match_code "mem"))
\ No newline at end of file
+  (match_code "mem"))
+
+; Special predicate to match even-odd double register pair
+(define_predicate "even_register_operand"
+  (match_code "reg")
+  {
+   if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+      return 0;
+
+   return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
+			  || ((REGNO (op) & 1) == 0)));
+  })
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-15 12:41 [PATCH] [ARC] Add basic support for double load and store instructions Claudiu Zissulescu
@ 2016-01-17  6:21 ` Joern Wolfgang Rennecke
  2016-01-18  9:26   ` Claudiu Zissulescu
  2016-01-19 17:46   ` Claudiu Zissulescu
  0 siblings, 2 replies; 8+ messages in thread
From: Joern Wolfgang Rennecke @ 2016-01-17  6:21 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett



On 15/01/16 12:40, Claudiu Zissulescu wrote:

	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
  

 >    if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
 >      return false;
 > -  if (piece > 4)
 > +  if (TARGET_LL64 && (piece != 8) && (align >= 4))
 > +    piece = 8;
 > +  else if (piece > 4)
 >      piece = 4;
 >    dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);

That bit doesn't make sense to me.
Assume the alignment is 8.  Thus, piece becomes 8 too.  Then the
above conditional gets processed, and it sets piece to 4.
I think instead of "(piece != 8) && (align >= 4)" it should be:
"(piece >= 8)"

	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.


 > -  "&& reload_completed && optimize"
 > -  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
 > -  "arc_split_move (operands);"
 > +  "reload_completed"
 > +  [(match_dup 2)]
 > +  "operands[2] = arc_split_move (operands);"

arc_split_move uses, inter alia, operands[2]..operands[5].
Thus, it is not safe to stop mentioning these in the pattern.

> 	(*movdf_insn): Likewise.
Likewise.

When you say 'basic support', I suppose you have a plan to re-visit this 
later to get the register allocator to use
register pairs, and stop regrename breaking them up?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-17  6:21 ` Joern Wolfgang Rennecke
@ 2016-01-18  9:26   ` Claudiu Zissulescu
  2016-01-19 17:46   ` Claudiu Zissulescu
  1 sibling, 0 replies; 8+ messages in thread
From: Claudiu Zissulescu @ 2016-01-18  9:26 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

>  >    if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
>  >      return false;
>  > -  if (piece > 4)
>  > +  if (TARGET_LL64 && (piece != 8) && (align >= 4))
>  > +    piece = 8;
>  > +  else if (piece > 4)
>  >      piece = 4;
>  >    dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
> 
> That bit doesn't make sense to me.
> Assume the alignment is 8.  Thus, piece becomes 8 too.  Then the above
> conditional gets processed, and it sets piece to 4.
> I think instead of "(piece != 8) && (align >= 4)" it should be:
> "(piece >= 8)"

Right. My intention is to force 64-bit transfers for 32-bit aligned data as well. Hence, the condition should look like this:

if (TARGET_LL64 && (piece >= 4))
  piece = 8;
...

So, whenever the alignment is 32 bits or larger (as piece equals align), we use 64-bit transfers. The number of pieces is computed a few lines above.

> 
> 	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
> 
> 
>  > -  "&& reload_completed && optimize"
>  > -  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
> > -  "arc_split_move (operands);"
>  > +  "reload_completed"
>  > +  [(match_dup 2)]
>  > +  "operands[2] = arc_split_move (operands);"
> 
> arc_split_move uses, inter alia,  operands[2]..operands[[5].
> Thus, it is not save to stop mentioning these in the pattern.
> 
> > 	(*movdf_insn): Likewise.
> Likewise.
> 

Noted.

> When you say 'basic support', I suppose you have a plan to re-visit this later
> to get the register allocator to use register pairs, and stop regrename
> breaking them up?

Indeed, I am preparing a patch for (new) floating point support, on which I would very much like to get your feedback. The double precision floating point operations use the double registers. Hence, breaking up the register pairs and afterwards introducing moves to get them right is not desirable. Thus, I will introduce a new ABI variant which will pass the arguments in even-odd registers, together with changes to hard_regno_mode_ok to keep the registers in proper pairs. But more in the upcoming patch.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-17  6:21 ` Joern Wolfgang Rennecke
  2016-01-18  9:26   ` Claudiu Zissulescu
@ 2016-01-19 17:46   ` Claudiu Zissulescu
  2016-01-20 15:39     ` Joern Wolfgang Rennecke
  1 sibling, 1 reply; 8+ messages in thread
From: Claudiu Zissulescu @ 2016-01-19 17:46 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 2991 bytes --]

Hi,

I've prepared a new patch based on the review I received (attached). I also added a change to invoke.texi documenting the mll64 option; this change was missing from the first patch.

I have tested it with dg.exp for arc700, archs and archs+ll64.

Please let me know if everything is alright.

Thank you,
Claudiu

gcc/
2015-01-19  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity mll64 option.
	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions. Returns a boolean.
	(arc_process_double_reg_moves): Change function to return boolean
	instead of a sequence of instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc-protos.h (arc_split_move): Change prototype.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
	* doc/invoke.texi (ARC Options): Add mll64 documentation.


> -----Original Message-----
> From: Joern Wolfgang Rennecke [mailto:gnu@amylaar.uk]
> Sent: Sunday, January 17, 2016 7:21 AM
> To: Claudiu Zissulescu; gcc-patches@gcc.gnu.org
> Cc: Francois Bedard; jeremy.bennett@embecosm.com
> Subject: Re: [PATCH] [ARC] Add basic support for double load and store
> instructions
> 
> 
> 
> On 15/01/16 12:40, Claudiu Zissulescu wrote:
> 
> 	(arc_save_restore): Use double load/store instruction.
> 	(arc_expand_movmem): Likewise.
> 
> 
>  >    if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
>  >      return false;
>  > -  if (piece > 4)
>  > +  if (TARGET_LL64 && (piece != 8) && (align >= 4))
>  > +    piece = 8;
>  > +  else if (piece > 4)
>  >      piece = 4;
>  >    dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
> 
> That bit doesn't make sense to me.
> Assume the alignment is 8.  Thus, piece becomes 8 too.  Then the above
> conditional gets processed, and it sets piece to 4.
> I think instead of "(piece != 8) && (align >= 4)" it should be:
> "(piece >= 8)"
> 
> 	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
> 
> 
>  > -  "&& reload_completed && optimize"
>  > -  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
> > -  "arc_split_move (operands);"
>  > +  "reload_completed"
>  > +  [(match_dup 2)]
>  > +  "operands[2] = arc_split_move (operands);"
> 
> arc_split_move uses, inter alia,  operands[2]..operands[[5].
> Thus, it is not save to stop mentioning these in the pattern.
> 
> > 	(*movdf_insn): Likewise.
> Likewise.
> 
> When you say 'basic support', I suppose you have a plan to re-visit this later
> to get the register allocator to use register pairs, and stop regrename
> breaking them up?

[-- Attachment #2: 0001-ARC-Add-basic-support-for-double-load-and-store-inst.patch --]
[-- Type: application/octet-stream, Size: 15156 bytes --]

From 0968bac2c3c9b26fd36bd03f327f374501865373 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 15 Jan 2016 13:34:23 +0100
Subject: [PATCH] [ARC] Add basic support for double load and store
 instructions

gcc/
2015-01-19  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity mll64 option.
	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions. Returns a boolean.
	(arc_process_double_reg_moves): Change function to return boolean
	instead of a sequence of instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc-protos.h (arc_split_move): Change prototype.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
	* doc/invoke.texi (ARC Options): Add mll64 documentation.
---
 gcc/config/arc/arc-protos.h  |   2 +-
 gcc/config/arc/arc.c         | 107 +++++++++++++++++++++++++++++++----------
 gcc/config/arc/arc.h         |   4 ++
 gcc/config/arc/arc.md        | 111 ++++++++++++++++++++++++-------------------
 gcc/config/arc/arc.opt       |   4 ++
 gcc/config/arc/predicates.md |  13 ++++-
 gcc/doc/invoke.texi          |   6 ++-
 7 files changed, 169 insertions(+), 78 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 3f96455..7d44840 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -104,7 +104,7 @@ extern void arc_toggle_unalign (void);
 extern void split_addsi (rtx *);
 extern void split_subsi (rtx *);
 extern void arc_pad_return (void);
-extern rtx arc_split_move (rtx *);
+extern bool arc_split_move (rtx *);
 extern int arc_verify_short (rtx_insn *insn, int unalign, int);
 extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
 extern rtx arc_regno_use_in (unsigned int, rtx);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index f636534..b308fef 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -420,6 +420,9 @@ static void arc_finalize_pic (void);
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
+
 /* Try to keep the (mov:DF _, reg) as early as possible so
    that the d<add/sub/mul>h-lr insns appear together and can
    use the peephole2 pattern.  */
@@ -736,6 +739,10 @@ arc_init (void)
   if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
     error ("-matomic is only supported for ARC700 or ARC HS cores");
 
+  /* ll64 ops only available for HS.  */
+  if (TARGET_LL64 && !TARGET_HS)
+    error ("-mll64 is only supported for ARC HS cores");
+
   arc_init_reg_tables ();
 
   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
@@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg,
 
       for (regno = 0; regno <= 31; regno++)
 	{
-	  if ((gmask & (1L << regno)) != 0)
+	  enum machine_mode mode = SImode;
+	  bool found = false;
+
+	  if (TARGET_LL64
+	      && (regno % 2 == 0)
+	      && ((gmask & (1L << regno)) != 0)
+	      && ((gmask & (1L << (regno+1))) != 0))
+	    {
+	      found = true;
+	      mode  = DImode;
+	    }
+	  else if ((gmask & (1L << regno)) != 0)
 	    {
-	      rtx reg = gen_rtx_REG (SImode, regno);
+	      found = true;
+	      mode  = SImode;
+	    }
+
+	  if (found)
+	    {
+	      rtx reg = gen_rtx_REG (mode, regno);
 	      rtx addr, mem;
 	      int cfa_adjust = *first_offset;
 
@@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg,
 		  gcc_assert (SMALL_INT (offset));
 		  addr = plus_constant (Pmode, base_reg, offset);
 		}
-	      mem = gen_frame_mem (SImode, addr);
+	      mem = gen_frame_mem (mode, addr);
 	      if (epilogue_p)
 		{
 		  rtx insn =
@@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg,
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
+	      if (mode == DImode)
+		{
+		  offset += UNITS_PER_WORD;
+		  ++regno;
+		}
 	    } /* if */
 	} /* for */
     }/* if */
@@ -7009,14 +7038,23 @@ arc_expand_movmem (rtx *operands)
   size = INTVAL (operands[2]);
   /* move_by_pieces_ninsns is static, so we can't use it.  */
   if (align >= 4)
-    n_pieces = (size + 2) / 4U + (size & 1);
+    {
+      if (TARGET_LL64)
+	n_pieces = (size + 2) / 8U + (size & 1);
+      else
+	n_pieces = (size + 2) / 4U + (size & 1);
+    }
   else if (align == 2)
     n_pieces = (size + 1) / 2U;
   else
     n_pieces = size;
   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
     return false;
-  if (piece > 4)
+  /* Force 32 bit aligned and larger datum to use 64 bit transfers, if
+     possible.  */
+  if (TARGET_LL64 && (piece >= 4))
+    piece = 8;
+  else if (piece > 4)
     piece = 4;
   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
@@ -8463,12 +8501,11 @@ split_subsi (rtx *operands)
    Operand 0: destination register
    Operand 1: source register  */
 
-static rtx
+static bool
 arc_process_double_reg_moves (rtx *operands)
 {
   rtx dest = operands[0];
   rtx src  = operands[1];
-  rtx val;
 
   enum usesDxState { none, srcDx, destDx, maxDx };
   enum usesDxState state = none;
@@ -8483,9 +8520,7 @@ arc_process_double_reg_moves (rtx *operands)
     }
 
   if (state == none)
-    return NULL_RTX;
-
-  start_sequence ();
+    return false;
 
   if (state == srcDx)
     {
@@ -8532,30 +8567,36 @@ arc_process_double_reg_moves (rtx *operands)
   else
     gcc_unreachable ();
 
-  val = get_insns ();
-  end_sequence ();
-  return val;
+  return true;
 }
 
 /* operands 0..1 are the operands of a 64 bit move instruction.
    split it into two moves with operands 2/3 and 4/5.  */
 
-rtx
+bool
 arc_split_move (rtx *operands)
 {
   machine_mode mode = GET_MODE (operands[0]);
   int i;
   int swap = 0;
   rtx xop[4];
-  rtx val;
 
   if (TARGET_DPFP)
   {
-    val = arc_process_double_reg_moves (operands);
-    if (val)
-      return val;
+    if (arc_process_double_reg_moves (operands))
+      return true;
   }
 
+  if (TARGET_LL64
+      && ((memory_operand (operands[0], mode)
+	   && even_register_operand (operands[1], mode))
+	  || (memory_operand (operands[1], mode)
+	      && even_register_operand (operands[0], mode))))
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return true;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
@@ -8608,13 +8649,7 @@ arc_split_move (rtx *operands)
   operands[4-swap] = xop[2];
   operands[5-swap] = xop[3];
 
-  start_sequence ();
-  emit_insn (gen_rtx_SET (operands[2], operands[3]));
-  emit_insn (gen_rtx_SET (operands[4], operands[5]));
-  val = get_insns ();
-  end_sequence ();
-
-  return val;
+  return false;
 }
 
 /* Select between the instruction output templates s_tmpl (for short INSNs)
@@ -9329,6 +9364,28 @@ arc_no_speculation_in_delay_slots_p ()
   return true;
 }
 
+/* Return a parallel of registers to represent where to find the
+   register pieces if required, otherwise NULL_RTX.  */
+
+static rtx
+arc_dwarf_register_span (rtx rtl)
+{
+   enum machine_mode mode = GET_MODE (rtl);
+   unsigned regno;
+   rtx p;
+
+   if (GET_MODE_SIZE (mode) != 8)
+     return NULL_RTX;
+
+   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+   regno = REGNO (rtl);
+   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
+   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
+
+   return p;
+}
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 70a2b1d..27665b0 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -97,6 +97,10 @@ along with GCC; see the file COPYING3.  If not see
 	builtin_define ("__ARC_NORM__");\
 	builtin_define ("__Xnorm");	\
       }					\
+    if (TARGET_LL64)			\
+      {					\
+	builtin_define ("__ARC_LL64__");\
+      }					\
     if (TARGET_MUL64_SET)		\
       builtin_define ("__ARC_MUL64__");\
     if (TARGET_MULMAC_32BY16_SET)	\
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 80f1daa..6072756 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -984,7 +984,7 @@
 }")
 
 (define_insn_and_split "*movdi_insn"
-  [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
+  [(set (match_operand:DI 0 "move_dest_operand"      "=w, w,r,m")
 	(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
   "register_operand (operands[0], DImode)
    || register_operand (operands[1], DImode)"
@@ -993,50 +993,37 @@
   switch (which_alternative)
     {
     default:
-    case 0 :
-      /* We normally copy the low-numbered register first.  However, if
-	 the first register operand 0 is the same as the second register of
-	 operand 1, we must copy in the opposite order.  */
-      if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
-	return \"mov%? %R0,%R1\;mov%? %0,%1\";
-      else
-      return \"mov%? %0,%1\;mov%? %R0,%R1\";
-    case 1 :
-      return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
-    case 2 :
-      /* If the low-address word is used in the address, we must load it
-	 last.  Otherwise, load it first.  Note that we cannot have
-	 auto-increment in that case since the address register is known to be
-	 dead.  */
-      if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
-	return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
-      else switch (GET_CODE (XEXP(operands[1], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-	  return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-	  return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
-	default:
-	  return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
-	}
-    case 3 :
-      switch (GET_CODE (XEXP(operands[0], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-     	  return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-     	  return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
-	default:
-     	  return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
-	}
+      return \"#\";
+
+    case 2:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 3:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
     }
 }"
-  "&& reload_completed && optimize"
-  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
-  "arc_split_move (operands);"
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  {
+    if (arc_split_move (operands))
+      DONE;
+  }
   [(set_attr "type" "move,move,load,store")
    ;; ??? The ld/st values could be 4 if it's [reg,bignum].
-   (set_attr "length" "8,16,16,16")])
+   (set_attr "length" "8,16,*,*")])
 
 
 ;; Floating point move insns.
@@ -1066,23 +1053,47 @@
   ""
   "if (prepare_move_operands (operands, DFmode)) DONE;")
 
-(define_insn "*movdf_insn"
+(define_insn_and_split "*movdf_insn"
   [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,c,c,r,m")
 	(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
   "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
-  "#"
+  "*
+{
+ switch (which_alternative)
+   {
+    default:
+      return \"#\";
+    case 4:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+	        && even_register_operand (operands[1], DFmode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 5:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+		&& even_register_operand (operands[1], DFmode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
+   }
+}"
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  {
+    if (arc_split_move (operands))
+      DONE;
+  }
   [(set_attr "type" "move,move,move,move,load,store")
    (set_attr "predicable" "no,no,yes,yes,no,no")
    ;; ??? The ld/st values could be 16 if it's [reg,bignum].
    (set_attr "length" "4,16,8,16,16,16")])
 
-(define_split
-  [(set (match_operand:DF 0 "move_dest_operand" "")
-	(match_operand:DF 1 "move_double_src_operand" ""))]
-  "reload_completed"
-  [(match_dup 2)]
-  "operands[2] = arc_split_move (operands);")
-
 (define_insn_and_split "*movdf_insn_nolrsr"
   [(set (match_operand:DF 0 "register_operand"       "=r")
 	(match_operand:DF 1 "arc_double_register_operand" "D"))
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 79113a5..00b98d5 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -409,3 +409,7 @@ Target Joined
 matomic
 Target Report Mask(ATOMIC)
 Enable atomic instructions.
+
+mll64
+Target Report Mask(LL64)
+Enable double load/store instructions for ARC HS.
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index fba878b..52ac2ac 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -783,4 +783,15 @@
        (match_code "reg" "0")))
 
 (define_predicate "any_mem_operand"
-  (match_code "mem"))
\ No newline at end of file
+  (match_code "mem"))
+
+; Special predicate to match even-odd double register pair
+(define_predicate "even_register_operand"
+  (match_code "reg")
+  {
+   if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+      return 0;
+
+   return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
+			  || ((REGNO (op) & 1) == 0)));
+  })
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 070a516..6bb7330 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -585,7 +585,7 @@ Objective-C and Objective-C++ Dialects}.
 -mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
 -mtune=@var{cpu} -mmultcost=@var{num} @gol
 -munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
--mdiv-rem -mcode-density}
+-mdiv-rem -mcode-density -mll64}
 
 @emph{ARM Options}
 @gccoptlist{-mapcs-frame  -mno-apcs-frame @gol
@@ -13173,6 +13173,10 @@ Enable DIV/REM instructions for ARCv2 cores.
 @opindex mcode-density
 Enable code density instructions for ARC EM, default on for ARC HS.
 
+@item -mll64
+@opindex mll64
+Enable double load/store operations for ARC HS cores.
+
 @item -mmpy-option=@var{multo}
 @opindex mmpy-option
 Compile ARCv2 code with a multiplier design option.  @samp{wlh1} is
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-19 17:46   ` Claudiu Zissulescu
@ 2016-01-20 15:39     ` Joern Wolfgang Rennecke
  2016-01-22 11:59       ` Claudiu Zissulescu
  0 siblings, 1 reply; 8+ messages in thread
From: Joern Wolfgang Rennecke @ 2016-01-20 15:39 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett


On 19/01/16 17:46, Claudiu Zissulescu wrote:
 > Hi,
 >
 > I've prepared a new patch based on the received review (attached). I
 > also added a mod on invoke.texi regarding mll64 documentation. This mod
 > was missing in the first patch.
 >
 > I have tested it with dg.exp for arc700, archs and archs+ll64.
 >
 > Please let me know if everything is alright.

Oops, I missed this the first time round:
 > @@ -7009,14 +7038,23 @@ arc_expand_movmem (rtx *operands)
 >    size = INTVAL (operands[2]);
 >    /* move_by_pieces_ninsns is static, so we can't use it. */
 >    if (align >= 4)
 > -    n_pieces = (size + 2) / 4U + (size & 1);
 > +    {
 > +      if (TARGET_LL64)
 > +     n_pieces = (size + 2) / 8U + (size & 1);
 > +      else
 >
You probably mean something like:

n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);

 > -  if (piece > 4)
 > +  /* Force 32 bit aligned and larger datum to use 64 bit transfers, if
 > +     possible.  */
 > +  if (TARGET_LL64 && (piece >= 4))
 > +    piece = 8;
This needs another condition: size >= 8.

While looking at this code, I also noticed we have a pre-existing problem
(read: inefficiency) with the number of pieces we actually make.

if (piece > size)
   piece = size & -size

will pick the smallest power of two in the decomposition of size, and
that'll be the transfer size for the rest of the loop.
Better would be:

while (piece > size)
   piece >>= 1;


What you do with arc_split_move looks like it'll work for movdi, and the
problem with movdf_insn+1 is pre-existing (I just noticed that), but
it's rather odd to have the split pattern solely for allocating a larger
operands array.

I think it would make more sense to remove the assignment to
operands 2..5 in arc_split_move, and instead use xop[0+swap] / xop[1+swap] /
xop[2-swap] / xop[3-swap] directly to emit the insns.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-20 15:39     ` Joern Wolfgang Rennecke
@ 2016-01-22 11:59       ` Claudiu Zissulescu
  2016-01-24 14:25         ` Joern Wolfgang Rennecke
  0 siblings, 1 reply; 8+ messages in thread
From: Claudiu Zissulescu @ 2016-01-22 11:59 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 118 bytes --]

Thank you for the feedback. I hope this new patch solves the outstanding issues. Please find it attached.

//Claudiu

[-- Attachment #2: 0001-ARC-Add-basic-support-for-double-load-and-store-inst.patch --]
[-- Type: application/octet-stream, Size: 16061 bytes --]

From 07ffb21d3cb185319fae1bed633361151d0adf8c Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 15 Jan 2016 13:34:23 +0100
Subject: [PATCH] [ARC] Add basic support for double load and store
 instructions

gcc/
2016-01-19  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity mll64 option.
	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions. Returns a boolean.
	(arc_process_double_reg_moves): Change function to return boolean
	instead of a sequence of instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc-protos.h (arc_split_move): Change prototype.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
	* doc/invoke.texi (ARC Options): Add mll64 documentation.
---
 gcc/config/arc/arc-protos.h  |   2 +-
 gcc/config/arc/arc.c         | 120 +++++++++++++++++++++++++++++++------------
 gcc/config/arc/arc.h         |   4 ++
 gcc/config/arc/arc.md        | 109 +++++++++++++++++++++------------------
 gcc/config/arc/arc.opt       |   4 ++
 gcc/config/arc/predicates.md |  13 ++++-
 gcc/doc/invoke.texi          |   6 ++-
 7 files changed, 172 insertions(+), 86 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 3f96455..f487291 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -104,7 +104,7 @@ extern void arc_toggle_unalign (void);
 extern void split_addsi (rtx *);
 extern void split_subsi (rtx *);
 extern void arc_pad_return (void);
-extern rtx arc_split_move (rtx *);
+extern void arc_split_move (rtx *);
 extern int arc_verify_short (rtx_insn *insn, int unalign, int);
 extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
 extern rtx arc_regno_use_in (unsigned int, rtx);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index f636534..b9799a0 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -420,6 +420,9 @@ static void arc_finalize_pic (void);
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
+
 /* Try to keep the (mov:DF _, reg) as early as possible so
    that the d<add/sub/mul>h-lr insns appear together and can
    use the peephole2 pattern.  */
@@ -736,6 +739,10 @@ arc_init (void)
   if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
     error ("-matomic is only supported for ARC700 or ARC HS cores");
 
+  /* ll64 ops only available for HS.  */
+  if (TARGET_LL64 && !TARGET_HS)
+    error ("-mll64 is only supported for ARC HS cores");
+
   arc_init_reg_tables ();
 
   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
@@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg,
 
       for (regno = 0; regno <= 31; regno++)
 	{
-	  if ((gmask & (1L << regno)) != 0)
+	  enum machine_mode mode = SImode;
+	  bool found = false;
+
+	  if (TARGET_LL64
+	      && (regno % 2 == 0)
+	      && ((gmask & (1L << regno)) != 0)
+	      && ((gmask & (1L << (regno+1))) != 0))
+	    {
+	      found = true;
+	      mode  = DImode;
+	    }
+	  else if ((gmask & (1L << regno)) != 0)
 	    {
-	      rtx reg = gen_rtx_REG (SImode, regno);
+	      found = true;
+	      mode  = SImode;
+	    }
+
+	  if (found)
+	    {
+	      rtx reg = gen_rtx_REG (mode, regno);
 	      rtx addr, mem;
 	      int cfa_adjust = *first_offset;
 
@@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg,
 		  gcc_assert (SMALL_INT (offset));
 		  addr = plus_constant (Pmode, base_reg, offset);
 		}
-	      mem = gen_frame_mem (SImode, addr);
+	      mem = gen_frame_mem (mode, addr);
 	      if (epilogue_p)
 		{
 		  rtx insn =
@@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg,
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
+	      if (mode == DImode)
+		{
+		  offset += UNITS_PER_WORD;
+		  ++regno;
+		}
 	    } /* if */
 	} /* for */
     }/* if */
@@ -6986,9 +7015,8 @@ force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
   return addr;
 }
 
-/* Like move_by_pieces, but take account of load latency,
-   and actual offset ranges.
-   Return true on success.  */
+/* Like move_by_pieces, but take account of load latency, and actual
+   offset ranges.  Return true on success.  */
 
 bool
 arc_expand_movmem (rtx *operands)
@@ -7009,14 +7037,23 @@ arc_expand_movmem (rtx *operands)
   size = INTVAL (operands[2]);
   /* move_by_pieces_ninsns is static, so we can't use it.  */
   if (align >= 4)
-    n_pieces = (size + 2) / 4U + (size & 1);
+    {
+      if (TARGET_LL64)
+	n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
+      else
+	n_pieces = (size + 2) / 4U + (size & 1);
+    }
   else if (align == 2)
     n_pieces = (size + 1) / 2U;
   else
     n_pieces = size;
   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
     return false;
-  if (piece > 4)
+  /* Force 32 bit aligned and larger datum to use 64 bit transfers, if
+     possible.  */
+  if (TARGET_LL64 && (piece >= 4) && (size >= 8))
+    piece = 8;
+  else if (piece > 4)
     piece = 4;
   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
@@ -7027,8 +7064,8 @@ arc_expand_movmem (rtx *operands)
       rtx tmp;
       machine_mode mode;
 
-      if (piece > size)
-	piece = size & -size;
+      while (piece > size)
+	piece >>= 1;
       mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
       /* If we don't re-use temporaries, the scheduler gets carried away,
 	 and the register pressure gets unnecessarily high.  */
@@ -8463,12 +8500,11 @@ split_subsi (rtx *operands)
    Operand 0: destination register
    Operand 1: source register  */
 
-static rtx
+static bool
 arc_process_double_reg_moves (rtx *operands)
 {
   rtx dest = operands[0];
   rtx src  = operands[1];
-  rtx val;
 
   enum usesDxState { none, srcDx, destDx, maxDx };
   enum usesDxState state = none;
@@ -8483,9 +8519,7 @@ arc_process_double_reg_moves (rtx *operands)
     }
 
   if (state == none)
-    return NULL_RTX;
-
-  start_sequence ();
+    return false;
 
   if (state == srcDx)
     {
@@ -8532,30 +8566,36 @@ arc_process_double_reg_moves (rtx *operands)
   else
     gcc_unreachable ();
 
-  val = get_insns ();
-  end_sequence ();
-  return val;
+  return true;
 }
 
 /* operands 0..1 are the operands of a 64 bit move instruction.
    split it into two moves with operands 2/3 and 4/5.  */
 
-rtx
+void
 arc_split_move (rtx *operands)
 {
   machine_mode mode = GET_MODE (operands[0]);
   int i;
   int swap = 0;
   rtx xop[4];
-  rtx val;
 
   if (TARGET_DPFP)
   {
-    val = arc_process_double_reg_moves (operands);
-    if (val)
-      return val;
+    if (arc_process_double_reg_moves (operands))
+      return;
   }
 
+  if (TARGET_LL64
+      && ((memory_operand (operands[0], mode)
+	   && even_register_operand (operands[1], mode))
+	  || (memory_operand (operands[1], mode)
+	      && even_register_operand (operands[0], mode))))
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
@@ -8603,18 +8643,10 @@ arc_split_move (rtx *operands)
       swap = 2;
       gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
     }
-  operands[2+swap] = xop[0];
-  operands[3+swap] = xop[1];
-  operands[4-swap] = xop[2];
-  operands[5-swap] = xop[3];
 
-  start_sequence ();
-  emit_insn (gen_rtx_SET (operands[2], operands[3]));
-  emit_insn (gen_rtx_SET (operands[4], operands[5]));
-  val = get_insns ();
-  end_sequence ();
+  emit_move_insn (xop[0 + swap], xop[1 + swap]);
+  emit_move_insn (xop[2 - swap], xop[3 - swap]);
 
-  return val;
 }
 
 /* Select between the instruction output templates s_tmpl (for short INSNs)
@@ -9329,6 +9361,28 @@ arc_no_speculation_in_delay_slots_p ()
   return true;
 }
 
+/* Return a parallel of registers to represent where to find the
+   register pieces if required, otherwise NULL_RTX.  */
+
+static rtx
+arc_dwarf_register_span (rtx rtl)
+{
+   enum machine_mode mode = GET_MODE (rtl);
+   unsigned regno;
+   rtx p;
+
+   if (GET_MODE_SIZE (mode) != 8)
+     return NULL_RTX;
+
+   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+   regno = REGNO (rtl);
+   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
+   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
+
+   return p;
+}
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 70a2b1d..27665b0 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -97,6 +97,10 @@ along with GCC; see the file COPYING3.  If not see
 	builtin_define ("__ARC_NORM__");\
 	builtin_define ("__Xnorm");	\
       }					\
+    if (TARGET_LL64)			\
+      {					\
+	builtin_define ("__ARC_LL64__");\
+      }					\
     if (TARGET_MUL64_SET)		\
       builtin_define ("__ARC_MUL64__");\
     if (TARGET_MULMAC_32BY16_SET)	\
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 80f1daa..222a468 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -984,7 +984,7 @@
 }")
 
 (define_insn_and_split "*movdi_insn"
-  [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
+  [(set (match_operand:DI 0 "move_dest_operand"      "=w, w,r,m")
 	(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
   "register_operand (operands[0], DImode)
    || register_operand (operands[1], DImode)"
@@ -993,50 +993,36 @@
   switch (which_alternative)
     {
     default:
-    case 0 :
-      /* We normally copy the low-numbered register first.  However, if
-	 the first register operand 0 is the same as the second register of
-	 operand 1, we must copy in the opposite order.  */
-      if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
-	return \"mov%? %R0,%R1\;mov%? %0,%1\";
-      else
-      return \"mov%? %0,%1\;mov%? %R0,%R1\";
-    case 1 :
-      return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
-    case 2 :
-      /* If the low-address word is used in the address, we must load it
-	 last.  Otherwise, load it first.  Note that we cannot have
-	 auto-increment in that case since the address register is known to be
-	 dead.  */
-      if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
-	return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
-      else switch (GET_CODE (XEXP(operands[1], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-	  return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-	  return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
-	default:
-	  return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
-	}
-    case 3 :
-      switch (GET_CODE (XEXP(operands[0], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-     	  return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-     	  return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
-	default:
-     	  return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
-	}
+      return \"#\";
+
+    case 2:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 3:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
     }
 }"
-  "&& reload_completed && optimize"
-  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
-  "arc_split_move (operands);"
+  "reload_completed"
+  [(const_int 0)]
+  {
+   arc_split_move (operands);
+   DONE;
+  }
   [(set_attr "type" "move,move,load,store")
    ;; ??? The ld/st values could be 4 if it's [reg,bignum].
-   (set_attr "length" "8,16,16,16")])
+   (set_attr "length" "8,16,*,*")])
 
 
 ;; Floating point move insns.
@@ -1066,23 +1052,46 @@
   ""
   "if (prepare_move_operands (operands, DFmode)) DONE;")
 
-(define_insn "*movdf_insn"
+(define_insn_and_split "*movdf_insn"
   [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,c,c,r,m")
 	(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
   "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
-  "#"
+  "*
+{
+ switch (which_alternative)
+   {
+    default:
+      return \"#\";
+    case 4:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+	        && even_register_operand (operands[1], DFmode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 5:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+		&& even_register_operand (operands[1], DFmode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
+   }
+}"
+  "reload_completed"
+  [(const_int 0)]
+  {
+   arc_split_move (operands);
+   DONE;
+  }
   [(set_attr "type" "move,move,move,move,load,store")
    (set_attr "predicable" "no,no,yes,yes,no,no")
    ;; ??? The ld/st values could be 16 if it's [reg,bignum].
    (set_attr "length" "4,16,8,16,16,16")])
 
-(define_split
-  [(set (match_operand:DF 0 "move_dest_operand" "")
-	(match_operand:DF 1 "move_double_src_operand" ""))]
-  "reload_completed"
-  [(match_dup 2)]
-  "operands[2] = arc_split_move (operands);")
-
 (define_insn_and_split "*movdf_insn_nolrsr"
   [(set (match_operand:DF 0 "register_operand"       "=r")
 	(match_operand:DF 1 "arc_double_register_operand" "D"))
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 79113a5..00b98d5 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -409,3 +409,7 @@ Target Joined
 matomic
 Target Report Mask(ATOMIC)
 Enable atomic instructions.
+
+mll64
+Target Report Mask(LL64)
+Enable double load/store instructions for ARC HS.
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index fba878b..52ac2ac 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -783,4 +783,15 @@
        (match_code "reg" "0")))
 
 (define_predicate "any_mem_operand"
-  (match_code "mem"))
\ No newline at end of file
+  (match_code "mem"))
+
+; Special predicate to match even-odd double register pair
+(define_predicate "even_register_operand"
+  (match_code "reg")
+  {
+   if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+      return 0;
+
+   return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
+			  || ((REGNO (op) & 1) == 0)));
+  })
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 070a516..6bb7330 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -585,7 +585,7 @@ Objective-C and Objective-C++ Dialects}.
 -mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
 -mtune=@var{cpu} -mmultcost=@var{num} @gol
 -munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
--mdiv-rem -mcode-density}
+-mdiv-rem -mcode-density -mll64}
 
 @emph{ARM Options}
 @gccoptlist{-mapcs-frame  -mno-apcs-frame @gol
@@ -13173,6 +13173,10 @@ Enable DIV/REM instructions for ARCv2 cores.
 @opindex mcode-density
 Enable code density instructions for ARC EM, default on for ARC HS.
 
+@item -mll64
+@opindex mll64
+Enable double load/store operations for ARC HS cores.
+
 @item -mmpy-option=@var{multo}
 @opindex mmpy-option
 Compile ARCv2 code with a multiplier design option.  @samp{wlh1} is
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-22 11:59       ` Claudiu Zissulescu
@ 2016-01-24 14:25         ` Joern Wolfgang Rennecke
  2016-01-25 11:18           ` Claudiu Zissulescu
  0 siblings, 1 reply; 8+ messages in thread
From: Joern Wolfgang Rennecke @ 2016-01-24 14:25 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois Bedard, jeremy.bennett



On 22/01/16 11:59, Claudiu Zissulescu wrote:
> Thank u for the feedback. I hope this new patch solves the outstanding issues. Please find it attached.

This is OK.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] [ARC] Add basic support for double load and store instructions
  2016-01-24 14:25         ` Joern Wolfgang Rennecke
@ 2016-01-25 11:18           ` Claudiu Zissulescu
  0 siblings, 0 replies; 8+ messages in thread
From: Claudiu Zissulescu @ 2016-01-25 11:18 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois Bedard, jeremy.bennett

Committed r232788

Thanks,
Claudiu

> -----Original Message-----
> From: Joern Wolfgang Rennecke [mailto:gnu@amylaar.uk]
> Sent: Sunday, January 24, 2016 3:26 PM
> To: Claudiu Zissulescu; gcc-patches@gcc.gnu.org
> Cc: Francois Bedard; jeremy.bennett@embecosm.com
> Subject: Re: [PATCH] [ARC] Add basic support for double load and store
> instructions
> 
> 
> 
> On 22/01/16 11:59, Claudiu Zissulescu wrote:
> > Thank u for the feedback. I hope this new patch solves the outstanding
> issues. Please find it attached.
> 
> This is OK.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-01-25 11:18 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-15 12:41 [PATCH] [ARC] Add basic support for double load and store instructions Claudiu Zissulescu
2016-01-17  6:21 ` Joern Wolfgang Rennecke
2016-01-18  9:26   ` Claudiu Zissulescu
2016-01-19 17:46   ` Claudiu Zissulescu
2016-01-20 15:39     ` Joern Wolfgang Rennecke
2016-01-22 11:59       ` Claudiu Zissulescu
2016-01-24 14:25         ` Joern Wolfgang Rennecke
2016-01-25 11:18           ` Claudiu Zissulescu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).