public inbox for gcc-patches@gcc.gnu.org
* RE: [PATCH] [Aarch64] Optimize subtract in shift counts
@ 2017-08-08 10:04 Wilco Dijkstra
  0 siblings, 0 replies; 32+ messages in thread
From: Wilco Dijkstra @ 2017-08-08 10:04 UTC (permalink / raw)
  To: GCC Patches, kenner; +Cc: nd, Michael Collison, pinskia

Richard Kenner wrote:
> Michael Collison wrote:
> > On AArch64 SHIFT_COUNT_TRUNCATED is only true if SIMD code generation
> > is disabled. This is because the SIMD instructions can be used for
> > shifting but they do not truncate the shift count.
>
> In that case, the change isn't safe!  Consider if the value was
> negative, for example.  Yes, it's technically undefined, but I'm not
> sure I'd want to rely on that.

No, it's perfectly safe - it becomes an integer-only shift after the split, since
the split keeps the masking of the shift count as part of the pattern.
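To see why the mask makes this safe: (size - i) and (-i) are congruent modulo
the size, so a shift whose count is masked to the operand size - which is what
the split pattern emits - sees the same effective count either way. A minimal
standalone check of that identity (an illustration only, not part of the patch):

#include <assert.h>

int
main (void)
{
  /* For any count i, (64 - i) and (-i) agree modulo 64, and likewise
     (32 - i) and (-i) modulo 32, so a count-masking shift cannot tell
     the difference between the original and the split form.  */
  for (int i = -256; i <= 256; i++)
    {
      assert (((64 - i) & 63) == ((-i) & 63));
      assert (((32 - i) & 31) == ((-i) & 31));
    }
  return 0;
}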

But SHIFT_COUNT_TRUNCATED is generally a mess, and so are the other ways of
doing this - note the extremely complex subregs in the patch; none of that should
be required, as there are no QI registers on AArch64! So it would be great if there
were a better way to describe the number of bits used by a particular shift alternative.

Wilco

* Re: [PATCH] [Aarch64] Optimize subtract in shift counts
@ 2017-08-22 11:01 Wilco Dijkstra
  0 siblings, 0 replies; 32+ messages in thread
From: Wilco Dijkstra @ 2017-08-22 11:01 UTC (permalink / raw)
  To: Richard Biener, richard.sandiford
  Cc: kenner, Michael Collison, GCC Patches, nd

Hi,

The main reason we have this issue is that DImode can be treated as a
vector of size 1. As a result we do not know whether a shift will be an integer
or a SIMD instruction. One way around this is to never use the SIMD variant;
another is to introduce V1DImode for vectors of size 1.
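The difference in semantics is easy to see directly (a standalone, AArch64-only
sketch using standard ACLE intrinsics - an illustration only, not code from this
thread): the integer shift instructions use the count modulo the data size,
whereas the Advanced SIMD shifts read a signed count from the low byte of each
element and do not truncate it, so an out-of-range count gives different results:

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t x = 1;
  volatile uint64_t n = 64;    /* out-of-range shift count */

  /* Integer lsl: the hardware uses the count modulo 64, so this prints 1
     on AArch64.  (Shifting by 64 is undefined at the C level; the point
     here is only the ISA behaviour.)  */
  printf ("integer shift: %llu\n", (unsigned long long) (x << n));

  /* Advanced SIMD ushl, via the vshl_u64 intrinsic on a one-element
     vector: the count is not truncated, so shifting a 64-bit element
     left by 64 gives 0.  */
  uint64x1_t r = vshl_u64 (vcreate_u64 (x), vcreate_s64 (n));
  printf ("SIMD shift:    %llu\n",
          (unsigned long long) vget_lane_u64 (r, 0));
  return 0;
}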

Short term, I believe the first option is best - I don't think scalar operations executed
as SIMD instructions are useful in general, and these shifts are likely never used in
reality, so we're adding a lot of complexity and potential bugs.
Long term, we need to compute register classes and do instruction selection
much earlier, so that this doesn't increase the complexity of register allocation and
the machine descriptions.

Wilco

* [PATCH] [Aarch64] Optimize subtract in shift counts
@ 2017-08-07 20:36 Michael Collison
  2017-08-07 20:44 ` Andrew Pinski
  2017-08-07 20:58 ` Richard Kenner
  0 siblings, 2 replies; 32+ messages in thread
From: Michael Collison @ 2017-08-07 20:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 898 bytes --]

This patch improves code generation for shifts whose count is computed by a subtraction whose first operand is equal to the bit size of the operation.


long f1(long x, int i)
{
  return x >> (64 - i);
}

int f2(int x, int i)
{
  return x << (32 - i);
}


With trunk at -O2 we generate:

f1:
	mov	w2, 64
	sub	w1, w2, w1
	asr	x0, x0, x1
	ret

f2:
	mov	w2, 32
	sub	w1, w2, w1
	lsl	w0, w0, w1
	ret

With the patch we generate:

f1:
	neg	w2, w1
	asr	x0, x0, x2
	ret

f2:
	neg	w2, w1
	lsl	w0, w0, w2
	ret

Okay for trunk?

2017-08-07  Michael Collison <michael.collison@arm.com>

	* config/aarch64/aarch64.md (*aarch64_reg_<optab>_minus<mode>3):
	New pattern.

2017-08-07  Michael Collison <michael.collison@arm.com>

	* gcc.target/aarch64/var_shift_mask_2.c: New test.

[-- Attachment #2: pr7313v6.patch --]
[-- Type: application/octet-stream, Size: 3089 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f876a2b..9d4284c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4044,6 +4044,45 @@
   [(set_attr "type" "shift_reg")]
 )
 
+(define_insn_and_split "*aarch64_reg_<optab>_minus<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+	(ASHIFT:GPI
+	  (match_operand:GPI 1 "register_operand" "r")
+	  (minus:QI (match_operand 2 "const_int_operand" "n")
+		    (match_operand:QI 3 "register_operand" "r"))))]
+  "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    /* Handle cases where operand 3 is a plain QI register, or
+       a subreg with either a SImode or DImode register.  */
+
+    rtx subreg_tmp = (REG_P (operands[3])
+		      ? gen_lowpart_SUBREG (SImode, operands[3])
+		      : SUBREG_REG (operands[3]));
+
+    if (REG_P (subreg_tmp) && GET_MODE (subreg_tmp) == DImode)
+      subreg_tmp = gen_lowpart_SUBREG (SImode, subreg_tmp);
+
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+	       : operands[0]);
+
+    if (<MODE>mode == DImode && !can_create_pseudo_p ())
+      tmp = gen_lowpart_SUBREG (SImode, operands[0]);
+
+    emit_insn (gen_negsi2 (tmp, subreg_tmp));
+
+    rtx and_op = gen_rtx_AND (SImode, tmp,
+			      GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1));
+
+    rtx subreg_tmp2 = gen_lowpart_SUBREG (QImode, and_op);
+
+    emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp2));
+    DONE;
+  }
+)
+
 ;; Logical left shift using SISD or Integer instruction
 (define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,r,w,w")
diff --git a/gcc/testsuite/gcc.target/aarch64/var_shift_mask_2.c b/gcc/testsuite/gcc.target/aarch64/var_shift_mask_2.c
new file mode 100644
index 0000000..c1fe691
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/var_shift_mask_2.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long
+f1 (long long x, int i)
+{
+
+  return x >> (64 - i);
+}
+
+unsigned long long
+f2 (unsigned long long x, unsigned int i)
+{
+
+  return x >> (64 - i);
+}
+
+int
+f3 (int x, int i)
+{
+
+  return x >> (32 - i);
+}
+
+unsigned int
+f4 (unsigned int x, unsigned int i)
+{
+
+  return x >> (32 - i);
+}
+
+int
+f5 (int x, int i)
+{
+  return x << (32 - i);
+}
+
+long long
+f6 (long long x, int i)
+{
+  return x << (64 - i);
+}
+
+/* { dg-final { scan-assembler "lsl\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler "lsl\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
+/* { dg-final { scan-assembler "lsr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler "lsr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
+/* { dg-final { scan-assembler "asr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler "asr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "neg\tw\[0-9\]+, w\[0-9\]+" 6 } } */
+/* { dg-final { scan-assembler-not "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
-- 
1.9.1



end of thread

Thread overview: 32+ messages
2017-08-08 10:04 [PATCH] [Aarch64] Optimize subtract in shift counts Wilco Dijkstra
  -- strict thread matches above, loose matches on Subject: below --
2017-08-22 11:01 Wilco Dijkstra
2017-08-07 20:36 Michael Collison
2017-08-07 20:44 ` Andrew Pinski
2017-08-07 21:02   ` Richard Kenner
2017-08-07 23:44     ` Michael Collison
2017-08-08  1:56       ` Richard Kenner
2017-08-08  4:06         ` Michael Collison
2017-08-08 12:13           ` Richard Kenner
2017-08-08 19:46             ` Michael Collison
2017-08-08 19:52               ` Richard Kenner
2017-08-08 19:59                 ` Michael Collison
2017-08-08 20:04                   ` Richard Kenner
2017-08-08 20:18                     ` Michael Collison
2017-08-08 20:20                       ` Richard Kenner
2017-08-08 20:33                         ` Michael Collison
2017-08-14  8:58                         ` Richard Biener
2017-08-15  7:28                           ` Michael Collison
2017-08-21 19:11                           ` Richard Sandiford
2017-08-21 19:14                             ` Richard Biener
2017-08-22  8:17                               ` Richard Sandiford
2017-08-22  8:46                                 ` Richard Biener
2017-08-22  8:50                                   ` Richard Sandiford
2017-09-06  9:11                                     ` Michael Collison
2017-09-06 10:32                                       ` Richard Sandiford
2017-09-06 15:41                                         ` Michael Collison
2017-09-06 16:53                                           ` Richard Sandiford
2017-09-06 16:56                                             ` Richard Sandiford
2017-09-06 17:21                                             ` Richard Sandiford
2017-09-15  8:15                                               ` Michael Collison
2017-09-29 23:01                                                 ` James Greenhalgh
2017-08-07 20:58 ` Richard Kenner
