* [PATCH] Implement x86 reduc_plus_scal_v{16,32,64}qi (PR tree-optimization/91201)
@ 2019-07-31 7:20 Jakub Jelinek
2019-07-31 8:57 ` Uros Bizjak
0 siblings, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2019-07-31 7:20 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
Hi!
As mentioned in the PR, we can use psadbw to shorten the final reductions to
scalar for 8-bit elements. E.g. for -mavx2 the difference is:
- vmovdqa %xmm1, %xmm0
- vextracti128 $0x1, %ymm1, %xmm1
- vpaddb %xmm1, %xmm0, %xmm0
- vpsrldq $8, %xmm0, %xmm1
- vpaddb %xmm1, %xmm0, %xmm0
- vpsrldq $4, %xmm0, %xmm1
- vpaddb %xmm1, %xmm0, %xmm0
- vpsrldq $2, %xmm0, %xmm1
- vpaddb %xmm1, %xmm0, %xmm0
- vpsrldq $1, %xmm0, %xmm1
- vpaddb %xmm1, %xmm0, %xmm0
+ vextracti128 $0x1, %ymm1, %xmm0
+ vpaddb %xmm1, %xmm0, %xmm1
+ vpsrldq $8, %xmm1, %xmm0
+ vpaddb %xmm0, %xmm1, %xmm1
+ vpxor %xmm0, %xmm0, %xmm0
+ vpsadbw %xmm0, %xmm1, %xmm0
vpextrb $0, %xmm0, %eax
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2019-07-31 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/91201
* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
TARGET_AVX512F.
(reduc_plus_scal_<mode>): Improve formatting by introducing
a temporary.
* gcc.target/i386/sse2-pr91201.c: New test.
* gcc.target/i386/avx2-pr91201.c: New test.
* gcc.target/i386/avx512bw-pr91201.c: New test.
--- gcc/config/i386/sse.md.jj 2019-07-30 12:19:45.999490854 +0200
+++ gcc/config/i386/sse.md 2019-07-30 12:19:55.379352735 +0200
@@ -2728,9 +2728,30 @@ (define_expand "reduc_plus_scal_<mode>"
DONE;
})
+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+ (match_operand:QI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
+ rtx tmp2 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
+ rtx tmp3 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp3, CONST0_RTX (V16QImode));
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
+ tmp4 = gen_lowpart (V16QImode, tmp4);
+ emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
+ DONE;
+})
+
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
- (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+ (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE
@@ -2741,8 +2762,8 @@ (define_expand "reduc_plus_scal_<mode>"
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_add<ssehalfvecmodelower>3
- (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+ rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+ emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
DONE;
})
--- gcc/testsuite/gcc.target/i386/sse2-pr91201.c.jj 2019-07-30 12:23:48.930913778 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201.c 2019-07-30 12:23:45.518964018 +0200
@@ -0,0 +1,18 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler "\tpsadbw\t" } } */
+
+unsigned char bytes[1024];
+
+unsigned char
+sum (void)
+{
+ unsigned char r = 0;
+ unsigned char *p = (unsigned char *) bytes;
+ int n;
+
+ for (n = 0; n < sizeof (bytes); ++n)
+ r += p[n];
+ return r;
+}
--- gcc/testsuite/gcc.target/i386/avx2-pr91201.c.jj 2019-07-30 12:24:05.199674228 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-pr91201.c 2019-07-30 12:24:34.544242142 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
--- gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c.jj 2019-07-30 12:24:50.079013395 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c 2019-07-30 12:25:10.685709971 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
Jakub
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Implement x86 reduc_plus_scal_v{16,32,64}qi (PR tree-optimization/91201)
2019-07-31 7:20 [PATCH] Implement x86 reduc_plus_scal_v{16,32,64}qi (PR tree-optimization/91201) Jakub Jelinek
@ 2019-07-31 8:57 ` Uros Bizjak
2019-07-31 9:51 ` [PATCH] Implement x86 reduc_plus_scal_v8qi " Jakub Jelinek
0 siblings, 1 reply; 4+ messages in thread
From: Uros Bizjak @ 2019-07-31 8:57 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Wed, Jul 31, 2019 at 9:10 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As mentioned in the PR, we can use psadbw to shorten the final reductions to
> scalar for 8-bit elements. E.g. for -mavx2 the difference is:
> - vmovdqa %xmm1, %xmm0
> - vextracti128 $0x1, %ymm1, %xmm1
> - vpaddb %xmm1, %xmm0, %xmm0
> - vpsrldq $8, %xmm0, %xmm1
> - vpaddb %xmm1, %xmm0, %xmm0
> - vpsrldq $4, %xmm0, %xmm1
> - vpaddb %xmm1, %xmm0, %xmm0
> - vpsrldq $2, %xmm0, %xmm1
> - vpaddb %xmm1, %xmm0, %xmm0
> - vpsrldq $1, %xmm0, %xmm1
> - vpaddb %xmm1, %xmm0, %xmm0
> + vextracti128 $0x1, %ymm1, %xmm0
> + vpaddb %xmm1, %xmm0, %xmm1
> + vpsrldq $8, %xmm1, %xmm0
> + vpaddb %xmm0, %xmm1, %xmm1
> + vpxor %xmm0, %xmm0, %xmm0
> + vpsadbw %xmm0, %xmm1, %xmm0
> vpextrb $0, %xmm0, %eax
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2019-07-31 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/91201
> * config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
> (REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
> TARGET_AVX512F.
> (reduc_plus_scal_<mode>): Improve formatting by introducing
> a temporary.
>
> * gcc.target/i386/sse2-pr91201.c: New test.
> * gcc.target/i386/avx2-pr91201.c: New test.
> * gcc.target/i386/avx512bw-pr91201.c: New test.
OK.
Thanks,
Uros.
> --- gcc/config/i386/sse.md.jj 2019-07-30 12:19:45.999490854 +0200
> +++ gcc/config/i386/sse.md 2019-07-30 12:19:55.379352735 +0200
> @@ -2728,9 +2728,30 @@ (define_expand "reduc_plus_scal_<mode>"
> DONE;
> })
>
> +(define_expand "reduc_plus_scal_v16qi"
> + [(plus:V16QI
> + (match_operand:QI 0 "register_operand")
> + (match_operand:V16QI 1 "register_operand"))]
> + "TARGET_SSE2"
> +{
> + rtx tmp = gen_reg_rtx (V1TImode);
> + emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
> + GEN_INT (64)));
> + rtx tmp2 = gen_reg_rtx (V16QImode);
> + emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
> + rtx tmp3 = gen_reg_rtx (V16QImode);
> + emit_move_insn (tmp3, CONST0_RTX (V16QImode));
> + rtx tmp4 = gen_reg_rtx (V2DImode);
> + emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
> + tmp4 = gen_lowpart (V16QImode, tmp4);
> + emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
> + DONE;
> +})
> +
> (define_mode_iterator REDUC_PLUS_MODE
> [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
> - (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
> + (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
> + (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
>
> (define_expand "reduc_plus_scal_<mode>"
> [(plus:REDUC_PLUS_MODE
> @@ -2741,8 +2762,8 @@ (define_expand "reduc_plus_scal_<mode>"
> rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
> emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
> rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
> - emit_insn (gen_add<ssehalfvecmodelower>3
> - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
> + rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
> + emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
> emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
> DONE;
> })
> --- gcc/testsuite/gcc.target/i386/sse2-pr91201.c.jj 2019-07-30 12:23:48.930913778 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-pr91201.c 2019-07-30 12:23:45.518964018 +0200
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -msse2 -mno-sse3" } */
> +/* { dg-final { scan-assembler "\tpsadbw\t" } } */
> +
> +unsigned char bytes[1024];
> +
> +unsigned char
> +sum (void)
> +{
> + unsigned char r = 0;
> + unsigned char *p = (unsigned char *) bytes;
> + int n;
> +
> + for (n = 0; n < sizeof (bytes); ++n)
> + r += p[n];
> + return r;
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-pr91201.c.jj 2019-07-30 12:24:05.199674228 +0200
> +++ gcc/testsuite/gcc.target/i386/avx2-pr91201.c 2019-07-30 12:24:34.544242142 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
> +/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
> +
> +#include "sse2-pr91201.c"
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c.jj 2019-07-30 12:24:50.079013395 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c 2019-07-30 12:25:10.685709971 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
> +/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
> +
> +#include "sse2-pr91201.c"
>
> Jakub
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] Implement x86 reduc_plus_scal_v8qi (PR tree-optimization/91201)
2019-07-31 8:57 ` Uros Bizjak
@ 2019-07-31 9:51 ` Jakub Jelinek
2019-07-31 13:55 ` Uros Bizjak
0 siblings, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2019-07-31 9:51 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
Hi!
On Wed, Jul 31, 2019 at 10:51:22AM +0200, Uros Bizjak wrote:
> OK.
Thanks. This follow-up implements the same for MMX with SSE for V8QImode;
the testcase shows that it is useful too. The difference is quite large:
- movq $0, -72(%rsp)
- movl $bytes, %eax
movq bytes(%rip), %xmm0
+ movl $bytes, %eax
+ pxor %xmm2, %xmm2
.p2align 4,,10
.p2align 3
.L2:
movdqa %xmm0, %xmm1
movq 8(%rax), %xmm0
- movq -72(%rsp), %xmm2
addq $8, %rax
paddb %xmm0, %xmm1
paddb %xmm0, %xmm2
movq %xmm1, -8(%rax)
- movq %xmm2, -72(%rsp)
cmpq $bytes+1016, %rax
jne .L2
- movq -72(%rsp), %rcx
- movzbl -72(%rsp), %eax
- movzbl %ch, %edx
- addl %edx, %eax
- movq %rcx, %rdx
- shrq $16, %rdx
- addl %edx, %eax
- movq %rcx, %rdx
- shrq $24, %rdx
- addl %edx, %eax
- movq %rcx, %rdx
- shrq $32, %rdx
- addl %edx, %eax
- movq %rcx, %rdx
- shrq $40, %rdx
- addl %edx, %eax
- movq %rcx, %rdx
- shrq $48, %rdx
- addl %eax, %edx
- movq %rcx, %rax
- shrq $56, %rax
- addl %edx, %eax
+ pxor %xmm0, %xmm0
+ movdqa %xmm2, %xmm3
+ psadbw %xmm0, %xmm3
+ movq %xmm3, %rax
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2019-07-31 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/91201
* config/i386/mmx.md (reduc_plus_scal_v8qi): New expander.
* gcc.target/i386/sse2-pr91201-2.c: New test.
--- gcc/config/i386/mmx.md.jj 2019-07-20 08:35:05.720255567 +0200
+++ gcc/config/i386/mmx.md 2019-07-31 08:43:23.054776025 +0200
@@ -1897,6 +1897,21 @@ (define_insn "mmx_psadbw"
(set_attr "type" "mmxshft,sseiadd,sseiadd")
(set_attr "mode" "DI,TI,TI")])
+(define_expand "reduc_plus_scal_v8qi"
+ [(plus:V8QI
+ (match_operand:QI 0 "register_operand")
+ (match_operand:V8QI 1 "register_operand"))]
+ "TARGET_MMX_WITH_SSE"
+{
+ rtx tmp = gen_reg_rtx (V8QImode);
+ emit_move_insn (tmp, CONST0_RTX (V8QImode));
+ rtx tmp2 = gen_reg_rtx (V1DImode);
+ emit_insn (gen_mmx_psadbw (tmp2, operands[1], tmp));
+ tmp2 = gen_lowpart (V8QImode, tmp2);
+ emit_insn (gen_vec_extractv8qiqi (operands[0], tmp2, const0_rtx));
+ DONE;
+})
+
(define_insn_and_split "mmx_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c.jj 2019-07-31 08:45:19.553086849 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c 2019-07-31 08:46:52.556738334 +0200
@@ -0,0 +1,21 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O3 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler "\tpsadbw\t" } } */
+
+unsigned char bytes[1024];
+
+unsigned char
+sum (void)
+{
+ unsigned char r = 0;
+ unsigned char *p = (unsigned char *) bytes;
+ int n;
+
+ for (n = 8; n < sizeof (bytes); ++n)
+ {
+ p[n - 8] += p[n];
+ r += p[n];
+ }
+ return r;
+}
Jakub
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Implement x86 reduc_plus_scal_v8qi (PR tree-optimization/91201)
2019-07-31 9:51 ` [PATCH] Implement x86 reduc_plus_scal_v8qi " Jakub Jelinek
@ 2019-07-31 13:55 ` Uros Bizjak
0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2019-07-31 13:55 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Wed, Jul 31, 2019 at 11:30 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> On Wed, Jul 31, 2019 at 10:51:22AM +0200, Uros Bizjak wrote:
> > OK.
>
> Thanks. This follow-up implements the same for mmx with sse for V8QImode,
> the testcase shows that it is useful too. The difference is quite large:
>
> - movq $0, -72(%rsp)
> - movl $bytes, %eax
> movq bytes(%rip), %xmm0
> + movl $bytes, %eax
> + pxor %xmm2, %xmm2
> .p2align 4,,10
> .p2align 3
> .L2:
> movdqa %xmm0, %xmm1
> movq 8(%rax), %xmm0
> - movq -72(%rsp), %xmm2
> addq $8, %rax
> paddb %xmm0, %xmm1
> paddb %xmm0, %xmm2
> movq %xmm1, -8(%rax)
> - movq %xmm2, -72(%rsp)
> cmpq $bytes+1016, %rax
> jne .L2
> - movq -72(%rsp), %rcx
> - movzbl -72(%rsp), %eax
> - movzbl %ch, %edx
> - addl %edx, %eax
> - movq %rcx, %rdx
> - shrq $16, %rdx
> - addl %edx, %eax
> - movq %rcx, %rdx
> - shrq $24, %rdx
> - addl %edx, %eax
> - movq %rcx, %rdx
> - shrq $32, %rdx
> - addl %edx, %eax
> - movq %rcx, %rdx
> - shrq $40, %rdx
> - addl %edx, %eax
> - movq %rcx, %rdx
> - shrq $48, %rdx
> - addl %eax, %edx
> - movq %rcx, %rax
> - shrq $56, %rax
> - addl %edx, %eax
> + pxor %xmm0, %xmm0
> + movdqa %xmm2, %xmm3
> + psadbw %xmm0, %xmm3
> + movq %xmm3, %rax
Excellent!
IIRC, there are quite some (integer) named patterns that can be
implemented using TARGET_MMX_WITH_SSE. I'm not at my keyboard right
now, but it looks like horizontal adds can be implemented using the
same approach. I'm glad that TARGET_MMX_WITH_SSE opens such noticeable
optimization opportunities.
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2019-07-31 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/91201
> * config/i386/mmx.md (reduc_plus_scal_v8qi): New expander.
>
> * gcc.target/i386/sse2-pr91201-2.c: New test.
OK.
Thanks,
Uros.
> --- gcc/config/i386/mmx.md.jj 2019-07-20 08:35:05.720255567 +0200
> +++ gcc/config/i386/mmx.md 2019-07-31 08:43:23.054776025 +0200
> @@ -1897,6 +1897,21 @@ (define_insn "mmx_psadbw"
> (set_attr "type" "mmxshft,sseiadd,sseiadd")
> (set_attr "mode" "DI,TI,TI")])
>
> +(define_expand "reduc_plus_scal_v8qi"
> + [(plus:V8QI
> + (match_operand:QI 0 "register_operand")
> + (match_operand:V8QI 1 "register_operand"))]
> + "TARGET_MMX_WITH_SSE"
> +{
> + rtx tmp = gen_reg_rtx (V8QImode);
> + emit_move_insn (tmp, CONST0_RTX (V8QImode));
> + rtx tmp2 = gen_reg_rtx (V1DImode);
> + emit_insn (gen_mmx_psadbw (tmp2, operands[1], tmp));
> + tmp2 = gen_lowpart (V8QImode, tmp2);
> + emit_insn (gen_vec_extractv8qiqi (operands[0], tmp2, const0_rtx));
> + DONE;
> +})
> +
> (define_insn_and_split "mmx_pmovmskb"
> [(set (match_operand:SI 0 "register_operand" "=r,r")
> (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
> --- gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c.jj 2019-07-31 08:45:19.553086849 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c 2019-07-31 08:46:52.556738334 +0200
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-O3 -msse2 -mno-sse3" } */
> +/* { dg-final { scan-assembler "\tpsadbw\t" } } */
> +
> +unsigned char bytes[1024];
> +
> +unsigned char
> +sum (void)
> +{
> + unsigned char r = 0;
> + unsigned char *p = (unsigned char *) bytes;
> + int n;
> +
> + for (n = 8; n < sizeof (bytes); ++n)
> + {
> + p[n - 8] += p[n];
> + r += p[n];
> + }
> + return r;
> +}
>
>
> Jakub
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2019-07-31 13:33 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-31 7:20 [PATCH] Implement x86 reduc_plus_scal_v{16,32,64}qi (PR tree-optimization/91201) Jakub Jelinek
2019-07-31 8:57 ` Uros Bizjak
2019-07-31 9:51 ` [PATCH] Implement x86 reduc_plus_scal_v8qi " Jakub Jelinek
2019-07-31 13:55 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).