[PATCH] Fix avx512{f,vl} shuffles (PR target/87214)

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Fix avx512{f,vl} shuffles (PR target/87214)
@ 2019-01-26 11:36 Jakub Jelinek
  2019-01-27 11:20 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2019-01-26 11:36 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

Hi!

The following 4 define_insn shuffle patterns don't have sufficient
conditions.  This can be seen from the way they transform the
RTL representation into the mask, e.g.:
  mask = INTVAL (operands[3]) / 2;
  mask |= INTVAL (operands[5]) / 2 << 2;
  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
  operands[3] = GEN_INT (mask);
or from how the corresponding expander constructs the RTL representation from
the mask, e.g.:
  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
      (operands[0], operands[1], operands[2],
       GEN_INT (((mask >> 0) & 3) * 2),
       GEN_INT (((mask >> 0) & 3) * 2 + 1),
       GEN_INT (((mask >> 2) & 3) * 2),
       GEN_INT (((mask >> 2) & 3) * 2 + 1),
       GEN_INT (((mask >> 4) & 3) * 2 + 8),
       GEN_INT (((mask >> 4) & 3) * 2 + 9),
       GEN_INT (((mask >> 6) & 3) * 2 + 8),
       GEN_INT (((mask >> 6) & 3) * 2 + 9),
they really require not just that there are 2 (or 4) consecutive numbers
from certain range (in the predicate), but also that the first of these
numbers is a multiple of 2 (or 4) - the least significant 1 (or 2) bits
are ignored when creating the mask for the hw instruction.
Rather than including a huge set of new predicates like
const_0_or_2_operand, const_0_2_4_or_6_operand etc., this patch just
verifies the least significant 1 (or 2) bits are zero where needed,
plus some formatting fixes.

Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512),
verified that both testcases FAIL without the patch; in the second one every
single subtest fails (each of them has at least one pair or quadruple that
starts with a number that is not a multiple of 2 or 4, respectively).
Ok for trunk and release branches after a while?

2019-01-26  Jakub Jelinek  <jakub@redhat.com>

	PR target/87214
	* config/i386/sse.md
	(<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>,
	avx512f_shuf_<shuffletype>64x2_1<mask_name>): Ensure the
	first constants in pairs are multiples of 2.  Formatting fixes.
	(avx512vl_shuf_<shuffletype>32x4_1<mask_name>,
	avx512f_shuf_<shuffletype>32x4_1<mask_name>): Ensure the
	first constants in each quadruple are multiples of 4.  Formatting fixes.

	* gcc.target/i386/avx512vl-pr87214-1.c: New test.
	* gcc.target/i386/avx512vl-pr87214-2.c: New test.

--- gcc/config/i386/sse.md.jj	2019-01-25 23:46:02.156263173 +0100
+++ gcc/config/i386/sse.md	2019-01-26 00:01:24.510168638 +0100
@@ -13372,13 +13372,15 @@ (define_insn "<mask_codefor>avx512dq_shu
 	  (vec_concat:<ssedoublemode>
 	    (match_operand:VI8F_256 1 "register_operand" "v")
 	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
-	  (parallel [(match_operand 3  "const_0_to_3_operand")
-		     (match_operand 4  "const_0_to_3_operand")
-		     (match_operand 5  "const_4_to_7_operand")
-		     (match_operand 6  "const_4_to_7_operand")])))]
+	  (parallel [(match_operand 3 "const_0_to_3_operand")
+		     (match_operand 4 "const_0_to_3_operand")
+		     (match_operand 5 "const_4_to_7_operand")
+		     (match_operand 6 "const_4_to_7_operand")])))]
   "TARGET_AVX512VL
-   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
-       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
+   && (INTVAL (operands[3]) & 1) == 0
+   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
+   && (INTVAL (operands[5]) & 1) == 0
+   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
 {
   int mask;
   mask = INTVAL (operands[3]) / 2;
@@ -13421,19 +13423,23 @@ (define_insn "avx512f_shuf_<shuffletype>
 	  (vec_concat:<ssedoublemode>
 	    (match_operand:V8FI 1 "register_operand" "v")
 	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
-	  (parallel [(match_operand 3  "const_0_to_7_operand")
-		     (match_operand 4  "const_0_to_7_operand")
-		     (match_operand 5  "const_0_to_7_operand")
-		     (match_operand 6  "const_0_to_7_operand")
-		     (match_operand 7  "const_8_to_15_operand")
-		     (match_operand 8  "const_8_to_15_operand")
-		     (match_operand 9  "const_8_to_15_operand")
-		     (match_operand 10  "const_8_to_15_operand")])))]
+	  (parallel [(match_operand 3 "const_0_to_7_operand")
+		     (match_operand 4 "const_0_to_7_operand")
+		     (match_operand 5 "const_0_to_7_operand")
+		     (match_operand 6 "const_0_to_7_operand")
+		     (match_operand 7 "const_8_to_15_operand")
+		     (match_operand 8 "const_8_to_15_operand")
+		     (match_operand 9 "const_8_to_15_operand")
+		     (match_operand 10 "const_8_to_15_operand")])))]
   "TARGET_AVX512F
-   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
-       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
-       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
-       && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
+   && (INTVAL (operands[3]) & 1) == 0
+   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
+   && (INTVAL (operands[5]) & 1) == 0
+   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
+   && (INTVAL (operands[7]) & 1) == 0
+   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
+   && (INTVAL (operands[9]) & 1) == 0
+   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
 {
   int mask;
   mask = INTVAL (operands[3]) / 2;
@@ -13479,21 +13485,23 @@ (define_insn "avx512vl_shuf_<shuffletype
 	  (vec_concat:<ssedoublemode>
 	    (match_operand:VI4F_256 1 "register_operand" "v")
 	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
-	  (parallel [(match_operand 3  "const_0_to_7_operand")
-		     (match_operand 4  "const_0_to_7_operand")
-		     (match_operand 5  "const_0_to_7_operand")
-		     (match_operand 6  "const_0_to_7_operand")
-		     (match_operand 7  "const_8_to_15_operand")
-		     (match_operand 8  "const_8_to_15_operand")
-		     (match_operand 9  "const_8_to_15_operand")
+	  (parallel [(match_operand 3 "const_0_to_7_operand")
+		     (match_operand 4 "const_0_to_7_operand")
+		     (match_operand 5 "const_0_to_7_operand")
+		     (match_operand 6 "const_0_to_7_operand")
+		     (match_operand 7 "const_8_to_15_operand")
+		     (match_operand 8 "const_8_to_15_operand")
+		     (match_operand 9 "const_8_to_15_operand")
 		     (match_operand 10 "const_8_to_15_operand")])))]
   "TARGET_AVX512VL
-   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
-       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
-       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
-       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
-       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
-       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
+   && (INTVAL (operands[3]) & 3) == 0
+   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
+   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
+   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
+   && (INTVAL (operands[7]) & 3) == 0
+   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
+   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
+   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
 {
   int mask;
   mask = INTVAL (operands[3]) / 4;
@@ -13545,35 +13553,39 @@ (define_insn "avx512f_shuf_<shuffletype>
 	  (vec_concat:<ssedoublemode>
 	    (match_operand:V16FI 1 "register_operand" "v")
 	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
-	  (parallel [(match_operand 3  "const_0_to_15_operand")
-		     (match_operand 4  "const_0_to_15_operand")
-		     (match_operand 5  "const_0_to_15_operand")
-		     (match_operand 6  "const_0_to_15_operand")
-		     (match_operand 7  "const_0_to_15_operand")
-		     (match_operand 8  "const_0_to_15_operand")
-		     (match_operand 9  "const_0_to_15_operand")
-		     (match_operand 10  "const_0_to_15_operand")
-		     (match_operand 11  "const_16_to_31_operand")
-		     (match_operand 12  "const_16_to_31_operand")
-		     (match_operand 13  "const_16_to_31_operand")
-		     (match_operand 14  "const_16_to_31_operand")
-		     (match_operand 15  "const_16_to_31_operand")
-		     (match_operand 16  "const_16_to_31_operand")
-		     (match_operand 17  "const_16_to_31_operand")
-		     (match_operand 18  "const_16_to_31_operand")])))]
+	  (parallel [(match_operand 3 "const_0_to_15_operand")
+		     (match_operand 4 "const_0_to_15_operand")
+		     (match_operand 5 "const_0_to_15_operand")
+		     (match_operand 6 "const_0_to_15_operand")
+		     (match_operand 7 "const_0_to_15_operand")
+		     (match_operand 8 "const_0_to_15_operand")
+		     (match_operand 9 "const_0_to_15_operand")
+		     (match_operand 10 "const_0_to_15_operand")
+		     (match_operand 11 "const_16_to_31_operand")
+		     (match_operand 12 "const_16_to_31_operand")
+		     (match_operand 13 "const_16_to_31_operand")
+		     (match_operand 14 "const_16_to_31_operand")
+		     (match_operand 15 "const_16_to_31_operand")
+		     (match_operand 16 "const_16_to_31_operand")
+		     (match_operand 17 "const_16_to_31_operand")
+		     (match_operand 18 "const_16_to_31_operand")])))]
   "TARGET_AVX512F
-   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
-       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
-       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
-       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
-       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
-       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
-       && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
-       && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
-       && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
-       && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
-       && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
-       && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
+   && (INTVAL (operands[3]) & 3) == 0
+   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
+   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
+   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
+   && (INTVAL (operands[7]) & 3) == 0
+   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
+   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
+   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
+   && (INTVAL (operands[11]) & 3) == 0
+   && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
+   && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
+   && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
+   && (INTVAL (operands[15]) & 3) == 0
+   && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
+   && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
+   && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
 {
   int mask;
   mask = INTVAL (operands[3]) / 4;
--- gcc/testsuite/gcc.target/i386/avx512vl-pr87214-1.c.jj	2019-01-26 00:01:24.511168621 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vl-pr87214-1.c	2019-01-26 00:13:39.730135406 +0100
@@ -0,0 +1,44 @@
+/* PR target/87214 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#include "avx512f-check.h"
+
+struct s { unsigned long a, b, c; };
+
+void __attribute__ ((noipa))
+foo (struct s *restrict s1, struct s *restrict s2, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      s1[i].b = s2[i].b;
+      s1[i].c = s2[i].c;
+      s2[i].c = 0;
+    }
+}
+                            
+#define N 12
+
+static void
+test_256 (void)
+{
+  struct s s1[N], s2[N];
+  for (unsigned int j = 0; j < N; ++j)
+    {
+      s2[j].a = j * 5;
+      s2[j].b = j * 5 + 2;
+      s2[j].c = j * 5 + 4;
+    }
+  foo (s1, s2, N);
+  for (unsigned int j = 0; j < N; ++j)
+  if (s1[j].b != j * 5 + 2)
+    __builtin_abort ();
+}
+
+static void
+test_128 (void)
+{
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-pr87214-2.c.jj	2019-01-26 00:01:24.511168621 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vl-pr87214-2.c	2019-01-26 00:17:19.941530293 +0100
@@ -0,0 +1,128 @@
+/* PR target/87214 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O2 -mavx512vl" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#include "avx512f-check.h"
+
+typedef long long int v4di __attribute__((vector_size (4 * sizeof (long long int))));
+typedef double v4df __attribute__((vector_size (4 * sizeof (double))));
+typedef long long int v8di __attribute__((vector_size (8 * sizeof (long long int))));
+typedef double v8df __attribute__((vector_size (8 * sizeof (double))));
+typedef int v8si __attribute__((vector_size (8 * sizeof (int))));
+typedef float v8sf __attribute__((vector_size (8 * sizeof (float))));
+typedef int v16si __attribute__((vector_size (16 * sizeof (int))));
+typedef float v16sf __attribute__((vector_size (16 * sizeof (float))));
+
+__attribute__((noipa)) void
+f1 (v4di *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v4di) { 2, 3, 5, 6 });
+}
+
+__attribute__((noipa)) void
+f2 (v4df *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v4di) { 1, 2, 6, 7 });
+}
+
+__attribute__((noipa)) void
+f3 (v8di *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v8di) { 2, 3, 5, 6, 8, 9, 11, 12 });
+}
+
+__attribute__((noipa)) void
+f4 (v8df *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v8di) { 1, 2, 6, 7, 9, 10, 12, 13 });
+}
+
+__attribute__((noipa)) void
+f5 (v8si *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v8si) { 2, 3, 4, 5, 9, 10, 11, 12 });
+}
+
+__attribute__((noipa)) void
+f6 (v8sf *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v8si) { 1, 2, 3, 4, 12, 13, 14, 15 });
+}
+
+__attribute__((noipa)) void
+f7 (v16si *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v16si) { 0, 1, 2, 3, 1, 2, 3, 4, 16, 17, 18, 19, 25, 26, 27, 28 });
+}
+
+__attribute__((noipa)) void
+f8 (v16sf *p)
+{
+  p[0] = __builtin_shuffle (p[1], p[2], (v16si) { 1, 2, 3, 4, 4, 5, 6, 7, 17, 18, 19, 20, 18, 19, 20, 21 });
+}
+
+static void
+test_256 (void)
+{
+  v4di a[3] = { { 0, 0, 0, 0 }, { 10, 11, 12, 13 }, { 14, 15, 16, 17 } };
+  f1 (a);
+  if (a[0][0] != 12 || a[0][1] != 13 || a[0][2] != 15 || a[0][3] != 16)
+    __builtin_abort ();
+  v4df b[3] = { { 0.0, 0.0, 0.0, 0.0 }, { 10.0, 11.0, 12.0, 13.0 }, { 14.0, 15.0, 16.0, 17.0 } };
+  f2 (b);
+  if (b[0][0] != 11.0 || b[0][1] != 12.0 || b[0][2] != 16.0 || b[0][3] != 17.0)
+    __builtin_abort ();
+  v8di c[3] = { { 0, 0, 0, 0, 0, 0, 0, 0 }, { 10, 11, 12, 13, 14, 15, 16, 17 }, { 18, 19, 20, 21, 22, 23, 24, 25 } };
+  f3 (c);
+  if (c[0][0] != 12 || c[0][1] != 13 || c[0][2] != 15 || c[0][3] != 16
+      || c[0][4] != 18 || c[0][5] != 19 || c[0][6] != 21 || c[0][7] != 22)
+    __builtin_abort ();
+  v8df d[3] = { { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
+		{ 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0 },
+		{ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0 } };
+  f4 (d);
+  if (d[0][0] != 11.0 || d[0][1] != 12.0 || d[0][2] != 16.0 || d[0][3] != 17.0
+      || d[0][4] != 19.0 || d[0][5] != 20.0 || d[0][6] != 22.0 || d[0][7] != 23.0)
+    __builtin_abort ();
+  v8si e[3] = { { 0, 0, 0, 0, 0, 0, 0, 0 }, { 10, 11, 12, 13, 14, 15, 16, 17 }, { 18, 19, 20, 21, 22, 23, 24, 25 } };
+  f5 (e);
+  if (e[0][0] != 12 || e[0][1] != 13 || e[0][2] != 14 || e[0][3] != 15
+      || e[0][4] != 19 || e[0][5] != 20 || e[0][6] != 21 || e[0][7] != 22)
+    __builtin_abort ();
+  v8sf f[3] = { { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f },
+		{ 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f },
+		{ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f } };
+  f6 (f);
+  if (f[0][0] != 11.0f || f[0][1] != 12.0f || f[0][2] != 13.0f || f[0][3] != 14.0f
+      || f[0][4] != 22.0f || f[0][5] != 23.0f || f[0][6] != 24.0f || f[0][7] != 25.0f)
+    __builtin_abort ();
+  v16si g[3] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+		 { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 },
+		 { 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41 } };
+  f7 (g);
+  if (g[0][0] != 10 || g[0][1] != 11 || g[0][2] != 12 || g[0][3] != 13
+      || g[0][4] != 11 || g[0][5] != 12 || g[0][6] != 13 || g[0][7] != 14
+      || g[0][8] != 26 || g[0][9] != 27 || g[0][10] != 28 || g[0][11] != 29
+      || g[0][12] != 35 || g[0][13] != 36 || g[0][14] != 37 || g[0][15] != 38)
+    __builtin_abort ();
+  v16sf h[3] = { { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+		   0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f },
+		 { 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+		   18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f },
+		 { 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f,
+		   34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f } };
+  f8 (h);
+  if (h[0][0] != 11.0f || h[0][1] != 12.0f || h[0][2] != 13.0f || h[0][3] != 14.0f
+      || h[0][4] != 14.0f || h[0][5] != 15.0f || h[0][6] != 16.0f || h[0][7] != 17.0f
+      || h[0][8] != 27.0f || h[0][9] != 28.0f || h[0][10] != 29.0f || h[0][11] != 30.0f
+      || h[0][12] != 28.0f || h[0][13] != 29.0f || h[0][14] != 30.0f || h[0][15] != 31.0f)
+    __builtin_abort ();
+}
+
+static void
+test_128 (void)
+{
+}

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix avx512{f,vl} shuffles (PR target/87214)
  2019-01-26 11:36 [PATCH] Fix avx512{f,vl} shuffles (PR target/87214) Jakub Jelinek
@ 2019-01-27 11:20 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2019-01-27 11:20 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On 1/26/19, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> The following 4 define_insn shuffle patterns don't have sufficient
> conditions.  As can be seen even from the way how they transform the
> RTL representation into the mask, e.g.:
>   mask = INTVAL (operands[3]) / 2;
>   mask |= INTVAL (operands[5]) / 2 << 2;
>   mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
>   mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
>   operands[3] = GEN_INT (mask);
> or how corresponding expander constructs the RTL representation from the
> mask,
> e.g.:
>   emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
>       (operands[0], operands[1], operands[2],
>        GEN_INT (((mask >> 0) & 3) * 2),
>        GEN_INT (((mask >> 0) & 3) * 2 + 1),
>        GEN_INT (((mask >> 2) & 3) * 2),
>        GEN_INT (((mask >> 2) & 3) * 2 + 1),
>        GEN_INT (((mask >> 4) & 3) * 2 + 8),
>        GEN_INT (((mask >> 4) & 3) * 2 + 9),
>        GEN_INT (((mask >> 6) & 3) * 2 + 8),
>        GEN_INT (((mask >> 6) & 3) * 2 + 9),
> they really require not just that there are 2 (or 4) consecutive numbers
> from certain range (in the predicate), but also that the first of these
> numbers is a multiple of 2 (or 4) - the least significant 1 (or 2) bits
> are ignored when creating the mask for the hw instruction.
> Rather than including a huge set of new predicates like
> const_0_or_2_operand, const_0_2_4_or_6_operand etc., this patch just
> verifies the least significant 1 (or 2) bits are zero where needed,
> plus some formatting fixes.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512),
> verified both testcases FAIL without the patch, including for the second
> one
> every single subtest in there (all those are where at least one set of
> pairs
> or quadruples starts with a number that is not a multiple of 2 or 4).
> Ok for trunk and release branches after a while?
>
> 2019-01-26  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR target/87214
> 	* config/i386/sse.md
> 	(<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>,
> 	avx512f_shuf_<shuffletype>64x2_1<mask_name>): Ensure the
> 	first constants in pairs are multiples of 2.  Formatting fixes.
> 	(avx512vl_shuf_<shuffletype>32x4_1<mask_name>,
> 	avx512f_shuf_<shuffletype>32x4_1<mask_name>): Ensure the
> 	first constants in each quadruple are multiples of 4.  Formatting fixes.
>
> 	* gcc.target/i386/avx512vl-pr87214-1.c: New test.
> 	* gcc.target/i386/avx512vl-pr87214-2.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj	2019-01-25 23:46:02.156263173 +0100
> +++ gcc/config/i386/sse.md	2019-01-26 00:01:24.510168638 +0100
> @@ -13372,13 +13372,15 @@ (define_insn "<mask_codefor>avx512dq_shu
>  	  (vec_concat:<ssedoublemode>
>  	    (match_operand:VI8F_256 1 "register_operand" "v")
>  	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
> -	  (parallel [(match_operand 3  "const_0_to_3_operand")
> -		     (match_operand 4  "const_0_to_3_operand")
> -		     (match_operand 5  "const_4_to_7_operand")
> -		     (match_operand 6  "const_4_to_7_operand")])))]
> +	  (parallel [(match_operand 3 "const_0_to_3_operand")
> +		     (match_operand 4 "const_0_to_3_operand")
> +		     (match_operand 5 "const_4_to_7_operand")
> +		     (match_operand 6 "const_4_to_7_operand")])))]
>    "TARGET_AVX512VL
> -   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
> -       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
> +   && (INTVAL (operands[3]) & 1) == 0
> +   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
> +   && (INTVAL (operands[5]) & 1) == 0
> +   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
>  {
>    int mask;
>    mask = INTVAL (operands[3]) / 2;
> @@ -13421,19 +13423,23 @@ (define_insn "avx512f_shuf_<shuffletype>
>  	  (vec_concat:<ssedoublemode>
>  	    (match_operand:V8FI 1 "register_operand" "v")
>  	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
> -	  (parallel [(match_operand 3  "const_0_to_7_operand")
> -		     (match_operand 4  "const_0_to_7_operand")
> -		     (match_operand 5  "const_0_to_7_operand")
> -		     (match_operand 6  "const_0_to_7_operand")
> -		     (match_operand 7  "const_8_to_15_operand")
> -		     (match_operand 8  "const_8_to_15_operand")
> -		     (match_operand 9  "const_8_to_15_operand")
> -		     (match_operand 10  "const_8_to_15_operand")])))]
> +	  (parallel [(match_operand 3 "const_0_to_7_operand")
> +		     (match_operand 4 "const_0_to_7_operand")
> +		     (match_operand 5 "const_0_to_7_operand")
> +		     (match_operand 6 "const_0_to_7_operand")
> +		     (match_operand 7 "const_8_to_15_operand")
> +		     (match_operand 8 "const_8_to_15_operand")
> +		     (match_operand 9 "const_8_to_15_operand")
> +		     (match_operand 10 "const_8_to_15_operand")])))]
>    "TARGET_AVX512F
> -   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
> -       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
> -       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
> -       && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
> +   && (INTVAL (operands[3]) & 1) == 0
> +   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
> +   && (INTVAL (operands[5]) & 1) == 0
> +   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
> +   && (INTVAL (operands[7]) & 1) == 0
> +   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
> +   && (INTVAL (operands[9]) & 1) == 0
> +   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
>  {
>    int mask;
>    mask = INTVAL (operands[3]) / 2;
> @@ -13479,21 +13485,23 @@ (define_insn "avx512vl_shuf_<shuffletype
>  	  (vec_concat:<ssedoublemode>
>  	    (match_operand:VI4F_256 1 "register_operand" "v")
>  	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
> -	  (parallel [(match_operand 3  "const_0_to_7_operand")
> -		     (match_operand 4  "const_0_to_7_operand")
> -		     (match_operand 5  "const_0_to_7_operand")
> -		     (match_operand 6  "const_0_to_7_operand")
> -		     (match_operand 7  "const_8_to_15_operand")
> -		     (match_operand 8  "const_8_to_15_operand")
> -		     (match_operand 9  "const_8_to_15_operand")
> +	  (parallel [(match_operand 3 "const_0_to_7_operand")
> +		     (match_operand 4 "const_0_to_7_operand")
> +		     (match_operand 5 "const_0_to_7_operand")
> +		     (match_operand 6 "const_0_to_7_operand")
> +		     (match_operand 7 "const_8_to_15_operand")
> +		     (match_operand 8 "const_8_to_15_operand")
> +		     (match_operand 9 "const_8_to_15_operand")
>  		     (match_operand 10 "const_8_to_15_operand")])))]
>    "TARGET_AVX512VL
> -   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
> -       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
> -       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
> -       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
> -       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
> -       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
> +   && (INTVAL (operands[3]) & 3) == 0
> +   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
> +   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
> +   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
> +   && (INTVAL (operands[7]) & 3) == 0
> +   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
> +   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
> +   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
>  {
>    int mask;
>    mask = INTVAL (operands[3]) / 4;
> @@ -13545,35 +13553,39 @@ (define_insn "avx512f_shuf_<shuffletype>
>  	  (vec_concat:<ssedoublemode>
>  	    (match_operand:V16FI 1 "register_operand" "v")
>  	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
> -	  (parallel [(match_operand 3  "const_0_to_15_operand")
> -		     (match_operand 4  "const_0_to_15_operand")
> -		     (match_operand 5  "const_0_to_15_operand")
> -		     (match_operand 6  "const_0_to_15_operand")
> -		     (match_operand 7  "const_0_to_15_operand")
> -		     (match_operand 8  "const_0_to_15_operand")
> -		     (match_operand 9  "const_0_to_15_operand")
> -		     (match_operand 10  "const_0_to_15_operand")
> -		     (match_operand 11  "const_16_to_31_operand")
> -		     (match_operand 12  "const_16_to_31_operand")
> -		     (match_operand 13  "const_16_to_31_operand")
> -		     (match_operand 14  "const_16_to_31_operand")
> -		     (match_operand 15  "const_16_to_31_operand")
> -		     (match_operand 16  "const_16_to_31_operand")
> -		     (match_operand 17  "const_16_to_31_operand")
> -		     (match_operand 18  "const_16_to_31_operand")])))]
> +	  (parallel [(match_operand 3 "const_0_to_15_operand")
> +		     (match_operand 4 "const_0_to_15_operand")
> +		     (match_operand 5 "const_0_to_15_operand")
> +		     (match_operand 6 "const_0_to_15_operand")
> +		     (match_operand 7 "const_0_to_15_operand")
> +		     (match_operand 8 "const_0_to_15_operand")
> +		     (match_operand 9 "const_0_to_15_operand")
> +		     (match_operand 10 "const_0_to_15_operand")
> +		     (match_operand 11 "const_16_to_31_operand")
> +		     (match_operand 12 "const_16_to_31_operand")
> +		     (match_operand 13 "const_16_to_31_operand")
> +		     (match_operand 14 "const_16_to_31_operand")
> +		     (match_operand 15 "const_16_to_31_operand")
> +		     (match_operand 16 "const_16_to_31_operand")
> +		     (match_operand 17 "const_16_to_31_operand")
> +		     (match_operand 18 "const_16_to_31_operand")])))]
>    "TARGET_AVX512F
> -   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
> -       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
> -       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
> -       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
> -       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
> -       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
> -       && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
> -       && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
> -       && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
> -       && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
> -       && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
> -       && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
> +   && (INTVAL (operands[3]) & 3) == 0
> +   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
> +   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
> +   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
> +   && (INTVAL (operands[7]) & 3) == 0
> +   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
> +   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
> +   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
> +   && (INTVAL (operands[11]) & 3) == 0
> +   && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
> +   && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
> +   && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
> +   && (INTVAL (operands[15]) & 3) == 0
> +   && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
> +   && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
> +   && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
>  {
>    int mask;
>    mask = INTVAL (operands[3]) / 4;
> --- gcc/testsuite/gcc.target/i386/avx512vl-pr87214-1.c.jj	2019-01-26
> 00:01:24.511168621 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vl-pr87214-1.c	2019-01-26
> 00:13:39.730135406 +0100
> @@ -0,0 +1,44 @@
> +/* PR target/87214 */
> +/* { dg-do run { target { avx512vl } } } */
> +/* { dg-options "-O3 -mavx512vl -mtune=skylake-avx512" } */
> +
> +#define AVX512VL
> +#define AVX512F_LEN 512
> +#define AVX512F_LEN_HALF 256
> +#include "avx512f-check.h"
> +
> +struct s { unsigned long a, b, c; };
> +
> +void __attribute__ ((noipa))
> +foo (struct s *restrict s1, struct s *restrict s2, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    {
> +      s1[i].b = s2[i].b;
> +      s1[i].c = s2[i].c;
> +      s2[i].c = 0;
> +    }
> +}
> +
> +#define N 12
> +
> +static void
> +test_256 (void)
> +{
> +  struct s s1[N], s2[N];
> +  for (unsigned int j = 0; j < N; ++j)
> +    {
> +      s2[j].a = j * 5;
> +      s2[j].b = j * 5 + 2;
> +      s2[j].c = j * 5 + 4;
> +    }
> +  foo (s1, s2, N);
> +  for (unsigned int j = 0; j < N; ++j)
> +  if (s1[j].b != j * 5 + 2)
> +    __builtin_abort ();
> +}
> +
> +static void
> +test_128 (void)
> +{
> +}
> --- gcc/testsuite/gcc.target/i386/avx512vl-pr87214-2.c.jj	2019-01-26
> 00:01:24.511168621 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vl-pr87214-2.c	2019-01-26
> 00:17:19.941530293 +0100
> @@ -0,0 +1,128 @@
> +/* PR target/87214 */
> +/* { dg-do run { target { avx512vl } } } */
> +/* { dg-options "-O2 -mavx512vl" } */
> +
> +#define AVX512VL
> +#define AVX512F_LEN 512
> +#define AVX512F_LEN_HALF 256
> +#include "avx512f-check.h"
> +
> +typedef long long int v4di __attribute__((vector_size (4 * sizeof (long
> long int))));
> +typedef double v4df __attribute__((vector_size (4 * sizeof (double))));
> +typedef long long int v8di __attribute__((vector_size (8 * sizeof (long
> long int))));
> +typedef double v8df __attribute__((vector_size (8 * sizeof (double))));
> +typedef int v8si __attribute__((vector_size (8 * sizeof (int))));
> +typedef float v8sf __attribute__((vector_size (8 * sizeof (float))));
> +typedef int v16si __attribute__((vector_size (16 * sizeof (int))));
> +typedef float v16sf __attribute__((vector_size (16 * sizeof (float))));
> +
> +__attribute__((noipa)) void
> +f1 (v4di *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v4di) { 2, 3, 5, 6 });
> +}
> +
> +__attribute__((noipa)) void
> +f2 (v4df *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v4di) { 1, 2, 6, 7 });
> +}
> +
> +__attribute__((noipa)) void
> +f3 (v8di *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v8di) { 2, 3, 5, 6, 8, 9, 11, 12
> });
> +}
> +
> +__attribute__((noipa)) void
> +f4 (v8df *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v8di) { 1, 2, 6, 7, 9, 10, 12, 13
> });
> +}
> +
> +__attribute__((noipa)) void
> +f5 (v8si *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v8si) { 2, 3, 4, 5, 9, 10, 11, 12
> });
> +}
> +
> +__attribute__((noipa)) void
> +f6 (v8sf *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v8si) { 1, 2, 3, 4, 12, 13, 14, 15
> });
> +}
> +
> +__attribute__((noipa)) void
> +f7 (v16si *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v16si) { 0, 1, 2, 3, 1, 2, 3, 4, 16, 17, 18, 19, 25, 26, 27, 28 });
> +}
> +
> +__attribute__((noipa)) void
> +f8 (v16sf *p)
> +{
> +  p[0] = __builtin_shuffle (p[1], p[2], (v16si) { 1, 2, 3, 4, 4, 5, 6, 7, 17, 18, 19, 20, 18, 19, 20, 21 });
> +}
> +
> +static void
> +test_256 (void)
> +{
> +  v4di a[3] = { { 0, 0, 0, 0 }, { 10, 11, 12, 13 }, { 14, 15, 16, 17 } };
> +  f1 (a);
> +  if (a[0][0] != 12 || a[0][1] != 13 || a[0][2] != 15 || a[0][3] != 16)
> +    __builtin_abort ();
> +  v4df b[3] = { { 0.0, 0.0, 0.0, 0.0 }, { 10.0, 11.0, 12.0, 13.0 }, { 14.0, 15.0, 16.0, 17.0 } };
> +  f2 (b);
> +  if (b[0][0] != 11.0 || b[0][1] != 12.0 || b[0][2] != 16.0 || b[0][3] != 17.0)
> +    __builtin_abort ();
> +  v8di c[3] = { { 0, 0, 0, 0, 0, 0, 0, 0 }, { 10, 11, 12, 13, 14, 15, 16, 17 }, { 18, 19, 20, 21, 22, 23, 24, 25 } };
> +  f3 (c);
> +  if (c[0][0] != 12 || c[0][1] != 13 || c[0][2] != 15 || c[0][3] != 16
> +      || c[0][4] != 18 || c[0][5] != 19 || c[0][6] != 21 || c[0][7] != 22)
> +    __builtin_abort ();
> +  v8df d[3] = { { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
> +		{ 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0 },
> +		{ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0 } };
> +  f4 (d);
> +  if (d[0][0] != 11.0 || d[0][1] != 12.0 || d[0][2] != 16.0 || d[0][3] != 17.0
> +      || d[0][4] != 19.0 || d[0][5] != 20.0 || d[0][6] != 22.0 || d[0][7] != 23.0)
> +    __builtin_abort ();
> +  v8si e[3] = { { 0, 0, 0, 0, 0, 0, 0, 0 }, { 10, 11, 12, 13, 14, 15, 16, 17 }, { 18, 19, 20, 21, 22, 23, 24, 25 } };
> +  f5 (e);
> +  if (e[0][0] != 12 || e[0][1] != 13 || e[0][2] != 14 || e[0][3] != 15
> +      || e[0][4] != 19 || e[0][5] != 20 || e[0][6] != 21 || e[0][7] != 22)
> +    __builtin_abort ();
> +  v8sf f[3] = { { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f },
> +		{ 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f },
> +		{ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f } };
> +  f6 (f);
> +  if (f[0][0] != 11.0f || f[0][1] != 12.0f || f[0][2] != 13.0f || f[0][3] != 14.0f
> +      || f[0][4] != 22.0f || f[0][5] != 23.0f || f[0][6] != 24.0f || f[0][7] != 25.0f)
> +    __builtin_abort ();
> +  v16si g[3] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
> +		 { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 },
> +		 { 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41 } };
> +  f7 (g);
> +  if (g[0][0] != 10 || g[0][1] != 11 || g[0][2] != 12 || g[0][3] != 13
> +      || g[0][4] != 11 || g[0][5] != 12 || g[0][6] != 13 || g[0][7] != 14
> +      || g[0][8] != 26 || g[0][9] != 27 || g[0][10] != 28 || g[0][11] != 29
> +      || g[0][12] != 35 || g[0][13] != 36 || g[0][14] != 37 || g[0][15] != 38)
> +    __builtin_abort ();
> +  v16sf h[3] = { { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
> +		   0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f },
> +		 { 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
> +		   18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f },
> +		 { 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f,
> +		   34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f } };
> +  f8 (h);
> +  if (h[0][0] != 11.0f || h[0][1] != 12.0f || h[0][2] != 13.0f || h[0][3] != 14.0f
> +      || h[0][4] != 14.0f || h[0][5] != 15.0f || h[0][6] != 16.0f || h[0][7] != 17.0f
> +      || h[0][8] != 27.0f || h[0][9] != 28.0f || h[0][10] != 29.0f || h[0][11] != 30.0f
> +      || h[0][12] != 28.0f || h[0][13] != 29.0f || h[0][14] != 30.0f || h[0][15] != 31.0f)
> +    __builtin_abort ();
> +}
> +
> +static void
> +test_128 (void)
> +{
> +}
>
> 	Jakub
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-01-27 10:38 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-26 11:36 [PATCH] Fix avx512{f,vl} shuffles (PR target/87214) Jakub Jelinek
2019-01-27 11:20 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).