* [RFC/RFA][PATCH v2 05/12] i386: Implement new expander for efficient CRC computation
@ 2024-07-26 18:07 Mariam Arutunian
0 siblings, 0 replies; only message in thread
From: Mariam Arutunian @ 2024-07-26 18:07 UTC (permalink / raw)
To: GCC Patches
[-- Attachment #1.1: Type: text/plain, Size: 2613 bytes --]
This patch introduces two new expanders for the i386 backend,
dedicated to generating optimized code for CRC computations.
The new expanders are designed to leverage specific hardware
capabilities to achieve faster CRC calculations,
particularly using the pclmulqdq or crc32 instructions when supported by
the target architecture.
Expander 1: Bit-Forward CRC (crc<SWI124dup:mode><SWI124:mode>4)
For 64-bit targets (TARGET_64BIT) that support the pclmulqdq instruction
(TARGET_PCLMUL),
the expander generates code that uses the pclmulqdq instruction for
CRC computation.
Expander 2: Bit-Reversed CRC (crc_rev<SWI124dup:mode><SWI124:mode>4)
The expander first checks whether the target supports the CRC32 instruction
set (TARGET_CRC32)
and whether the polynomial in use is 0x1EDC6F41 (iSCSI). If both conditions
are met,
it emits the corresponding crc32 instruction (crc32b, crc32w,
or crc32l, depending on the data size).
If the crc32 instruction cannot be used (it is not supported, or the
polynomial is a different one) but the target is 64-bit and supports
pclmulqdq, the expander uses the pclmulqdq instruction for bit-reversed
CRC computation.
Otherwise, table-based CRC code is generated.
gcc/config/i386/
* i386-protos.h (ix86_expand_crc_using_pclmul): New extern function
declaration.
(ix86_expand_reversed_crc_using_pclmul): Likewise.
* i386.cc (ix86_expand_crc_using_pclmul): New function.
(ix86_expand_reversed_crc_using_pclmul): Likewise.
* i386.md (UNSPEC_CRC, UNSPEC_CRC_REV): New unspecs.
(SWI124dup): New iterator.
(crc<SWI124dup:mode><SWI124:mode>4): New expander for bit-forward
CRC.
(crc_rev<SWI124dup:mode><SWI124:mode>4): New expander for reversed
CRC.
gcc/testsuite/gcc.target/i386/
* crc-crc32-data16.c: New test.
* crc-crc32-data32.c: Likewise.
* crc-crc32-data8.c: Likewise.
* crc-1-pclmul.c: Likewise.
* crc-10-pclmul.c: Likewise.
* crc-12-pclmul.c: Likewise.
* crc-13-pclmul.c: Likewise.
* crc-14-pclmul.c: Likewise.
* crc-17-pclmul.c: Likewise.
* crc-18-pclmul.c: Likewise.
* crc-21-pclmul.c: Likewise.
* crc-22-pclmul.c: Likewise.
* crc-23-pclmul.c: Likewise.
* crc-4-pclmul.c: Likewise.
* crc-5-pclmul.c: Likewise.
* crc-6-pclmul.c: Likewise.
* crc-7-pclmul.c: Likewise.
* crc-8-pclmul.c: Likewise.
* crc-9-pclmul.c: Likewise.
* crc-CCIT-data16-pclmul.c: Likewise.
* crc-CCIT-data8-pclmul.c: Likewise.
* crc-coremark-16bitdata-pclmul.c: Likewise.
Signed-off-by: Mariam Arutunian <mariamarutunian@gmail.com>
[-- Attachment #2: 0005-i386-Implement-new-expander-for-efficient-CRC-comput.patch --]
[-- Type: text/x-patch, Size: 25522 bytes --]
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index dbc861fb1ea..845a5dcd9ab 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -288,6 +288,8 @@ extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
extern void ix86_expand_sse2_abs (rtx, rtx);
extern bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
rtx);
+extern void ix86_expand_crc_using_pclmul (rtx *);
+extern void ix86_expand_reversed_crc_using_pclmul (rtx *);
extern bool ix86_extract_perm_from_pool_constant (int*, rtx);
/* In i386-c.cc */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 69cd4ae05a7..33a2ab4f99c 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26185,6 +26185,135 @@ ix86_run_selftests (void)
} // namespace selftest
#endif /* CHECKING_P */
+
+/* Generate assembly to calculate CRC using pclmulqdq instruction.
+   OPERANDS[0] is the register that receives the calculated CRC,
+   OPERANDS[1] is input CRC,
+   OPERANDS[2] is data (message),
+   OPERANDS[3] is the polynomial without the leading 1; it must be a
+   CONST_INT.
+
+   This function is deliberately placed after the CHECKING_P-only selftest
+   region: the crc expander in i386.md calls it regardless of CHECKING_P.  */
+
+void
+ix86_expand_crc_using_pclmul (rtx *operands)
+{
+  /* Check and keep arguments.  */
+  gcc_assert (!CONST_INT_P (operands[0]));
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx crc = operands[1];
+  rtx data = operands[2];
+  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (GET_MODE (operands[0]));
+  gcc_assert (crc_size <= 32);
+  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (GET_MODE (data));
+
+  /* Calculate the quotient.  */
+  unsigned HOST_WIDE_INT
+    q = gf2n_poly_long_div_quotient (UINTVAL (operands[3]), crc_size);
+
+  /* When the CRC is wider than the data, only its upper DATA_SIZE bits are
+     XORed with the data; the original CRC is folded back in further
+     down.  */
+  if (crc_size > data_size)
+    crc = expand_shift (RSHIFT_EXPR, DImode, crc, crc_size - data_size,
+                        NULL_RTX, 1);
+
+  /* Keep the quotient in V2DImode.  */
+  rtx q_v2di = gen_reg_rtx (V2DImode);
+  rtx quotient = gen_reg_rtx (DImode);
+  convert_move (quotient, gen_int_mode (q, DImode), 0);
+  emit_insn (gen_vec_concatv2di (q_v2di, quotient, const0_rtx));
+
+  /* crc ^ data and keep in V2DImode.  */
+  rtx cd_xor = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
+                             OPTAB_WIDEN);
+  rtx res = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (res, cd_xor, const0_rtx));
+  /* Perform carry-less multiplication.  */
+  emit_insn (gen_pclmulqdq (res, res, q_v2di, gen_int_mode (0, DImode)));
+
+  /* Drop the low CRC_SIZE bits of the product.  */
+  res = expand_shift (RSHIFT_EXPR, V2DImode, res, crc_size, NULL_RTX, 0);
+
+  /* Keep the polynomial in V2DImode.  */
+  rtx polynomial = gen_reg_rtx (DImode);
+  convert_move (polynomial, operands[3], 0);
+  rtx p_v2di = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (p_v2di, polynomial, const0_rtx));
+
+  /* Perform carry-less multiplication and get low part.  */
+  emit_insn (gen_pclmulqdq (res, res, p_v2di, gen_int_mode (0, DImode)));
+  rtx crc_part = gen_reg_rtx (DImode);
+  emit_insn (gen_vec_extractv2didi (crc_part, res, const0_rtx));
+
+  if (crc_size > data_size)
+    {
+      /* XOR in the original CRC shifted left by DATA_SIZE.  */
+      rtx shift = expand_shift (LSHIFT_EXPR, DImode, operands[1], data_size,
+                                NULL_RTX, 1);
+      crc_part = expand_binop (DImode, xor_optab, crc_part, shift, NULL_RTX, 1,
+                               OPTAB_DIRECT);
+    }
+
+  /* Bits above CRC_SIZE need no explicit clearing: only the low part of
+     CRC_PART, in the mode of OPERANDS[0], is stored.  */
+  emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), crc_part));
+}
+
+/* Generate assembly to calculate reversed CRC using pclmulqdq instruction.
+   OPERANDS[0] is the register that receives the calculated CRC,
+   OPERANDS[1] is input CRC,
+   OPERANDS[2] is data (message),
+   OPERANDS[3] is the polynomial without the leading 1; it must be a
+   CONST_INT.
+
+   Like ix86_expand_crc_using_pclmul, this is placed outside of the
+   CHECKING_P-only region because i386.md calls it unconditionally.  */
+
+void
+ix86_expand_reversed_crc_using_pclmul (rtx *operands)
+{
+  /* Check and keep arguments.  */
+  gcc_assert (!CONST_INT_P (operands[0]));
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx crc = operands[1];
+  rtx data = operands[2];
+  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (GET_MODE (operands[0]));
+  gcc_assert (crc_size <= 32);
+  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (GET_MODE (data));
+
+  /* Calculate the quotient.  */
+  unsigned HOST_WIDE_INT
+    q = gf2n_poly_long_div_quotient (UINTVAL (operands[3]), crc_size);
+
+  /* Reflect the calculated quotient and keep it in V2DImode.
+     NOTE(review): the shift amounts DATA_SIZE - 4 here and
+     64 - CRC_SIZE - 3 below look like coupled alignment constants; their
+     derivation is not visible in this function -- please confirm and
+     document it.  */
+  q = reflect (q);
+  rtx q_v2di = gen_reg_rtx (V2DImode);
+  rtx quotient = gen_reg_rtx (DImode);
+  convert_move (quotient, gen_int_mode (q >> (data_size - 4), DImode), 0);
+  emit_insn (gen_vec_concatv2di (q_v2di, quotient, const0_rtx));
+
+  /* crc ^ data and keep in V2DImode.  */
+  rtx cd_xor = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
+                             OPTAB_WIDEN);
+
+  /* Perform carry-less multiplication.  */
+  rtx res = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (res, cd_xor, const0_rtx));
+  emit_insn (gen_pclmulqdq (res, res, q_v2di, gen_int_mode (0, DImode)));
+
+  res = expand_shift (LSHIFT_EXPR, V2DImode, res, 64 - crc_size - 3,
+                      NULL_RTX, 0);
+
+  /* Reflect the polynomial and keep in V2DImode.  */
+  unsigned HOST_WIDE_INT reflected_op3 = reflect (UINTVAL (operands[3]));
+  rtx polynomial = gen_reg_rtx (DImode);
+  convert_move (polynomial, gen_int_mode (reflected_op3 << 1, DImode), 0);
+  rtx p_v2di = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (p_v2di, polynomial, const0_rtx));
+
+  /* Perform carry-less multiplication and get high part.  */
+  emit_insn (gen_pclmulqdq (res, res, p_v2di, gen_int_mode (0, DImode)));
+  rtx res_high = gen_reg_rtx (DImode);
+  emit_insn (gen_vec_extractv2didi (res_high, res, const1_rtx));
+
+  if (crc_size > data_size)
+    {
+      /* XOR in the input CRC shifted right by DATA_SIZE.  */
+      rtx shift = expand_shift (RSHIFT_EXPR, DImode, crc, data_size,
+                                NULL_RTX, 1);
+      res_high = expand_binop (DImode, xor_optab, res_high, shift, NULL_RTX, 1,
+                               OPTAB_DIRECT);
+    }
+  emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), res_high));
+}
static const scoped_attribute_specs *const ix86_attribute_table[] =
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e8073f5a200..681e0fed59b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -189,6 +189,10 @@
;; For CRC32 support
UNSPEC_CRC32
+ ;; For CRC support
+ UNSPEC_CRC
+ UNSPEC_CRC_REV
+
;; For LZCNT suppoprt
UNSPEC_LZCNT
@@ -27175,6 +27179,61 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+
+;; Same as SWI124. Added for different combinations.
+(define_mode_iterator SWI124dup [QI HI SI])
+
+;; Bit-forward CRC 8, 16, 32 using pclmulqdq (TARGET_PCLMUL && TARGET_64BIT).
+(define_expand "crc<SWI124dup:mode><SWI124:mode>4"
+ ;; return value (calculated CRC)
+ [(set (match_operand:SWI124 0 "register_operand" "=r")
+ ;; initial CRC
+ (unspec:SWI124 [(match_operand:SWI124 1 "register_operand" "r")
+ ;; data
+ (match_operand:SWI124dup 2 "register_operand" "r")
+ ;; polynomial without leading 1 (ix86_expand_crc_using_pclmul asserts it is a CONST_INT)
+ (match_operand:SWI124 3)]
+ UNSPEC_CRC))]
+ /* Data wider than the CRC is not supported, hence the mode comparison. */
+ "TARGET_PCLMUL && TARGET_64BIT && <SWI124:MODE>mode >= <SWI124dup:MODE>mode"
+{
+ ix86_expand_crc_using_pclmul (operands);
+ DONE;
+})
+
+;; Reversed CRC 8, 16, 32.  Enabled for TARGET_PCLMUL && TARGET_64BIT
+;; or for TARGET_CRC32.  The crc32 instruction is used for the iSCSI
+;; polynomial, pclmulqdq where available, and table-based code otherwise.
+(define_expand "crc_rev<SWI124dup:mode><SWI124:mode>4"
+  ;; return value (calculated CRC)
+  [(set (match_operand:SWI124 0 "register_operand" "=r")
+        ;; initial CRC
+        (unspec:SWI124 [(match_operand:SWI124 1 "register_operand" "r")
+                        ;; data
+                        (match_operand:SWI124dup 2 "register_operand" "r")
+                        ;; polynomial without leading 1
+                        (match_operand:SWI124 3)]
+                       UNSPEC_CRC_REV))]
+  /* The case when data's size is bigger than CRC's size is not supported.  */
+  "((TARGET_PCLMUL && TARGET_64BIT) || TARGET_CRC32)
+   && <SWI124:MODE>mode >= <SWI124dup:MODE>mode"
+{
+  /* If it is iSCSI polynomial (0x1EDC6F41), generate crc32 instruction.  */
+  if (TARGET_CRC32 && INTVAL (operands[3]) == 0x1EDC6F41)
+    {
+      /* The crc32 pattern produces an SImode result; store its low part
+         in the mode of operand 0.  */
+      rtx crc_part = gen_reg_rtx (SImode);
+      rtx crc = operands[1];
+      rtx data = operands[2];
+      emit_insn (gen_sse4_2_crc32<SWI124dup:mode> (crc_part, crc, data));
+      emit_move_insn (operands[0],
+                      gen_lowpart (GET_MODE (operands[0]), crc_part));
+    }
+  else if (TARGET_PCLMUL && TARGET_64BIT)
+    ix86_expand_reversed_crc_using_pclmul (operands);
+  else
+    /* TARGET_CRC32 only, with a polynomial other than iSCSI.  */
+    expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
+                                     operands[3], GET_MODE (operands[2]),
+                                     generate_reflecting_code_standard);
+  DONE;
+})
+
(define_insn "rdpmc"
[(set (match_operand:DI 0 "register_operand" "=A")
(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
diff --git a/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
new file mode 100644
index 00000000000..21edf417f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
+
+#include "../../gcc.dg/torture/crc-1.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
new file mode 100644
index 00000000000..54e3310c17b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-10.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
new file mode 100644
index 00000000000..1ac9a6bf56d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-12.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
new file mode 100644
index 00000000000..d5ac93525b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-13.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
new file mode 100644
index 00000000000..3f916b913cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-14.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
new file mode 100644
index 00000000000..6c3c8460535
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-17.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
new file mode 100644
index 00000000000..4d3ac62a4ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-18.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
new file mode 100644
index 00000000000..e9569bebd25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-21.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
new file mode 100644
index 00000000000..92f1559b040
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-22.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
new file mode 100644
index 00000000000..0417f10e5e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-23.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
new file mode 100644
index 00000000000..6c6c0608541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-4.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
new file mode 100644
index 00000000000..b80368e81bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -w -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-5.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
new file mode 100644
index 00000000000..d3ac2cb09fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-6.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
new file mode 100644
index 00000000000..8bbb5098e66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-7.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
new file mode 100644
index 00000000000..fe9f1e90270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-8.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
new file mode 100644
index 00000000000..91936c07b39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-9.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
new file mode 100644
index 00000000000..ca728120858
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-CCIT-data16.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
new file mode 100644
index 00000000000..816e0561d8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+
+#include "../../gcc.dg/torture/crc-CCIT-data8.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
new file mode 100644
index 00000000000..817d960b0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-coremark16-data16.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
new file mode 100644
index 00000000000..49ab5f31ef0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0))) /* Reference copy: not inlined, not optimized.  */
+uint32_t _crc32_O0 (uint32_t crc, uint16_t data) {
+ int i;
+ crc = crc ^ data; /* Fold the data into the running CRC.  */
+
+ for (i = 0; i < 8; i++) { /* NOTE(review): 8 steps for 16-bit data -- confirm intended.  */
+ if (crc & 1)
+ crc = (crc >> 1) ^ 0x82F63B78; /* 0x82F63B78 is 0x1EDC6F41 bit-reversed.  */
+ else
+ crc = (crc >> 1);
+ }
+
+ return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint16_t data) { /* Same loop as _crc32_O0, built with the test's -O2 options.  */
+ int i;
+ crc = crc ^ data; /* Fold the data into the running CRC.  */
+
+ for (i = 0; i < 8; i++) { /* NOTE(review): 8 steps for 16-bit data -- confirm intended.  */
+ if (crc & 1)
+ crc = (crc >> 1) ^ 0x82F63B78; /* 0x82F63B78 is 0x1EDC6F41 bit-reversed.  */
+ else
+ crc = (crc >> 1);
+ }
+
+ return crc;
+}
+
+int main () /* Check that _crc32 and _crc32_O0 agree.  */
+{
+ uint32_t crc = 0x0D800D80;
+ for (uint16_t i = 0; i < 0xffff; i++) /* Data values 0 .. 0xfffe.  */
+ {
+ uint32_t res1 = _crc32_O0 (crc, i);
+ uint32_t res2 = _crc32 (crc, i);
+ if (res1 != res2)
+ abort (); /* The two versions disagree.  */
+ crc = res1; /* Feed the result into the next iteration.  */
+ }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
new file mode 100644
index 00000000000..08d6c193a77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0))) /* Reference copy: not inlined, not optimized.  */
+uint32_t _crc32_O0 (uint32_t crc, uint32_t data) {
+ int i;
+ crc = crc ^ data; /* Fold the data into the running CRC.  */
+
+ for (i = 0; i < 32; i++) { /* One step per data bit.  */
+ if (crc & 1)
+ crc = (crc >> 1) ^ 0x82F63B78; /* 0x82F63B78 is 0x1EDC6F41 bit-reversed.  */
+ else
+ crc = (crc >> 1);
+ }
+
+ return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint32_t data) { /* Same loop as _crc32_O0, built with the test's -O2 options.  */
+ int i;
+ crc = crc ^ data; /* Fold the data into the running CRC.  */
+
+ for (i = 0; i < 32; i++) { /* One step per data bit.  */
+ if (crc & 1)
+ crc = (crc >> 1) ^ 0x82F63B78; /* 0x82F63B78 is 0x1EDC6F41 bit-reversed.  */
+ else
+ crc = (crc >> 1);
+ }
+
+ return crc;
+}
+
+int main () /* Check that _crc32 and _crc32_O0 agree.  */
+{
+ uint32_t crc = 0x0D800D80;
+ for (uint8_t i = 0; i < 0xff; i++) /* NOTE(review): only 8-bit values reach the 32-bit data argument -- confirm.  */
+ {
+ uint32_t res1 = _crc32_O0 (crc, i);
+ uint32_t res2 = _crc32 (crc, i);
+ if (res1 != res2)
+ abort (); /* The two versions disagree.  */
+ crc = res1; /* Feed the result into the next iteration.  */
+ }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c
new file mode 100644
index 00000000000..7a76b27fd28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0))) /* Reference copy: not inlined, not optimized.  */
+uint32_t _crc32_O0 (uint32_t crc, uint8_t data) {
+ int i;
+ crc = crc ^ data; /* Fold the data into the running CRC.  */
+
+ for (i = 0; i < 8; i++) { /* One step per data bit.  */
+ if (crc & 1)
+ crc = (crc >> 1) ^ 0x82F63B78; /* 0x82F63B78 is 0x1EDC6F41 bit-reversed.  */
+ else
+ crc = (crc >> 1);
+ }
+
+ return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint8_t data) { /* Same loop as _crc32_O0, built with the test's -O2 options.  */
+ int i;
+ crc = crc ^ data; /* Fold the data into the running CRC.  */
+
+ for (i = 0; i < 8; i++) { /* One step per data bit.  */
+ if (crc & 1)
+ crc = (crc >> 1) ^ 0x82F63B78; /* 0x82F63B78 is 0x1EDC6F41 bit-reversed.  */
+ else
+ crc = (crc >> 1);
+ }
+
+ return crc;
+}
+
+int main () /* Check that _crc32 and _crc32_O0 agree.  */
+{
+ uint32_t crc = 0x0D800D80;
+ for (uint8_t i = 0; i < 0xff; i++) /* Data values 0 .. 0xfe.  */
+ {
+ uint32_t res1 = _crc32_O0 (crc, i);
+ uint32_t res2 = _crc32 (crc, i);
+ if (res1 != res2)
+ abort (); /* The two versions disagree.  */
+ crc = res1; /* Feed the result into the next iteration.  */
+ }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
--
2.25.1
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-07-26 18:07 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-26 18:07 [RFC/RFA][PATCH v2 05/12] i386: Implement new expander for efficient CRC computation Mariam Arutunian
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).