public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
       [not found] <20211116081034.22557-1-lingling.kong@intel.com>
@ 2021-11-16  8:15 ` Kong, Lingling
  2021-11-16  8:35   ` Hongtao Liu
  2021-11-16  8:48   ` Uros Bizjak
  0 siblings, 2 replies; 11+ messages in thread
From: Kong, Lingling @ 2021-11-16  8:15 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches

Hi,

vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.

OK for master?

gcc/ChangeLog:

	PR target/102811
	* config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.
	(extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
	(truncsfhf2): Likewise.
	(truncdfhf2): Likewise.

gcc/testsuite/ChangeLog:

	PR target/102811
	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
 gcc/config/i386/i386.md                       | 48 +++++++++++++++----
 .../i386/avx512vl-vcvtps2ph-pr102811.c        | 10 ++++
 2 files changed, 49 insertions(+), 9 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..c5415475342 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4574,15 +4574,30 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
-        (float_extend:MODEF
+(define_insn "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand" "=v")
+	(float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+  if (TARGET_AVX512FP16)
+    return "vcvtsh2ss\t{%1, %0, %0|%0, %0, %1}";
+  else
+    return "vcvtph2ps\t{%1, %0|%0, %1}"; }
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "mode" "SF")])
+
+(define_insn "extendhfdf2"
+  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=v")
+	(float_extend:DF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
-  "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
+  "vcvtsh2sd\t{%1, %0, %0|%0, %0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "DF")])
 
 
 (define_expand "extend<mode>xf2"
@@ -4766,12 +4781,27 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_insn "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=v")
+	(float_truncate:HF
+	  (match_operand:SF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+  {
+    if (TARGET_AVX512FP16)
+      return "vcvtss2sh\t{%1, %d0|%d0, %1}";
+    else
+      return "vcvtps2ph\t{0, %1, %0|%0, %1, 0}";
+  }
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
+
+(define_insn "truncdfhf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
-       (float_truncate:HF
-         (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
+	(float_truncate:HF
+	  (match_operand:DF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
-  "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
+  "vcvtsd2sh\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
new file mode 100644
index 00000000000..ab44a304a03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
+_Float16 test (_Float16 a, _Float16 b)
+{
+  return a + b;
+}
--
2.18.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-16  8:15 ` [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] Kong, Lingling
@ 2021-11-16  8:35   ` Hongtao Liu
  2021-11-16  8:48   ` Uros Bizjak
  1 sibling, 0 replies; 11+ messages in thread
From: Hongtao Liu @ 2021-11-16  8:35 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Kong, Lingling

On Tue, Nov 16, 2021 at 4:15 PM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
>
> OK for master?
>
> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.
>         (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.md                       | 48 +++++++++++++++----
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 10 ++++
>  2 files changed, 49 insertions(+), 9 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..c5415475342 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4574,15 +4574,30 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> -        (float_extend:MODEF
> +(define_insn "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand" "=v")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand" "vm")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (TARGET_AVX512FP16)
> +    return "vcvtsh2ss\t{%1, %0, %0|%0, %0, %1}";
> +  else
> +    return "vcvtph2ps\t{%1, %0|%0, %1}"; }
> +  [(set_attr "type" "ssecvt")
> +   (set_attr "prefix" "maybe_evex")
> +   (set_attr "mode" "SF")])
> +
> +(define_insn "extendhfdf2"
> +  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=v")
> +       (float_extend:DF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> -  "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
> +  "vcvtsh2sd\t{%1, %0, %0|%0, %0, %1}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "evex")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "DF")])
>
>
>  (define_expand "extend<mode>xf2"
> @@ -4766,12 +4781,27 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_insn "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand" "=v")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand" "vm")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (TARGET_AVX512FP16)
> +      return "vcvtss2sh\t{%1, %d0|%d0, %1}";
> +    else
> +      return "vcvtps2ph\t{0, %1, %0|%0, %1, 0}";
> +  }
> +  [(set_attr "type" "ssecvt")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "HF")])
> +
> +(define_insn "truncdfhf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
> -       (float_truncate:HF
> -         (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> -  "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
> +  "vcvtsd2sh\t{%1, %d0|%d0, %1}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "HF")])
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..ab44a304a03
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b)
> +{
> +  return a + b;
> +}
> --
> 2.18.1
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-16  8:15 ` [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] Kong, Lingling
  2021-11-16  8:35   ` Hongtao Liu
@ 2021-11-16  8:48   ` Uros Bizjak
  1 sibling, 0 replies; 11+ messages in thread
From: Uros Bizjak @ 2021-11-16  8:48 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 4495 bytes --]

On Tue, Nov 16, 2021 at 9:15 AM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
>
> OK for master?

No, this is the wrong approach. There can be invalid values in the
high elements of the vector, so these should be cleared before
conversion.

Please see the attached (unfinished) patch and use it as a starting
point. Please note that we can now allow 2-byte values in SSE
registers, so movhi_internal and ix86_can_change_mode_class should be
updated accordingly.

Uros.
>
> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.
>         (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.md                       | 48 +++++++++++++++----
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 10 ++++
>  2 files changed, 49 insertions(+), 9 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..c5415475342 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4574,15 +4574,30 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> -        (float_extend:MODEF
> +(define_insn "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand" "=v")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand" "vm")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (TARGET_AVX512FP16)
> +    return "vcvtsh2ss\t{%1, %0, %0|%0, %0, %1}";
> +  else
> +    return "vcvtph2ps\t{%1, %0|%0, %1}"; }
> +  [(set_attr "type" "ssecvt")
> +   (set_attr "prefix" "maybe_evex")
> +   (set_attr "mode" "SF")])
> +
> +(define_insn "extendhfdf2"
> +  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=v")
> +       (float_extend:DF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> -  "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
> +  "vcvtsh2sd\t{%1, %0, %0|%0, %0, %1}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "evex")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "DF")])
>
>
>  (define_expand "extend<mode>xf2"
> @@ -4766,12 +4781,27 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_insn "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand" "=v")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand" "vm")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (TARGET_AVX512FP16)
> +      return "vcvtss2sh\t{%1, %d0|%d0, %1}";
> +    else
> +      return "vcvtps2ph\t{0, %1, %0|%0, %1, 0}";
> +  }
> +  [(set_attr "type" "ssecvt")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "HF")])
> +
> +(define_insn "truncdfhf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
> -       (float_truncate:HF
> -         (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> -  "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
> +  "vcvtsd2sh\t{%1, %d0|%d0, %1}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "HF")])
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..ab44a304a03
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b)
> +{
> +  return a + b;
> +}
> --
> 2.18.1
>

[-- Attachment #2: pr102881-v2.diff.txt --]
[-- Type: text/plain, Size: 5248 bytes --]

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9cc903e826b..21a3a45d22c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19462,9 +19462,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
 	 disallow a change to these modes, reload will assume it's ok to
 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 	 the vec_dupv4hi pattern.
-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-	 register.  */
-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
       if (GET_MODE_SIZE (from) < mov_size)
 	return false;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e733a40fc90..27e3772c138 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2540,8 +2540,10 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "avx512fp16")
+	(cond [(eq_attr "alternative" "9,10,11,12")
+		  (const_string "sse2")
+	       (eq_attr "alternative" "13")
+		 (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
@@ -4574,8 +4576,42 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+        (float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_F16C || TARGET_AVX512FP16"
+{
+  if (!TARGET_AVX512FP16)
+    {
+#if 0
+      rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+      rtx op = lowpart_subreg (V8HFmode,
+			       force_reg (HFmode, operands[1]), HFmode);
+      rtx res = lowpart_subreg (V4SFmode, operands[0], SFmode);
+      rtx tmp = gen_reg_rtx (V8HFmode);
+
+      emit_insn (gen_sse4_1_pblendph (tmp, op, zero, const1_rtx));
+#endif
+
+      rtx res = gen_reg_rtx (V4SFmode);
+      rtx tmp = gen_reg_rtx (V8HFmode);
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+        (float_extend:DF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
         (float_extend:MODEF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4584,7 +4620,6 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-
 (define_expand "extend<mode>xf2"
   [(set (match_operand:XF 0 "nonimmediate_operand")
         (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
@@ -4766,7 +4801,31 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=v")
+       (float_truncate:HF
+         (match_operand:SF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_F16C || TARGET_AVX512FP16"
+{
+  if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V8HFmode);
+      rtx tmp = gen_reg_rtx (V4SFmode);
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=v")
+       (float_truncate:HF
+         (match_operand:DF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fbf056bf9e6..7125801724f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9969,6 +9969,28 @@
 	   ]
 	   (symbol_ref "true")))])
 
+(define_insn "*vec_set<mode>_0"
+  [(set (match_operand:V8_128 0 "register_operand" "=v,v,x,x,x,x,x,x")
+	(vec_merge:V8_128
+	  (vec_duplicate:V8_128
+	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,x,r,m,x"))
+	  (match_operand:V8_128 1 "reg_or_0_operand" "C,C,0,0,0,x,x,x")
+	  (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   vmovw\t{%k2, %x0|%x0, %k2}
+   vmovw\t{%2, %x0|%x0, %2}
+   pinsr<sseintmodesuffix>\t{$0, %k2, %0|%0, %k2, $0}
+   pinsr<sseintmodesuffix>\t{$0, %2, %0|%0, %2, $0}
+   pblendw\t{$1, %2, %0|%0, %2, $1}
+   vpinsr<sseintmodesuffix>\t{$0, %k2, %1, %0|%0, %1, %k2, $0}
+   vpinsr<sseintmodesuffix>\t{$0, %2, %1, %0|%0, %1, %2, $0}
+   vpblendw\t{$1, %2, %1, %0|%0, %1, %2, $1}"
+  [(set_attr "isa" "avx512fp16,avx512fp16,noavx,noavx,sse4_noavx,avx,avx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
+
 ;; vmovw clears also the higer bits
 (define_insn "vec_set<mode>_0"
   [(set (match_operand:VI2F 0 "register_operand" "=v,v")

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-24  8:48   ` Uros Bizjak
@ 2021-11-24  9:04     ` Kong, Lingling
  0 siblings, 0 replies; 11+ messages in thread
From: Kong, Lingling @ 2021-11-24  9:04 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Liu, Hongtao, gcc-patches, Kong, Lingling

[-- Attachment #1: Type: text/plain, Size: 9041 bytes --]

OK, This is the patch I prepare to check in.

-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, November 24, 2021 4:49 PM
To: Kong, Lingling <lingling.kong@intel.com>
Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> Cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class. And fixed some commit message.
>
> OK for master?

OK, with a small adjustment to ChangeLog.

Thanks,
Uros.

> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
>         for TARGET_SSE2.
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C.
>         (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
>         (*extendhf<mode>2): Rename from extendhf<mode>2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>         (*trunc<mode>2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
>         also allow pextrw replace vmovd + movw.

Just write:

* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.

>         * gcc.target/i386/pr90773-23.c: Ditto.
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.c                        |  5 +-
>  gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
> e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
>          disallow a change to these modes, reload will assume it's ok to
>          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>          the vec_dupv4hi pattern.
> -        NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -        register.  */
> -      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
> +        NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>        if (GET_MODE_SIZE (from) < mov_size)
>         return false;
>      }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
> 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>      case TYPE_SSEMOV:
>        return ix86_output_ssemov (insn, operands);
>
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
>         return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>      }
>  }
>    [(set (attr "isa")
> -       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -                 (const_string "avx512fp16")
> +       (cond [(eq_attr "alternative" "9,10,11,12")
> +                 (const_string "sse2")
> +              (eq_attr "alternative" "13")
> +                 (const_string "sse4")
>                ]
>                (const_string "*")))
>     (set (attr "type")
>       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -             (const_string "ssemov")
> +             (if_then_else (match_test "TARGET_AVX512FP16")
> +               (const_string "ssemov")
> +               (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
>             (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V4SFmode);
> +      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +      DONE;
> +    }
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +       (float_extend:DF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*extendhf<mode>2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
>          (float_extend:MODEF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> @@ -4766,7 +4804,31 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_expand "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V8HFmode);
> +      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
> +      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
> +      DONE;
> +    }
> +  })
> +
> +(define_expand "truncdfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*trunc<mode>hf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
>         (float_truncate:HF
>           (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff 
> --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c 
> b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..dfbfb167953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b) {
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c 
> b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 5bbb387a3ea..0d620fff83c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -10,4 +10,4 @@ foo (int c)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 
> \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 
> 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 
> +32\\(%\[\^,\]+\\)" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c 
> b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index ca4a86f30b8..b7369e802e1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -10,4 +10,4 @@ foo (void)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 
> \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 
> 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 
> +32\\(%\[\^,\]+\\)" 1 } } */
> --
> 2.18.1
>

[-- Attachment #2: 0001-i386-vcvtph2ps-and-vcvtps2ph-should-be-used-to-conve.patch --]
[-- Type: application/octet-stream, Size: 7445 bytes --]

From c70f7d69d7d89bde883d6338405aa18b7a6e1299 Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Wed, 10 Nov 2021 09:37:32 +0800
Subject: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert
 _Float16 to SFmode with -mf16c [PR 102811]

Add define_insn extendhfsf2 and truncsfhf2 for target_f16c.

gcc/ChangeLog:

	PR target/102811
	* config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
	for TARGET_SSE2.
	* config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C.
	(extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
	(*extendhf<mode>2): Rename from extendhf<mode>2.
	(truncsfhf2): Likewise.
	(truncdfhf2): Likewise.
	(*trunc<mode>2): Likewise.

gcc/testsuite/ChangeLog:

	PR target/102811
	* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.
	* gcc.target/i386/pr90773-23.c: Ditto.
	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
 gcc/config/i386/i386.c                        |  5 +-
 gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
 .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
 gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
 5 files changed, 83 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e94efdf39fb..4b813533961 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
 	 disallow a change to these modes, reload will assume it's ok to
 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 	 the vec_dupv4hi pattern.
-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-	 register.  */
-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
       if (GET_MODE_SIZE (from) < mov_size)
 	return false;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6eb9de81921..6ee264f1151 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2525,6 +2525,16 @@
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
+    case TYPE_SSELOG:
+      if (SSE_REG_P (operands[0]))
+	return MEM_P (operands[1])
+	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+      else
+	return MEM_P (operands[1])
+	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
 	return "kxorw\t%0, %0, %0";
@@ -2540,13 +2550,17 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "avx512fp16")
+	(cond [(eq_attr "alternative" "9,10,11,12")
+		  (const_string "sse2")
+	       (eq_attr "alternative" "13")
+		  (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "9,10,11,12,13")
-	      (const_string "ssemov")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "ssemov")
+		(const_string "sselog"))
 	    (eq_attr "alternative" "4,5,6,7")
 	      (const_string "mskmov")
 	    (eq_attr "alternative" "8")
@@ -4574,8 +4588,32 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+	(float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+  if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V4SFmode);
+      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+	(float_extend:DF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
         (float_extend:MODEF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4766,7 +4804,31 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+  {
+    if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V8HFmode);
+      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+      DONE;
+    }
+  })
+
+(define_expand "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
new file mode 100644
index 00000000000..dfbfb167953
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
+_Float16 test (_Float16 a, _Float16 b)
+{
+  return a + b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 5bbb387a3ea..0d620fff83c 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -10,4 +10,4 @@ foo (int c)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index ca4a86f30b8..b7369e802e1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -10,4 +10,4 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
-- 
2.18.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-24  8:43 ` Kong, Lingling
@ 2021-11-24  8:48   ` Uros Bizjak
  2021-11-24  9:04     ` Kong, Lingling
  0 siblings, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2021-11-24  8:48 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches

On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> Cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class. And fixed some commit message.
>
> OK for master?

OK, with a small adjustment to ChangeLog.

Thanks,
Uros.

> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
>         for TARGET_SSE2.
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C.
>         (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
>         (*extendhf<mode>2): Rename from extendhf<mode>2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>         (*trunc<mode>2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
>         also allow pextrw replace vmovd + movw.

Just write:

* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.

>         * gcc.target/i386/pr90773-23.c: Ditto.
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.c                        |  5 +-
>  gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
>          disallow a change to these modes, reload will assume it's ok to
>          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>          the vec_dupv4hi pattern.
> -        NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -        register.  */
> -      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
> +        NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>        if (GET_MODE_SIZE (from) < mov_size)
>         return false;
>      }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>      case TYPE_SSEMOV:
>        return ix86_output_ssemov (insn, operands);
>
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
>         return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>      }
>  }
>    [(set (attr "isa")
> -       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -                 (const_string "avx512fp16")
> +       (cond [(eq_attr "alternative" "9,10,11,12")
> +                 (const_string "sse2")
> +              (eq_attr "alternative" "13")
> +                 (const_string "sse4")
>                ]
>                (const_string "*")))
>     (set (attr "type")
>       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -             (const_string "ssemov")
> +             (if_then_else (match_test "TARGET_AVX512FP16")
> +               (const_string "ssemov")
> +               (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
>             (eq_attr "alternative" "8")
> @@ -4574,8 +4588,32 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V4SFmode);
> +      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +      DONE;
> +    }
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +       (float_extend:DF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*extendhf<mode>2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
>          (float_extend:MODEF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> @@ -4766,7 +4804,31 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_expand "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V8HFmode);
> +      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
> +      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
> +      DONE;
> +    }
> +  })
> +
> +(define_expand "truncdfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*trunc<mode>hf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
>         (float_truncate:HF
>           (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..dfbfb167953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b)
> +{
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 5bbb387a3ea..0d620fff83c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -10,4 +10,4 @@ foo (int c)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*,
> +32\\(%\[\^,\]+\\)" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index ca4a86f30b8..b7369e802e1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -10,4 +10,4 @@ foo (void)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+,
> +32\\(%\[\^,\]+\\)" 1 } } */
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
       [not found] <20211124083421.5768-1-lingling.kong@intel.com>
@ 2021-11-24  8:43 ` Kong, Lingling
  2021-11-24  8:48   ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: Kong, Lingling @ 2021-11-24  8:43 UTC (permalink / raw)
  To: Uros Bizjak, Liu, Hongtao, gcc-patches; +Cc: Kong, Lingling

Hi,

vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
Cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class. And fixed some commit message.

OK for master?

gcc/ChangeLog:

	PR target/102811
	* config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
	for TARGET_SSE2.
	* config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C.
	(extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
	(*extendhf<mode>2): Rename from extendhf<mode>2.
	(truncsfhf2): Likewise.
	(truncdfhf2): Likewise.
	(*trunc<mode>2): Likewise.

gcc/testsuite/ChangeLog:

	PR target/102811
	* gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
	also allow pextrw replace vmovd + movw.
	* gcc.target/i386/pr90773-23.c: Ditto.
	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
 gcc/config/i386/i386.c                        |  5 +-
 gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
 .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
 gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
 5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
 	 disallow a change to these modes, reload will assume it's ok to
 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 	 the vec_dupv4hi pattern.
-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-	 register.  */
-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 
+4;
       if (GET_MODE_SIZE (from) < mov_size)
 	return false;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2525,6 +2525,16 @@
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
+    case TYPE_SSELOG:
+      if (SSE_REG_P (operands[0]))
+	return MEM_P (operands[1])
+	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+      else
+	return MEM_P (operands[1])
+	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
 	return "kxorw\t%0, %0, %0";
@@ -2540,13 +2550,17 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "avx512fp16")
+	(cond [(eq_attr "alternative" "9,10,11,12")
+		  (const_string "sse2")
+	       (eq_attr "alternative" "13")
+		  (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "9,10,11,12,13")
-	      (const_string "ssemov")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "ssemov")
+		(const_string "sselog"))
 	    (eq_attr "alternative" "4,5,6,7")
 	      (const_string "mskmov")
 	    (eq_attr "alternative" "8")
@@ -4574,8 +4588,32 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+	(float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+  if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V4SFmode);
+      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+	(float_extend:DF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
         (float_extend:MODEF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4766,7 +4804,31 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+  {
+    if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V8HFmode);
+      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+      DONE;
+    }
+  })
+
+(define_expand "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
new file mode 100644
index 00000000000..dfbfb167953
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
+_Float16 test (_Float16 a, _Float16 b)
+{
+  return a + b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 5bbb387a3ea..0d620fff83c 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -10,4 +10,4 @@ foo (int c)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 
+32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index ca4a86f30b8..b7369e802e1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -10,4 +10,4 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 
+32\\(%\[\^,\]+\\)" 1 } } */
--
2.18.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-24  8:06     ` Kong, Lingling
@ 2021-11-24  8:39       ` Uros Bizjak
  0 siblings, 0 replies; 11+ messages in thread
From: Uros Bizjak @ 2021-11-24  8:39 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches

On Wed, Nov 24, 2021 at 9:06 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi  Uros,
>
> > BTW: When playing with my patch, I introduced (define_insn "*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK without this pattern?
>
> Yes, ix86_expand_vector_set could work ok with (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"), this insn can optimize scalar load to a vector.

Ah, now I remember - this pattern can be used to optimize HI/HF mode
scalar loads in the same way as other "vec_set<mode>_0" patterns are
used. It is similar to e.g. VI4F_128 mode vec_set<mode>_0 pattern. I
was not able to test it properly without AVX512FP16, but the pattern
is otherwise independent of the proposed patch.

Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-24  7:57   ` Uros Bizjak
@ 2021-11-24  8:06     ` Kong, Lingling
  2021-11-24  8:39       ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: Kong, Lingling @ 2021-11-24  8:06 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Liu, Hongtao, gcc-patches

Hi  Uros,

> BTW: When playing with my patch, I introduced (define_insn "*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK without this pattern?

Yes, ix86_expand_vector_set could work ok with (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"), this insn can optimize scalar load to a vector.

Thanks,
Lingling

-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, November 24, 2021 3:57 PM
To: Kong, Lingling <lingling.kong@intel.com>
Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

On Wed, Nov 24, 2021 at 7:25 AM Kong, Lingling via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> And cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class.

Please fix the above commit message.

>
> OK for master?
>
> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load 16bit data
>         to sse register via pinsrw.

Allow 16bit data in XMM register for SSE2 targets.

>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.

... for TARGET_F16C.

>         (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
>         extendhfdf only for target_avx512fp16.

Restrict extendhfdf for TARGET_AVX512FP16 only.

>         (*extendhf<mode>2):rename extendhf<mode>2.

Rename from extendhf<mode>2.

>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>         (*trunc<mode>2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/pr90773-21.c: Optimized movhi_internal,
>         optimize vmovd + movw to vpextrw.

Also allow pextrw.

>         * gcc.target/i386/pr90773-23.c: Ditto.
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.

Otherwise LGTM.

BTW: When playing with my patch, I introduced (define_insn "*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK without this pattern?

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c                        |  5 +-
>  gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
> e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
>          disallow a change to these modes, reload will assume it's ok to
>          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>          the vec_dupv4hi pattern.
> -        NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -        register.  */
> -      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
> +        NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>        if (GET_MODE_SIZE (from) < mov_size)
>         return false;
>      }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
> 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>      case TYPE_SSEMOV:
>        return ix86_output_ssemov (insn, operands);
>
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
>         return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>      }
>  }
>    [(set (attr "isa")
> -       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -                 (const_string "avx512fp16")
> +       (cond [(eq_attr "alternative" "9,10,11,12")
> +                 (const_string "sse2")
> +              (eq_attr "alternative" "13")
> +                 (const_string "sse4")
>                ]
>                (const_string "*")))
>     (set (attr "type")
>       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -             (const_string "ssemov")
> +             (if_then_else (match_test "TARGET_AVX512FP16")
> +               (const_string "ssemov")
> +               (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
>             (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V4SFmode);
> +      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +      DONE;
> +    }
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +       (float_extend:DF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*extendhf<mode>2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
>          (float_extend:MODEF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> @@ -4766,7 +4804,31 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_expand "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V8HFmode);
> +      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
> +      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
> +      DONE;
> +    }
> +  })
> +
> +(define_expand "truncdfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*trunc<mode>hf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
>         (float_truncate:HF
>           (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff 
> --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c 
> b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..dfbfb167953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b) {
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c 
> b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 5bbb387a3ea..0d620fff83c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -10,4 +10,4 @@ foo (int c)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 
> \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 
> 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 
> +32\\(%\[\^,\]+\\)" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c 
> b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index ca4a86f30b8..b7369e802e1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -10,4 +10,4 @@ foo (void)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 
> \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 
> 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 
> +32\\(%\[\^,\]+\\)" 1 } } */
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-24  6:24 ` Kong, Lingling
  2021-11-24  7:05   ` Liu, Hongtao
@ 2021-11-24  7:57   ` Uros Bizjak
  2021-11-24  8:06     ` Kong, Lingling
  1 sibling, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2021-11-24  7:57 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches

On Wed, Nov 24, 2021 at 7:25 AM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> And cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class.

Please fix the above commit message.

>
> OK for master?
>
> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load 16bit data
>         to sse register via pinsrw.

Allow 16bit data in XMM register for SSE2 targets.

>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.

... for TARGET_F16C.

>         (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
>         extendhfdf only for target_avx512fp16.

Restrict extendhfdf for TARGET_AVX512FP16 only.

>         (*extendhf<mode>2):rename extendhf<mode>2.

Rename from extendhf<mode>2.

>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>         (*trunc<mode>2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/pr90773-21.c: Optimized movhi_internal,
>         optimize vmovd + movw to vpextrw.

Also allow pextrw.

>         * gcc.target/i386/pr90773-23.c: Ditto.
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.

Otherwise LGTM.

BTW: When playing with my patch, I introduced (define_insn
"*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does
ix86_expand_vector_set work OK without this pattern?

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c                        |  5 +-
>  gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
>          disallow a change to these modes, reload will assume it's ok to
>          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>          the vec_dupv4hi pattern.
> -        NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -        register.  */
> -      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
> +        NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>        if (GET_MODE_SIZE (from) < mov_size)
>         return false;
>      }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>      case TYPE_SSEMOV:
>        return ix86_output_ssemov (insn, operands);
>
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
>         return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>      }
>  }
>    [(set (attr "isa")
> -       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -                 (const_string "avx512fp16")
> +       (cond [(eq_attr "alternative" "9,10,11,12")
> +                 (const_string "sse2")
> +              (eq_attr "alternative" "13")
> +                 (const_string "sse4")
>                ]
>                (const_string "*")))
>     (set (attr "type")
>       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -             (const_string "ssemov")
> +             (if_then_else (match_test "TARGET_AVX512FP16")
> +               (const_string "ssemov")
> +               (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
>             (eq_attr "alternative" "8")
> @@ -4574,8 +4588,32 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V4SFmode);
> +      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +      DONE;
> +    }
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +       (float_extend:DF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*extendhf<mode>2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
>          (float_extend:MODEF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> @@ -4766,7 +4804,31 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_expand "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V8HFmode);
> +      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
> +      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
> +      DONE;
> +    }
> +  })
> +
> +(define_expand "truncdfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*trunc<mode>hf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
>         (float_truncate:HF
>           (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..dfbfb167953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b)
> +{
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 5bbb387a3ea..0d620fff83c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -10,4 +10,4 @@ foo (int c)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*,
> +32\\(%\[\^,\]+\\)" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index ca4a86f30b8..b7369e802e1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -10,4 +10,4 @@ foo (void)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+,
> +32\\(%\[\^,\]+\\)" 1 } } */
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
  2021-11-24  6:24 ` Kong, Lingling
@ 2021-11-24  7:05   ` Liu, Hongtao
  2021-11-24  7:57   ` Uros Bizjak
  1 sibling, 0 replies; 11+ messages in thread
From: Liu, Hongtao @ 2021-11-24  7:05 UTC (permalink / raw)
  To: Kong, Lingling, gcc-patches



>-----Original Message-----
>From: Kong, Lingling <lingling.kong@intel.com>
>Sent: Wednesday, November 24, 2021 2:25 PM
>To: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
>Cc: Kong, Lingling <lingling.kong@intel.com>
>Subject: RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert
>_Float16 to SFmode with -mf16c [PR 102811]
>
>Hi,
>
>vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with
>-mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
>And cleared before conversion, updated  movhi_internal and
>ix86_can_change_mode_class.
>
>OK for master?
>
>gcc/ChangeLog:
>
>	PR target/102811
>	* config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load
>16bit data
>	to sse register via pinsrw.
>	* config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.
>	(extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2,
>extendhfdf2.
>	extendhfdf only for target_avx512fp16.
>	(*extendhf<mode>2):rename extendhf<mode>2.
>	(truncsfhf2): Likewise.
>	(truncdfhf2): Likewise.
>	(*trunc<mode>2): Likewise.
>
>gcc/testsuite/ChangeLog:
>
>	PR target/102811
>	* gcc.target/i386/pr90773-21.c: Optimized movhi_internal,
>	optimize vmovd + movw to vpextrw.
>	* gcc.target/i386/pr90773-23.c: Ditto.
>	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
>---
> gcc/config/i386/i386.c                        |  5 +-
> gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
> .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
> gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
> gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
> 5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644
>gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
>diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index
>e94efdf39fb..4b813533961 100644
>--- a/gcc/config/i386/i386.c
>+++ b/gcc/config/i386/i386.c
>@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode
>from, machine_mode to,
> 	 disallow a change to these modes, reload will assume it's ok to
> 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
> 	 the vec_dupv4hi pattern.
>-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
>-	 register.  */
>-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ?
>2 : 4;
>+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
>+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
>+4;
>       if (GET_MODE_SIZE (from) < mov_size)
> 	return false;
>     }
>diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index
>6eb9de81921..6ee264f1151 100644
>--- a/gcc/config/i386/i386.md
>+++ b/gcc/config/i386/i386.md
>@@ -2525,6 +2525,16 @@
>     case TYPE_SSEMOV:
>       return ix86_output_ssemov (insn, operands);
>
>+    case TYPE_SSELOG:
>+      if (SSE_REG_P (operands[0]))
>+	return MEM_P (operands[1])
>+	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
>+	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
>+      else
>+	return MEM_P (operands[1])
>+	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
>+	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
>+
>     case TYPE_MSKLOG:
>       if (operands[1] == const0_rtx)
> 	return "kxorw\t%0, %0, %0";
>@@ -2540,13 +2550,17 @@
>     }
> }
>   [(set (attr "isa")
>-	(cond [(eq_attr "alternative" "9,10,11,12,13")
>-		  (const_string "avx512fp16")
>+	(cond [(eq_attr "alternative" "9,10,11,12")
>+		  (const_string "sse2")
>+	       (eq_attr "alternative" "13")
>+		  (const_string "sse4")
> 	       ]
> 	       (const_string "*")))
>    (set (attr "type")
>      (cond [(eq_attr "alternative" "9,10,11,12,13")
>-	      (const_string "ssemov")
>+	      (if_then_else (match_test "TARGET_AVX512FP16")
>+		(const_string "ssemov")
>+		(const_string "sselog"))
> 	    (eq_attr "alternative" "4,5,6,7")
> 	      (const_string "mskmov")
> 	    (eq_attr "alternative" "8")
>@@ -4574,8 +4588,32 @@
>   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
> })
>
>-(define_insn "extendhf<mode>2"
>-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
>+(define_expand "extendhfsf2"
>+  [(set (match_operand:SF 0 "register_operand")
>+	(float_extend:SF
>+	  (match_operand:HF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
>+{
>+  if (!TARGET_AVX512FP16)
>+    {
>+      rtx res = gen_reg_rtx (V4SFmode);
>+      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
>+
>+      ix86_expand_vector_set (false, tmp, operands[1], 0);
>+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
>+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
>+      DONE;
>+    }
>+})
>+
>+(define_expand "extendhfdf2"
>+  [(set (match_operand:DF 0 "register_operand")
>+	(float_extend:DF
>+	  (match_operand:HF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16")
>+
>+(define_insn "*extendhf<mode>2"
>+  [(set (match_operand:MODEF 0 "register_operand" "=v")
>         (float_extend:MODEF
> 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>   "TARGET_AVX512FP16"
>@@ -4766,7 +4804,31 @@
>
> ;; Conversion from {SF,DF}mode to HFmode.
>
>-(define_insn "trunc<mode>hf2"
>+(define_expand "truncsfhf2"
>+  [(set (match_operand:HF 0 "register_operand")
>+	(float_truncate:HF
>+	  (match_operand:SF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
>+  {
>+    if (!TARGET_AVX512FP16)
>+    {
>+      rtx res = gen_reg_rtx (V8HFmode);
>+      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
>+
>+      ix86_expand_vector_set (false, tmp, operands[1], 0);
>+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT
>(4)));
>+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
>+      DONE;
>+    }
>+  })
>+
>+(define_expand "truncdfhf2"
>+  [(set (match_operand:HF 0 "register_operand")
>+	(float_truncate:HF
>+	  (match_operand:DF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16")
>+
>+(define_insn "*trunc<mode>hf2"
>   [(set (match_operand:HF 0 "register_operand" "=v")
>        (float_truncate:HF
>          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git
>a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>new file mode 100644
>index 00000000000..dfbfb167953
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>@@ -0,0 +1,11 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
>+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
>+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
>+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
>+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
>+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
>+_Float16 test (_Float16 a, _Float16 b)
>+{
>+  return a + b;
>+}
>diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c
>b/gcc/testsuite/gcc.target/i386/pr90773-21.c
>index 5bbb387a3ea..0d620fff83c 100644
>--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
>+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
>@@ -10,4 +10,4 @@ foo (int c)
> }
>
> /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+,
>\\(%\[\^,\]+\\)" 1 } } */
>-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } }
>*/
>+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*,
>+32\\(%\[\^,\]+\\)" 1 } } */
>diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c
>b/gcc/testsuite/gcc.target/i386/pr90773-23.c
>index ca4a86f30b8..b7369e802e1 100644
>--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
>+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
>@@ -10,4 +10,4 @@ foo (void)
> }
>
> /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+,
>\\(%\[\^,\]+\\)" 1 } } */
>-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } }
>*/
>+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+,
>+32\\(%\[\^,\]+\\)" 1 } } */
>--
>2.18.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
       [not found] <20211124062105.61472-1-lingling.kong@intel.com>
@ 2021-11-24  6:24 ` Kong, Lingling
  2021-11-24  7:05   ` Liu, Hongtao
  2021-11-24  7:57   ` Uros Bizjak
  0 siblings, 2 replies; 11+ messages in thread
From: Kong, Lingling @ 2021-11-24  6:24 UTC (permalink / raw)
  To: Liu, Hongtao, gcc-patches; +Cc: Kong, Lingling

Hi,

vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
And cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class.

OK for master?

gcc/ChangeLog:

	PR target/102811
	* config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load 16bit data
	to sse register via pinsrw.
	* config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.
	(extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
	extendhfdf only for target_avx512fp16.
	(*extendhf<mode>2):rename extendhf<mode>2.
	(truncsfhf2): Likewise.
	(truncdfhf2): Likewise.
	(*trunc<mode>2): Likewise.

gcc/testsuite/ChangeLog:

	PR target/102811
	* gcc.target/i386/pr90773-21.c: Optimized movhi_internal,
	optimize vmovd + movw to vpextrw.
	* gcc.target/i386/pr90773-23.c: Ditto.
	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
 gcc/config/i386/i386.c                        |  5 +-
 gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
 .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
 gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
 5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
 	 disallow a change to these modes, reload will assume it's ok to
 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 	 the vec_dupv4hi pattern.
-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-	 register.  */
-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 
+4;
       if (GET_MODE_SIZE (from) < mov_size)
 	return false;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2525,6 +2525,16 @@
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
+    case TYPE_SSELOG:
+      if (SSE_REG_P (operands[0]))
+	return MEM_P (operands[1])
+	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+      else
+	return MEM_P (operands[1])
+	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
 	return "kxorw\t%0, %0, %0";
@@ -2540,13 +2550,17 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "avx512fp16")
+	(cond [(eq_attr "alternative" "9,10,11,12")
+		  (const_string "sse2")
+	       (eq_attr "alternative" "13")
+		  (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "9,10,11,12,13")
-	      (const_string "ssemov")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "ssemov")
+		(const_string "sselog"))
 	    (eq_attr "alternative" "4,5,6,7")
 	      (const_string "mskmov")
 	    (eq_attr "alternative" "8")
@@ -4574,8 +4588,32 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+	(float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+  if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V4SFmode);
+      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+	(float_extend:DF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
         (float_extend:MODEF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4766,7 +4804,31 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+  {
+    if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V8HFmode);
+      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+      DONE;
+    }
+  })
+
+(define_expand "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
new file mode 100644
index 00000000000..dfbfb167953
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
+_Float16 test (_Float16 a, _Float16 b)
+{
+  return a + b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 5bbb387a3ea..0d620fff83c 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -10,4 +10,4 @@ foo (int c)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 
+32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index ca4a86f30b8..b7369e802e1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -10,4 +10,4 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 
+32\\(%\[\^,\]+\\)" 1 } } */
--
2.18.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-11-24  9:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20211116081034.22557-1-lingling.kong@intel.com>
2021-11-16  8:15 ` [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] Kong, Lingling
2021-11-16  8:35   ` Hongtao Liu
2021-11-16  8:48   ` Uros Bizjak
     [not found] <20211124062105.61472-1-lingling.kong@intel.com>
2021-11-24  6:24 ` Kong, Lingling
2021-11-24  7:05   ` Liu, Hongtao
2021-11-24  7:57   ` Uros Bizjak
2021-11-24  8:06     ` Kong, Lingling
2021-11-24  8:39       ` Uros Bizjak
     [not found] <20211124083421.5768-1-lingling.kong@intel.com>
2021-11-24  8:43 ` Kong, Lingling
2021-11-24  8:48   ` Uros Bizjak
2021-11-24  9:04     ` Kong, Lingling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).