public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V2] extract DF/SF/SI/HI/QI subreg from parameter word on stack
@ 2023-01-04 13:45 Jiufu Guo
  2023-03-02  9:33 ` Ping: " Jiufu Guo
  0 siblings, 1 reply; 5+ messages in thread
From: Jiufu Guo @ 2023-01-04 13:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, rguenther, jeffreyalaw, guojiufu

Hi,

This patch is fixing an issue about parameter accessing if the
parameter is struct type and passed through integer registers, and
there is floating member is accessed. Like below code:

typedef struct DF {double a[4]; long l; } DF;
double foo_df (DF arg){return arg.a[3];}

On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
generated.  While instruction "mtvsrd 1, 6" would be enough for
this case.

This patch updates the behavior when loading floating members of a
parameter: if that floating member is stored via integer register,
then loading it as integer mode first, and converting it to floating
mode.

Compare with previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608872.html
Previous version supports converion from DImode to DF/SF, this
version also supports conversion from DImode to SI/HI/QI modes.

I also tried to enhance CSE/DSE for this issue.  But because the
limitations (e.g. CSE does not like new pseudo, DSE is not good
at cross-blocks), some cases (as this patch) can not be handled.

Bootstrap and regtest passes on ppc64{,le}.
Is this ok for trunk?  Thanks for comments!


BR,
Jeff (Jiufu)


	PR target/108073

gcc/ChangeLog:

	* expr.cc (extract_subreg_from_loading_word): New function.
	(expand_expr_real_1): Call extract_subreg_from_loading_word.

gcc/testsuite/ChangeLog:

	* g++.target/powerpc/pr102024.C: Updated.
	* gcc.target/powerpc/pr108073.c: New test.

---
 gcc/expr.cc                                 | 76 +++++++++++++++++++++
 gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr108073.c | 30 ++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index d9407432ea5..6de4a985c8b 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10631,6 +10631,69 @@ stmt_is_replaceable_p (gimple *stmt)
   return false;
 }
 
+/* Return the content of the memory slot SOURCE as MODE.
+   SOURCE is based on BASE. BASE is a memory block that is stored via words.
+
+   To get the content from SOURCE:
+   first load the word from the memory which covers the SOURCE slot first;
+   next return the word's subreg which offsets to SOURCE slot;
+   then convert to MODE as necessary.  */
+
+static rtx
+extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base)
+{
+  rtx src_base = XEXP (source, 0);
+  poly_uint64 offset = MEM_OFFSET (source);
+
+  if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1)))
+    {
+      offset += INTVAL (XEXP (src_base, 1));
+      src_base = XEXP (src_base, 0);
+    }
+
+  if (!rtx_equal_p (XEXP (base, 0), src_base))
+    return NULL_RTX;
+
+  /* Subreg(DI,n) -> DF/SF/SI/HI/QI */
+  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
+  poly_uint64 mode_size = GET_MODE_SIZE (mode);
+  poly_uint64 byte_off;
+  unsigned int start;
+  machine_mode int_mode;
+  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
+      && int_mode_for_mode (mode).exists (&int_mode)
+      && can_div_trunc_p (offset, word_size, &start, &byte_off)
+      && multiple_p (byte_off, mode_size))
+    {
+      rtx word_mem = copy_rtx (source);
+      PUT_MODE (word_mem, word_mode);
+      word_mem = adjust_address (word_mem, word_mode, -byte_off);
+
+      rtx word_reg = gen_reg_rtx (word_mode);
+      emit_move_insn (word_reg, word_mem);
+
+      poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
+      if (!known_eq (byte_off, low_off))
+	{
+	  poly_uint64 shift_bytes = known_gt (byte_off, low_off)
+				      ? byte_off - low_off
+				      : low_off - byte_off;
+	  word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
+				   shift_bytes * BITS_PER_UNIT, word_reg, 0);
+	}
+
+      rtx int_subreg = gen_lowpart (int_mode, word_reg);
+      if (mode == int_mode)
+	return int_subreg;
+
+      rtx int_mode_reg = gen_reg_rtx (int_mode);
+      emit_move_insn (int_mode_reg, int_subreg);
+      return gen_lowpart (mode, int_mode_reg);
+    }
+
+  return NULL_RTX;
+}
+
 rtx
 expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 		    enum expand_modifier modifier, rtx *alt_rtl,
@@ -11812,6 +11875,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	    && modifier != EXPAND_WRITE)
 	  op0 = flip_storage_order (mode1, op0);
 
+	/* Accessing sub-field of struct parameter which passed via integer
+	   registers.  */
+	if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
+	    && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
+	    && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0)
+	    && MEM_OFFSET_KNOWN_P (op0))
+	  {
+	    rtx subreg
+	      = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem));
+	    if (subreg)
+	      op0 = subreg;
+	  }
+
 	if (mode == mode1 || mode1 == BLKmode || mode1 == tmode
 	    || modifier == EXPAND_CONST_ADDRESS
 	    || modifier == EXPAND_INITIALIZER)
diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C b/gcc/testsuite/g++.target/powerpc/pr102024.C
index 769585052b5..c8995cae707 100644
--- a/gcc/testsuite/g++.target/powerpc/pr102024.C
+++ b/gcc/testsuite/g++.target/powerpc/pr102024.C
@@ -5,7 +5,7 @@
 // Test that a zero-width bit field in an otherwise homogeneous aggregate
 // generates a psabi warning and passes arguments in GPRs.
 
-// { dg-final { scan-assembler-times {\mstd\M} 4 } }
+// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
 
 struct a_thing
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c b/gcc/testsuite/gcc.target/powerpc/pr108073.c
new file mode 100644
index 00000000000..91c13d896b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+
+typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF;
+typedef struct SF {float a[4]; int i1; int i2; } SF;
+
+/* Each of below function contains one mtvsrd.  */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+short  __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return a.s2+a.s3;return 0;}
+int  __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return a.i2+a.i1;return 0;}
+double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return arg.a[3];else return 0.0;}
+float  __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return arg.a[2]; return 0;}
+float  __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return arg.a[1];return 0;}
+
+DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4};
+SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2};
+
+int main()
+{
+  if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0
+	&& foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
+    __builtin_abort ();
+  if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0
+	&& foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
+    __builtin_abort ();
+  return 0;
+}
+
-- 
2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-06-13  2:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-04 13:45 [PATCH V2] extract DF/SF/SI/HI/QI subreg from parameter word on stack Jiufu Guo
2023-03-02  9:33 ` Ping: " Jiufu Guo
2023-05-11  1:20   ` Ping^^: " Jiufu Guo
2023-06-10 17:40     ` Jeff Law
2023-06-13  2:58       ` Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).