From: Xinliang David Li <davidxl@google.com>
To: "Bin.Cheng" <amker.cheng@gmail.com>
Cc: Wei Mi <wmi@google.com>, GCC Patches <gcc-patches@gcc.gnu.org>,
Uros Bizjak <ubizjak@gmail.com>, "H.J. Lu" <hjl.tools@gmail.com>
Subject: Re: [PATCH, x86] merge movsd/movhpd pair in peephole
Date: Mon, 21 Apr 2014 19:26:00 -0000 [thread overview]
Message-ID: <CAAkRFZL4ajQghy7LR58ob8LX4cpcsEeFM=N7HqcJiPEh9t+8Gw@mail.gmail.com> (raw)
In-Reply-To: <CAHFci28JaB_45BishBAXsoRBp_c=4Y1yVeCutYt6=y_ivApfyg@mail.gmail.com>
Bin, when will the patch for the generic pass be available for review?
David
On Wed, Apr 9, 2014 at 7:27 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
> On Thu, Apr 10, 2014 at 8:18 AM, Wei Mi <wmi@google.com> wrote:
>> Hi,
>>
>> For the testcase 1.c
>>
>> #include <emmintrin.h>
>>
>> double a[1000];
>>
>> __m128d foo1() {
>> __m128d res;
>> res = _mm_load_sd(&a[1]);
>> res = _mm_loadh_pd(res, &a[2]);
>> return res;
>> }
>>
>> llvm will merge movsd/movhpd to movupd while gcc will not. The merge
>> is beneficial on x86 machines starting from Nehalem.
>>
>> This patch adds the merging as a peephole2 pattern.
>> Bootstrap and regression tests pass. Is it ok for stage1?
>>
>> Thanks,
>> Wei.
>>
>> gcc/ChangeLog:
>>
>> 2014-04-09 Wei Mi <wmi@google.com>
>>
>> * config/i386/i386.c (get_memref_parts): New function.
>> (adjacent_mem_locations): Ditto.
>> * config/i386/i386-protos.h: Add decl for adjacent_mem_locations.
>> * config/i386/sse.md: Add define_peephole2 rules.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2014-04-09 Wei Mi <wmi@google.com>
>>
>> * gcc.target/i386/sse2-unaligned-mov.c: New test.
>>
>> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
>> index 6e32978..3ae0d6d 100644
>> --- a/gcc/config/i386/i386-protos.h
>> +++ b/gcc/config/i386/i386-protos.h
>> @@ -312,6 +312,7 @@ extern enum attr_cpu ix86_schedule;
>> #endif
>>
>> extern const char * ix86_output_call_insn (rtx insn, rtx call_op);
>> +extern bool adjacent_mem_locations (rtx mem1, rtx mem2);
>>
>> #ifdef RTX_CODE
>> /* Target data for multipass lookahead scheduling.
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 3eefe4a..a330e84 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -46737,6 +46737,70 @@ ix86_atomic_assign_expand_fenv (tree *hold,
>> tree *clear, tree *update)
>> atomic_feraiseexcept_call);
>> }
>>
>> +/* Try to determine BASE/OFFSET/SIZE parts of the given MEM.
>> + Return true if successful, false if any of the values couldn't
>> + be determined.
>> +
>> + This function only looks for REG/SYMBOL or REG/SYMBOL+CONST
>> + address forms. */
>> +
>> +static bool
>> +get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
>> + HOST_WIDE_INT *size)
>> +{
>> + rtx addr_rtx;
>> + if MEM_SIZE_KNOWN_P (mem)
>> + *size = MEM_SIZE (mem);
>> + else
>> + return false;
>> +
>> + if (GET_CODE (XEXP (mem, 0)) == CONST)
>> + addr_rtx = XEXP (XEXP (mem, 0), 0);
>> + else
>> + addr_rtx = (XEXP (mem, 0));
>> +
>> + if (GET_CODE (addr_rtx) == REG
>> + || GET_CODE (addr_rtx) == SYMBOL_REF)
>> + {
>> + *base = addr_rtx;
>> + *offset = 0;
>> + }
>> + else if (GET_CODE (addr_rtx) == PLUS
>> + && CONST_INT_P (XEXP (addr_rtx, 1)))
>> + {
>> + *base = XEXP (addr_rtx, 0);
>> + *offset = INTVAL (XEXP (addr_rtx, 1));
>> + }
>> + else
>> + return false;
>> +
>> + return true;
>> +}
>> +
>> +/* If MEM1 is adjacent to MEM2 and MEM1 has lower address,
>> + return true. */
>> +
>> +extern bool
>> +adjacent_mem_locations (rtx mem1, rtx mem2)
>> +{
>> + rtx base1, base2;
>> + HOST_WIDE_INT off1, size1, off2, size2;
>> +
>> + if (get_memref_parts (mem1, &base1, &off1, &size1)
>> + && get_memref_parts (mem2, &base2, &off2, &size2))
>> + {
>> + if (GET_CODE (base1) == SYMBOL_REF
>> + && GET_CODE (base2) == SYMBOL_REF
>> + && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2))
>> + return (off1 + size1 == off2);
>> + else if (REG_P (base1)
>> + && REG_P (base2)
>> + && REGNO (base1) == REGNO (base2))
>> + return (off1 + size1 == off2);
>> + }
>> + return false;
>> +}
>> +
>> /* Initialize the GCC target structure. */
>> #undef TARGET_RETURN_IN_MEMORY
>> #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
>> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
>> index 72a4d6d..4bf8461 100644
>> --- a/gcc/config/i386/sse.md
>> +++ b/gcc/config/i386/sse.md
>> @@ -15606,3 +15606,37 @@
>> [(set_attr "type" "sselog1")
>> (set_attr "length_immediate" "1")
>> (set_attr "mode" "TI")])
>> +
>> +;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
>> +;; is true.
>> +(define_peephole2
>> + [(set (match_operand:DF 0 "register_operand")
>> + (match_operand:DF 1 "memory_operand"))
>> + (set (match_operand:V2DF 2 "register_operand")
>> + (vec_concat:V2DF (match_dup 0)
>> + (match_operand:DF 3 "memory_operand")))]
>> + "TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
>> + && REGNO (operands[0]) == REGNO (operands[2])
>> + && adjacent_mem_locations (operands[1], operands[3])"
>> + [(set (match_dup 2)
>> + (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
>> +{
>> + operands[4] = gen_rtx_MEM (V2DFmode, XEXP(operands[1], 0));
>> +})
>> +
>> +;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_STORE_OPTIMAL
>> +;; is true.
>> +(define_peephole2
>> + [(set (match_operand:DF 0 "memory_operand")
>> + (vec_select:DF (match_operand:V2DF 1 "register_operand")
>> + (parallel [(const_int 0)])))
>> + (set (match_operand:DF 2 "memory_operand")
>> + (vec_select:DF (match_dup 1)
>> + (parallel [(const_int 1)])))]
>> + "TARGET_SSE_UNALIGNED_STORE_OPTIMAL
>> + && adjacent_mem_locations (operands[0], operands[2])"
>> + [(set (match_dup 3)
>> + (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
>> +{
>> + operands[3] = gen_rtx_MEM (V2DFmode, XEXP(operands[0], 0));
>> +})
>> diff --git a/gcc/testsuite/gcc.target/i386/sse2-unaligned-mov.c
>> b/gcc/testsuite/gcc.target/i386/sse2-unaligned-mov.c
>> new file mode 100644
>> index 0000000..28470ce
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/sse2-unaligned-mov.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mtune=corei7 -O2" } */
>> +
>> +#include <emmintrin.h>
>> +
>> +double a[1000];
>> +
>> +__m128d foo1() {
>> + __m128d res;
>> + res = _mm_load_sd(&a[1]);
>> + res = _mm_loadh_pd(res, &a[2]);
>> + return res;
>> +}
>> +
>> +void foo2(__m128d res) {
>> + _mm_store_sd(&a[1], res);
>> + _mm_storeh_pd(&a[2], res);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "movup" 2 } } */
>
> Hi Wei,
> Just FYI, though I am not sure whether it can help. I am working on
> load store pairing on ARM too. On ARM (maybe other machines like
> mips), the problem is more general because we can pair memory accesses
> with respect to both core registers and FPU registers. The current
> strategy taken by GCC is to use peephole2, but it's too weak: for
> example, it can't handle pairs that are separated by other
> instructions. Right now I am working on a new pass in GCC to do that
> work, and have already worked out a draft patch. The instrumentation
> data looks promising, with many opportunities captured beyond those
> found by the original peephole2 pass. Furthermore, the result could
> be even better if register renaming is enabled. Hopefully I will try
> to bring back register renaming for this purpose later.
>
> Thanks,
> bin
next prev parent reply other threads:[~2014-04-21 18:59 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-10 0:18 Wei Mi
2014-04-10 2:27 ` Bin.Cheng
2014-04-10 4:08 ` Wei Mi
2014-04-21 19:26 ` Xinliang David Li [this message]
2014-04-22 9:51 ` Bin.Cheng
2014-04-21 18:39 ` Wei Mi
2014-04-21 18:43 ` Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAAkRFZL4ajQghy7LR58ob8LX4cpcsEeFM=N7HqcJiPEh9t+8Gw@mail.gmail.com' \
--to=davidxl@google.com \
--cc=amker.cheng@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hjl.tools@gmail.com \
--cc=ubizjak@gmail.com \
--cc=wmi@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).