[PING] [PATCH v2] rs6000: fmr gets used instead of faster xxlor [PR93571]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>, bergner@linux.ibm.com
Subject: [PING] [PATCH v2] rs6000: fmr gets used instead of faster xxlor [PR93571]
Date: Mon, 12 Jun 2023 14:13:06 +0530	[thread overview]
Message-ID: <fe4774ab-bb30-510e-71cf-d0b6427476b5@linux.ibm.com> (raw)
In-Reply-To: <174972e2-3792-935b-ed4e-4e9d3d4ec26a@linux.ibm.com>

Hello Segher:

Please review this patch and let me know your feedback so that it can be committed to trunk.

Thanks & Regards
Ajit

On 25/02/23 3:20 pm, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> Here is the patch that uses xxlor instead of fmr where possible.
> Performance results show that fmr is better on power9 and
> power10 architectures, whereas xxlor is better on power7 and
> power8 architectures. fmr is the only option before p7.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu
> 
> Thanks & Regards
> Ajit
> 
> 	rs6000: Use xxlor instead of fmr where possible
> 
> 	Replace fmr with the xxlor instruction for power7 and power8
> 	architectures, whereas for power9 and power10 keep the fmr
> 	instruction.
> 
> 	Perf measurement results:
> 
> 	Power9 fmr:  201,847,661 cycles.
> 	Power9 xxlor: 201,877,78 cycles.
> 	Power8 fmr: 200,901,043 cycles.
> 	Power8 xxlor: 201,020,518 cycles.
> 	Power7 fmr: 201,059,524 cycles.
> 	Power7 xxlor: 201,042,851 cycles.
> 
> 	2023-02-25  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.md (*movdf_hardfloat64): Use xxlor for power7
> 	and power8 and fmr for power9 and power10.
> ---
>  gcc/config/rs6000/rs6000.md | 44 +++++++++++++++++++++++--------------
>  1 file changed, 28 insertions(+), 16 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 81bffb04ceb..e101f7f5fc1 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -354,7 +354,7 @@ (define_attr "cpu"
>    (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
>  
>  ;; The ISA we implement.
> -(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
> +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p7p8v,p9,p9v,p9kf,p9tf,p10"
>    (const_string "any"))
>  
>  ;; Is this alternative enabled for the current CPU/ISA/etc.?
> @@ -402,6 +402,11 @@ (define_attr "enabled" ""
>       (and (eq_attr "isa" "p10")
>  	  (match_test "TARGET_POWER10"))
>       (const_int 1)
> +      
> +     (and (eq_attr "isa" "p7p8v")
> +	  (match_test "TARGET_VSX && !TARGET_P9_VECTOR"))
> +     (const_int 1)
> +
>      ] (const_int 0)))
>  
>  ;; If this instruction is microcoded on the CELL processor
> @@ -8436,27 +8441,29 @@ (define_insn "*mov<mode>_softfloat32"
>  
>  (define_insn "*mov<mode>_hardfloat64"
>    [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
> -           "=m,           d,          d,          <f64_p9>,   wY,
> -             <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
> +           "=m,           d,          <f64_vsx>,  <f64_p9>,   wY,
> +             <f64_av>,    Z,          wa,         <f64_vsx>,  !r,
>               YZ,          r,          !r,         *c*l,       !r,
> -            *h,           r,          <f64_dm>,   wa")
> +            *h,           r,          <f64_dm>,   d,          wn,
> +            wa")
>  	(match_operand:FMOVE64 1 "input_operand"
> -            "d,           m,          d,          wY,         <f64_p9>,
> -             Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
> +            "d,           m,          <f64_vsx>,  wY,         <f64_p9>,
> +             Z,           <f64_av>,   wa,         <zero_fp>,  <zero_fp>,
>               r,           YZ,         r,          r,          *h,
> -             0,           <f64_dm>,   r,          eP"))]
> +             0,           <f64_dm>,   r,          d,          wn,
> +             eP"))]
>    "TARGET_POWERPC64 && TARGET_HARD_FLOAT
>     && (gpc_reg_operand (operands[0], <MODE>mode)
>         || gpc_reg_operand (operands[1], <MODE>mode))"
>    "@
>     stfd%U0%X0 %1,%0
>     lfd%U1%X1 %0,%1
> -   fmr %0,%1
> +   xxlor %x0,%x1,%x1
>     lxsd %0,%1
>     stxsd %1,%0
>     lxsdx %x0,%y1
>     stxsdx %x1,%y0
> -   xxlor %x0,%x1,%x1
> +   fmr %0,%1
>     xxlxor %x0,%x0,%x0
>     li %0,0
>     std%U0%X0 %1,%0
> @@ -8467,23 +8474,28 @@ (define_insn "*mov<mode>_hardfloat64"
>     nop
>     mfvsrd %0,%x1
>     mtvsrd %x0,%1
> +   fmr %0,%1
> +   fmr %0,%1
>     #"
>    [(set_attr "type"
> -            "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
> +            "fpstore,     fpload,     veclogical, fpload,     fpstore,
>               fpload,      fpstore,    veclogical, veclogical, integer,
>               store,       load,       *,          mtjmpr,     mfjmpr,
> -             *,           mfvsr,      mtvsr,      vecperm")
> +             *,           mfvsr,      mtvsr,      fpsimple,   fpsimple,
> +             vecperm")
>     (set_attr "size" "64")
>     (set_attr "isa"
> -            "*,           *,          *,          p9v,        p9v,
> -             p7v,         p7v,        *,          *,          *,
> -             *,           *,          *,          *,          *,
> -             *,           p8v,        p8v,        p10")
> +            "*,           *,          p7p8v,        p9v,        p9v,
> +             p7v,         p7v,        *,           *,          *,
> +             *,           *,          *,           *,          *,
> +             *,           p8v,        p8v,         *,          *,
> +             p10")
>     (set_attr "prefixed"
>              "*,           *,          *,          *,          *,
>               *,           *,          *,          *,          *,
>               *,           *,          *,          *,          *,
> -             *,           *,          *,          *")])
> +             *,           *,          *,          *,          *,
> +             *")])
>  
>  ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
>  ;;           H-const  F-const  Special

next prev parent reply	other threads:[~2023-06-12  8:43 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-25  9:50 Ajit Agarwal
2023-06-12  8:43 ` Ajit Agarwal [this message]
2023-06-12 12:51 ` Segher Boessenkool
2023-06-14  6:59 ` Surya Kumari Jangala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fe4774ab-bb30-510e-71cf-d0b6427476b5@linux.ibm.com \
    --to=aagarwa1@linux.ibm.com \
    --cc=bergner@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).