* [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
@ 2017-06-07 11:38 Tamar Christina
2017-06-12 7:31 ` Tamar Christina
2017-06-14 8:43 ` James Greenhalgh
0 siblings, 2 replies; 16+ messages in thread
From: Tamar Christina @ 2017-06-07 11:38 UTC (permalink / raw)
To: GCC Patches; +Cc: nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
[-- Attachment #1: Type: text/plain, Size: 868 bytes --]
Hi All,
This patch adds support for creating floating point constants
using mov immediate instructions. The movi SIMD instruction can
be used for HFmode and SFmode constants, e.g. for -0.0f we generate:
movi v0.2s, 0x80, lsl 24
More complex constants can be generated using an integer MOV or
MOV+MOVK:
mov w0, 48128
movk w0, 0x47f0, lsl 16
fmov s0, w0
We allow up to 3 instructions as this allows all HF, SF and most DF
constants to be generated without a literal load, and is overall best
for codesize.
Regression tested on aarch64-none-linux-gnu and no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-06-07 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64.md (mov<mode>): Generalize.
(*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
Add integer and movi cases.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: float-move-2.patch --]
[-- Type: text/x-patch; name="float-move-2.patch", Size: 5625 bytes --]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..7f107672882b13809be01355ffafbc2807cc5adb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1167,66 +1167,120 @@
}
)
-(define_insn "*movhf_aarch64"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
- (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
+(define_insn_and_split "*movhf_aarch64"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], HFmode)
- || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+ || aarch64_reg_or_fp_float (operands[1], HFmode))"
"@
movi\\t%0.4h, #0
- mov\\t%0.h[0], %w1
+ fmov\\t%s0, %w1
umov\\t%w0, %1.h[0]
mov\\t%0.h[0], %1.h[0]
+ fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%h0, %1
str\\t%h1, %0
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+ "&& can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ "{
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (SImode);
+ aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
+ tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
+ emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
+ DONE;
+ }"
+ [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+ neon_move,f_loads,f_stores,load1,store1,mov_reg")
+ (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
)
-(define_insn "*movsf_aarch64"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movsf_aarch64"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+ || aarch64_reg_or_fp_float (operands[1], SFmode))"
"@
movi\\t%0.2s, #0
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
- mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%w0, %w1
+ mov\\t%w0, %1"
+ "&& can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ "{
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (SImode);
+ aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
+ emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
+ DONE;
+ }"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+ f_loads,f_stores,load1,store1,mov_reg,\
+ fconsts")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
-(define_insn "*movdf_aarch64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movdf_aarch64"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+ || aarch64_reg_or_fp_float (operands[1], DFmode))"
"@
movi\\t%d0, #0
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
- mov\\t%x0, %x1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%x0, %x1
+ mov\\t%x0, %1"
+ "&& can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ "{
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (DImode);
+ aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
+ emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
+ DONE;
+ }"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+ f_loadd,f_stored,load1,store1,mov_reg,\
+ fconstd")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movtf_aarch64"
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-07 11:38 [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode Tamar Christina
@ 2017-06-12 7:31 ` Tamar Christina
2017-06-14 10:06 ` Richard Sandiford
2017-06-14 8:43 ` James Greenhalgh
1 sibling, 1 reply; 16+ messages in thread
From: Tamar Christina @ 2017-06-12 7:31 UTC (permalink / raw)
To: GCC Patches; +Cc: nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
[-- Attachment #1: Type: text/plain, Size: 1365 bytes --]
Hi All,
Updating this patch with the feedback I've received from patch 1/4.
Thanks,
Tamar
________________________________________
From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org> on behalf of Tamar Christina <Tamar.Christina@arm.com>
Sent: Wednesday, June 7, 2017 12:38:37 PM
To: GCC Patches
Cc: nd; James Greenhalgh; Marcus Shawcroft; Richard Earnshaw
Subject: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
Hi All,
This patch adds support for creating floating point constants
using mov immediate instructions. The movi SIMD instruction can
be used for HFmode and SFmode constants, e.g. for -0.0f we generate:
movi v0.2s, 0x80, lsl 24
More complex constants can be generated using an integer MOV or
MOV+MOVK:
mov w0, 48128
movk w0, 0x47f0, lsl 16
fmov s0, w0
We allow up to 3 instructions as this allows all HF, SF and most DF
constants to be generated without a literal load, and is overall best
for codesize.
Regression tested on aarch64-none-linux-gnu and no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-06-07 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64.md (mov<mode>): Generalize.
(*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
Add integer and movi cases.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: float-move-2-r2.patch --]
[-- Type: text/x-patch; name="float-move-2-r2.patch", Size: 5664 bytes --]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..62ad76731d2c5b4b3d02def8c2b1457c713c2d8e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1167,66 +1167,120 @@
}
)
-(define_insn "*movhf_aarch64"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
- (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
+(define_insn_and_split "*movhf_aarch64"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], HFmode)
- || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+ || aarch64_reg_or_fp_float (operands[1], HFmode))"
"@
movi\\t%0.4h, #0
- mov\\t%0.h[0], %w1
+ fmov\\t%s0, %w1
umov\\t%w0, %1.h[0]
mov\\t%0.h[0], %1.h[0]
+ fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%h0, %1
str\\t%h1, %0
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+ "&& can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ "{
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (SImode);
+ aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
+ tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
+ emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
+ DONE;
+ }"
+ [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+ neon_move,f_loads,f_stores,load1,store1,mov_reg")
+ (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
)
-(define_insn "*movsf_aarch64"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movsf_aarch64"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+ || aarch64_reg_or_fp_float (operands[1], SFmode))"
"@
movi\\t%0.2s, #0
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
- mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%w0, %w1
+ mov\\t%w0, %1"
+ "&& can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ "{
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (SImode);
+ aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
+ emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
+ DONE;
+ }"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+ f_loads,f_stores,load1,store1,mov_reg,\
+ fconsts")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
-(define_insn "*movdf_aarch64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movdf_aarch64"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+ || aarch64_reg_or_fp_float (operands[1], DFmode))"
"@
movi\\t%d0, #0
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
- mov\\t%x0, %x1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%x0, %x1
+ mov\\t%x0, %1"
+ "&& can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ "{
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (DImode);
+ aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, DImode));
+ emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
+ DONE;
+ }"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+ f_loadd,f_stored,load1,store1,mov_reg,\
+ fconstd")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movtf_aarch64"
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-07 11:38 [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode Tamar Christina
2017-06-12 7:31 ` Tamar Christina
@ 2017-06-14 8:43 ` James Greenhalgh
2017-06-21 10:48 ` Tamar Christina
1 sibling, 1 reply; 16+ messages in thread
From: James Greenhalgh @ 2017-06-14 8:43 UTC (permalink / raw)
To: Tamar Christina; +Cc: GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
On Wed, Jun 07, 2017 at 12:38:37PM +0100, Tamar Christina wrote:
> Hi All,
>
>
> This patch adds support for creating floating point constants
> using mov immediate instructions. The movi SIMD instruction can
> be used for HFmode and SFmode constants, eg. for -0.0f we generate:
>
> movi v0.2s, 0x80, lsl 24
>
> More complex constants can be generated using an integer MOV or
> MOV+MOVK:
>
> mov w0, 48128
> movk w0, 0x47f0, lsl 16
> fmov s0, w0
>
> We allow up to 3 instructions as this allows all HF, SF and most DF
> constants to be generated without a literal load, and is overall best
> for codesize.
>
>
> Regression tested on aarch64-none-linux-gnu and no regressions.
>
> OK for trunk?
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..7f107672882b13809be01355ffafbc2807cc5adb 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1167,66 +1167,120 @@
> }
> )
>
> -(define_insn "*movhf_aarch64"
> - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
> - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
> +(define_insn_and_split "*movhf_aarch64"
> + [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
> + (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
> "TARGET_FLOAT && (register_operand (operands[0], HFmode)
> - || aarch64_reg_or_fp_zero (operands[1], HFmode))"
> + || aarch64_reg_or_fp_float (operands[1], HFmode))"
> "@
> movi\\t%0.4h, #0
> - mov\\t%0.h[0], %w1
> + fmov\\t%s0, %w1
Should this not be %h0?
> umov\\t%w0, %1.h[0]
> mov\\t%0.h[0], %1.h[0]
> + fmov\\t%s0, %1
Likewise, and much more important for correctness as it changes the way
the bit pattern ends up in the register (see table C2-1 in release B.a of
the ARM Architecture Reference Manual for ARMv8-A), here.
> + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
> ldr\\t%h0, %1
> str\\t%h1, %0
> ldrh\\t%w0, %1
> strh\\t%w1, %0
> mov\\t%w0, %w1"
> - [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> - f_loads,f_stores,load1,store1,mov_reg")
> - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> + "&& can_create_pseudo_p ()
> + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> + && !aarch64_float_const_representable_p (operands[1])
> + && aarch64_float_const_rtx_p (operands[1])"
> + [(const_int 0)]
> + "{
> + unsigned HOST_WIDE_INT ival;
> + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> + FAIL;
> +
> + rtx tmp = gen_reg_rtx (SImode);
> + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> + DONE;
> + }"
> + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
> + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> )
Thanks,
James
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-12 7:31 ` Tamar Christina
@ 2017-06-14 10:06 ` Richard Sandiford
2017-06-15 13:25 ` Tamar Christina
0 siblings, 1 reply; 16+ messages in thread
From: Richard Sandiford @ 2017-06-14 10:06 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
In addition to James's comments:
Tamar Christina <Tamar.Christina@arm.com> writes:
> + [(const_int 0)]
> + "{
"{ ... }" isn't necessary, we can just use { ... }
> + unsigned HOST_WIDE_INT ival;
> + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> + FAIL;
> +
> + rtx tmp = gen_reg_rtx (SImode);
> + aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
> + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
This looks wrong for big-endian, and...
> + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
...either it should be OK to go directly from tmp to the HFmode lowpart,
or we should move the HImode temporary into a fresh REG. Current
validate_subreg seems to suggest that we need the latter.
Isn't it possible to use a HImode move immediate instead of an SImode one?
Thanks,
Richard
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-14 10:06 ` Richard Sandiford
@ 2017-06-15 13:25 ` Tamar Christina
2017-06-15 14:28 ` Tamar Christina
0 siblings, 1 reply; 16+ messages in thread
From: Tamar Christina @ 2017-06-15 13:25 UTC (permalink / raw)
To: Richard Sandiford
Cc: GCC Patches, nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
Hi Richard,
> > + rtx tmp = gen_reg_rtx (SImode);
> > + aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
>
> This looks wrong for big-endian, and...
>
> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
>
> ...either it should be OK to go directly from tmp to the HFmode lowpart, or
> we should move the HImode temporary into a fresh REG. Current
> validate_subreg seems to suggest that we need the latter.
>
> Isn't it possible to use a HImode move immediate instead of an SImode one?
We don't really have a movhi pattern, currently a movhi would end up in the general
mov<mode>_aarch64 pattern which would then end up using a w register as well.
I'll take a look at big-endian.
>
> Thanks,
> Richard
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-15 13:25 ` Tamar Christina
@ 2017-06-15 14:28 ` Tamar Christina
2017-06-16 7:53 ` Richard Sandiford
0 siblings, 1 reply; 16+ messages in thread
From: Tamar Christina @ 2017-06-15 14:28 UTC (permalink / raw)
To: Richard Sandiford
Cc: GCC Patches, nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
Hi Richard,
> > > + rtx tmp = gen_reg_rtx (SImode);
> > > + aarch64_expand_mov_immediate (tmp, gen_int_mode (ival,
> SImode));
> > > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> >
> > This looks wrong for big-endian, and...
> >
> > > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> >
> > ...either it should be OK to go directly from tmp to the HFmode
> > lowpart, or we should move the HImode temporary into a fresh REG.
> > Current validate_subreg seems to suggest that we need the latter.
> >
> > Isn't it possible to use a HImode move immediate instead of an SImode
> one?
>
> We don't really have a movhi pattern, currently a movhi would end up in
> the general
> mov<mode>_aarch64 pattern which would then end up using a w
> register as well.
Also aarch64_expand_mov_immediate doesn't allow HImode moves, only SI and DI.
>
> I'll take a look at big-endian.
>
> >
> > Thanks,
> > Richard
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-15 14:28 ` Tamar Christina
@ 2017-06-16 7:53 ` Richard Sandiford
2017-06-16 8:42 ` Tamar Christina
0 siblings, 1 reply; 16+ messages in thread
From: Richard Sandiford @ 2017-06-16 7:53 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
Tamar Christina <Tamar.Christina@arm.com> writes:
> Hi Richard,
>> > > + rtx tmp = gen_reg_rtx (SImode);
>> > > + aarch64_expand_mov_immediate (tmp, gen_int_mode (ival,
>> SImode));
>> > > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
>> >
>> > This looks wrong for big-endian, and...
>> >
>> > > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
>> >
>> > ...either it should be OK to go directly from tmp to the HFmode
>> > lowpart, or we should move the HImode temporary into a fresh REG.
>> > Current validate_subreg seems to suggest that we need the latter.
>> >
>> > Isn't it possible to use a HImode move immediate instead of an SImode
>> one?
>>
>> We don't really have a movhi pattern, currently a movhi would end up
>> in the general mov<mode>_aarch64 pattern
movqi and movhi patterns are defined from the same mov<mode> template,
but they're still "proper" move patterns.
>> which would then end up using a w register as well.
Isn't that what you want though? f16_mov_immediate_1.c is testing for:
/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, #?19520" 3 } } */
> Also aarch64_expand_mov_immediate doesn't allow HImode moves, only SI and DI.
It doesn't need to, because all HImode CONST_INTs are already legitimate.
You can just use emit_move_insn instead.
FWIW, the following seems to pass the same tests and avoids the subreg
dance. Just a proof of concept, and I'm not attached to the new
iterator name.
Thanks,
Richard
Index: gcc/gcc/config/aarch64/aarch64.md
===================================================================
--- gcc.orig/gcc/config/aarch64/aarch64.md
+++ gcc/gcc/config/aarch64/aarch64.md
@@ -1063,7 +1063,28 @@
}
)
-(define_insn_and_split "*movhf_aarch64"
+(define_split
+ [(set (match_operand:GPF_MOV_F16 0 "nonimmediate_operand")
+ (match_operand:GPF_MOV_F16 1 "immediate_operand"))]
+ "TARGET_FLOAT
+ && can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
+ emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+ DONE;
+ }
+)
+
+(define_insn "*movhf_aarch64"
[(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
(match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], HFmode)
@@ -1080,28 +1101,12 @@
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- "&& can_create_pseudo_p ()
- && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
- && !aarch64_float_const_representable_p (operands[1])
- && aarch64_float_const_rtx_p (operands[1])"
- [(const_int 0)]
- "{
- unsigned HOST_WIDE_INT ival;
- if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
- FAIL;
-
- rtx tmp = gen_reg_rtx (SImode);
- aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
- tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
- emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
- DONE;
- }"
[(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
neon_move,f_loads,f_stores,load1,store1,mov_reg")
(set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
)
-(define_insn_and_split "*movsf_aarch64"
+(define_insn "*movsf_aarch64"
[(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
(match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
@@ -1119,28 +1124,13 @@
str\\t%w1, %0
mov\\t%w0, %w1
mov\\t%w0, %1"
- "&& can_create_pseudo_p ()
- && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
- && !aarch64_float_const_representable_p (operands[1])
- && aarch64_float_const_rtx_p (operands[1])"
- [(const_int 0)]
- "{
- unsigned HOST_WIDE_INT ival;
- if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
- FAIL;
-
- rtx tmp = gen_reg_rtx (SImode);
- aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
- emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
- DONE;
- }"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
f_loads,f_stores,load1,store1,mov_reg,\
fconsts")
(set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
-(define_insn_and_split "*movdf_aarch64"
+(define_insn "*movdf_aarch64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
(match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
@@ -1158,21 +1148,6 @@
str\\t%x1, %0
mov\\t%x0, %x1
mov\\t%x0, %1"
- "&& can_create_pseudo_p ()
- && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
- && !aarch64_float_const_representable_p (operands[1])
- && aarch64_float_const_rtx_p (operands[1])"
- [(const_int 0)]
- "{
- unsigned HOST_WIDE_INT ival;
- if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
- FAIL;
-
- rtx tmp = gen_reg_rtx (DImode);
- aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
- emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
- DONE;
- }"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
f_loadd,f_stored,load1,store1,mov_reg,\
fconstd")
Index: gcc/gcc/config/aarch64/iterators.md
===================================================================
--- gcc.orig/gcc/config/aarch64/iterators.md
+++ gcc/gcc/config/aarch64/iterators.md
@@ -44,6 +44,10 @@
;; Iterator for all scalar floating point modes (HF, SF, DF)
(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+;; Iterator for all scalar floating point modes (HF, SF, DF), without
+;; requiring AARCH64_ISA_F16 for HF.
+(define_mode_iterator GPF_MOV_F16 [HF SF DF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-16 7:53 ` Richard Sandiford
@ 2017-06-16 8:42 ` Tamar Christina
0 siblings, 0 replies; 16+ messages in thread
From: Tamar Christina @ 2017-06-16 8:42 UTC (permalink / raw)
To: Richard Sandiford
Cc: GCC Patches, nd, James Greenhalgh, Marcus Shawcroft, Richard Earnshaw
>
> It doesn't need to, because all HImode CONST_INTs are already legitimate.
> You can just use emit_move_insn instead.
>
Ah right, that's true.
> FWIW, the following seems to pass the same tests and avoids the subreg
> dance. Just a proof of concept, and I'm not attached to the new iterator
> name.
Ah thanks! that is a bit simpler. I'll take a similar approach.
> Thanks,
> Richard
>
>
> Index: gcc/gcc/config/aarch64/aarch64.md
> ==========================================================
> =========
> --- gcc.orig/gcc/config/aarch64/aarch64.md
> +++ gcc/gcc/config/aarch64/aarch64.md
> @@ -1063,7 +1063,28 @@
> }
> )
>
> -(define_insn_and_split "*movhf_aarch64"
> +(define_split
> + [(set (match_operand:GPF_MOV_F16 0 "nonimmediate_operand")
> + (match_operand:GPF_MOV_F16 1 "immediate_operand"))]
> + "TARGET_FLOAT
> + && can_create_pseudo_p ()
> + && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> + && !aarch64_float_const_representable_p (operands[1])
> + && aarch64_float_const_rtx_p (operands[1])"
> + [(const_int 0)]
> + {
> + unsigned HOST_WIDE_INT ival;
> + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> + FAIL;
> +
> + rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
> + emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
> + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> + DONE;
> + }
> +)
> +
> +(define_insn "*movhf_aarch64"
> [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w
> ,w,m,r,m ,r")
> (match_operand:HF 1 "general_operand" "Y ,?rY,
> w,w,Ufc,Uvi,m,w,m,rY,r"))]
> "TARGET_FLOAT && (register_operand (operands[0], HFmode) @@ -
> 1080,28 +1101,12 @@
> ldrh\\t%w0, %1
> strh\\t%w1, %0
> mov\\t%w0, %w1"
> - "&& can_create_pseudo_p ()
> - && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> - && !aarch64_float_const_representable_p (operands[1])
> - && aarch64_float_const_rtx_p (operands[1])"
> - [(const_int 0)]
> - "{
> - unsigned HOST_WIDE_INT ival;
> - if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> - FAIL;
> -
> - rtx tmp = gen_reg_rtx (SImode);
> - aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> - tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> - emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> - DONE;
> - }"
> [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
> neon_move,f_loads,f_stores,load1,store1,mov_reg")
> (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> )
>
> -(define_insn_and_split "*movsf_aarch64"
> +(define_insn "*movsf_aarch64"
> [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w
> ,w,m,r,m ,r,r")
> (match_operand:SF 1 "general_operand" "Y ,?rY,
> w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
> "TARGET_FLOAT && (register_operand (operands[0], SFmode) @@ -
> 1119,28 +1124,13 @@
> str\\t%w1, %0
> mov\\t%w0, %w1
> mov\\t%w0, %1"
> - "&& can_create_pseudo_p ()
> - && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
> - && !aarch64_float_const_representable_p (operands[1])
> - && aarch64_float_const_rtx_p (operands[1])"
> - [(const_int 0)]
> - "{
> - unsigned HOST_WIDE_INT ival;
> - if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> - FAIL;
> -
> - rtx tmp = gen_reg_rtx (SImode);
> - aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> - emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
> - DONE;
> - }"
> [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
> f_loads,f_stores,load1,store1,mov_reg,\
> fconsts")
> (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
> )
>
> -(define_insn_and_split "*movdf_aarch64"
> +(define_insn "*movdf_aarch64"
> [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w
> ,w,m,r,m ,r,r")
> (match_operand:DF 1 "general_operand" "Y , ?rY,
> w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
> "TARGET_FLOAT && (register_operand (operands[0], DFmode) @@ -
> 1158,21 +1148,6 @@
> str\\t%x1, %0
> mov\\t%x0, %x1
> mov\\t%x0, %1"
> - "&& can_create_pseudo_p ()
> - && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
> - && !aarch64_float_const_representable_p (operands[1])
> - && aarch64_float_const_rtx_p (operands[1])"
> - [(const_int 0)]
> - "{
> - unsigned HOST_WIDE_INT ival;
> - if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> - FAIL;
> -
> - rtx tmp = gen_reg_rtx (DImode);
> - aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> - emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
> - DONE;
> - }"
> [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
> f_loadd,f_stored,load1,store1,mov_reg,\
> fconstd")
> Index: gcc/gcc/config/aarch64/iterators.md
> ==========================================================
> =========
> --- gcc.orig/gcc/config/aarch64/iterators.md
> +++ gcc/gcc/config/aarch64/iterators.md
> @@ -44,6 +44,10 @@
> ;; Iterator for all scalar floating point modes (HF, SF, DF)
> (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
>
> +;; Iterator for all scalar floating point modes (HF, SF, DF), without
> +;; requiring AARCH64_ISA_F16 for HF.
> +(define_mode_iterator GPF_MOV_F16 [HF SF DF])
> +
> ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
> (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-14 8:43 ` James Greenhalgh
@ 2017-06-21 10:48 ` Tamar Christina
2017-06-26 10:50 ` Tamar Christina
0 siblings, 1 reply; 16+ messages in thread
From: Tamar Christina @ 2017-06-21 10:48 UTC (permalink / raw)
To: James Greenhalgh; +Cc: GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
> > movi\\t%0.4h, #0
> > - mov\\t%0.h[0], %w1
> > + fmov\\t%s0, %w1
>
> Should this not be %h0?
The problem is that H registers are only available in ARMv8.2+;
I'm not sure what to do about ARMv8.1, given your other feedback
pointing out that the bit patterns differ between how the value is
stored in s vs. h registers.
>
> > umov\\t%w0, %1.h[0]
> > mov\\t%0.h[0], %1.h[0]
> > + fmov\\t%s0, %1
>
> Likewise, and much more important for correctness as it changes the way the
> bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
> Architecture Reference Manual for ARMv8-A), here.
>
> > + * return aarch64_output_scalar_simd_mov_immediate (operands[1],
> > + SImode);
> > ldr\\t%h0, %1
> > str\\t%h1, %0
> > ldrh\\t%w0, %1
> > strh\\t%w1, %0
> > mov\\t%w0, %w1"
> > - [(set_attr "type"
> "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> > - f_loads,f_stores,load1,store1,mov_reg")
> > - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> > + "&& can_create_pseudo_p ()
> > + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> > + && !aarch64_float_const_representable_p (operands[1])
> > + && aarch64_float_const_rtx_p (operands[1])"
> > + [(const_int 0)]
> > + "{
> > + unsigned HOST_WIDE_INT ival;
> > + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> > + FAIL;
> > +
> > + rtx tmp = gen_reg_rtx (SImode);
> > + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> > + DONE;
> > + }"
> > + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts,
> \
> > + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> > + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> > )
>
> Thanks,
> James
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-21 10:48 ` Tamar Christina
@ 2017-06-26 10:50 ` Tamar Christina
2017-07-03 6:12 ` Tamar Christina
` (2 more replies)
0 siblings, 3 replies; 16+ messages in thread
From: Tamar Christina @ 2017-06-26 10:50 UTC (permalink / raw)
To: James Greenhalgh; +Cc: GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
[-- Attachment #1: Type: text/plain, Size: 2939 bytes --]
Hi all,
Here's the re-spun patch.
Aside from the grouping of the split patterns it now also uses h register for the fmov for HF when available,
otherwise it forces a literal load.
Regression tested on aarch64-none-linux-gnu and no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-06-26 Tamar Christina <tamar.christina@arm.com>
Richard Sandiford <richard.sandiford@linaro.org>
* config/aarch64/aarch64.md (mov<mode>): Generalize.
(*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
Add integer and movi cases.
(movi-split-hf-df-sf split, fp16): New.
(enabled): Added TARGET_FP_F16INST.
* config/aarch64/iterators.md (GPF_HF): New.
________________________________________
From: Tamar Christina
Sent: Wednesday, June 21, 2017 11:48:33 AM
To: James Greenhalgh
Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
Subject: RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
> > movi\\t%0.4h, #0
> > - mov\\t%0.h[0], %w1
> > + fmov\\t%s0, %w1
>
> Should this not be %h0?
The problem is that H registers are only available in ARMv8.2+;
I'm not sure what to do about ARMv8.1, given your other feedback
pointing out that the bit patterns differ between how the value is
stored in s vs. h registers.
>
> > umov\\t%w0, %1.h[0]
> > mov\\t%0.h[0], %1.h[0]
> > + fmov\\t%s0, %1
>
> Likewise, and much more important for correctness as it changes the way the
> bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
> Architecture Reference Manual for ARMv8-A), here.
>
> > + * return aarch64_output_scalar_simd_mov_immediate (operands[1],
> > + SImode);
> > ldr\\t%h0, %1
> > str\\t%h1, %0
> > ldrh\\t%w0, %1
> > strh\\t%w1, %0
> > mov\\t%w0, %w1"
> > - [(set_attr "type"
> "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> > - f_loads,f_stores,load1,store1,mov_reg")
> > - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> > + "&& can_create_pseudo_p ()
> > + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> > + && !aarch64_float_const_representable_p (operands[1])
> > + && aarch64_float_const_rtx_p (operands[1])"
> > + [(const_int 0)]
> > + "{
> > + unsigned HOST_WIDE_INT ival;
> > + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> > + FAIL;
> > +
> > + rtx tmp = gen_reg_rtx (SImode);
> > + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> > + DONE;
> > + }"
> > + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts,
> \
> > + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> > + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> > )
>
> Thanks,
> James
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: float-mov_modes-2.patch --]
[-- Type: text/x-patch; name="float-mov_modes-2.patch", Size: 6529 bytes --]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1a721bfbe42270ec75268b6e2366290aa6ad2134..c951efc383c17ea81800e482e39760eb17830c0a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -181,6 +181,11 @@
;; will be disabled when !TARGET_FLOAT.
(define_attr "fp" "no,yes" (const_string "no"))
+;; Attribute that specifies whether or not the instruction touches half
+;; precision fp registers. When this is set to yes for an alternative,
+;; that alternative will be disabled when !TARGET_FP_F16INST.
+(define_attr "fp16" "no,yes" (const_string "no"))
+
;; Attribute that specifies whether or not the instruction touches simd
;; registers. When this is set to yes for an alternative, that alternative
;; will be disabled when !TARGET_SIMD.
@@ -194,11 +199,14 @@
;; registers when -mgeneral-regs-only is specified.
(define_attr "enabled" "no,yes"
(cond [(ior
- (and (eq_attr "fp" "yes")
- (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
- (and (eq_attr "simd" "yes")
- (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
- (const_string "no")
+ (ior
+ (and (eq_attr "fp" "yes")
+ (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
+ (and (eq_attr "simd" "yes")
+ (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
+ (and (eq_attr "fp16" "yes")
+ (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0))))
+ (const_string "no")
] (const_string "yes")))
;; Attribute that specifies whether we are dealing with a branch to a
@@ -1062,65 +1070,94 @@
)
(define_insn "*movhf_aarch64"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
- (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], HFmode)
- || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+ || aarch64_reg_or_fp_float (operands[1], HFmode))"
"@
movi\\t%0.4h, #0
- mov\\t%0.h[0], %w1
+ fmov\\t%h0, %w1
umov\\t%w0, %1.h[0]
mov\\t%0.h[0], %1.h[0]
+ fmov\\t%h0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%h0, %1
str\\t%h1, %0
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+ [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+ neon_move,f_loads,f_stores,load1,store1,mov_reg")
+ (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")
+ (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movsf_aarch64"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+ || aarch64_reg_or_fp_float (operands[1], SFmode))"
"@
movi\\t%0.2s, #0
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
- mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%w0, %w1
+ mov\\t%w0, %1"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+ f_loads,f_stores,load1,store1,mov_reg,\
+ fconsts")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movdf_aarch64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+ || aarch64_reg_or_fp_float (operands[1], DFmode))"
"@
movi\\t%d0, #0
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
- mov\\t%x0, %x1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%x0, %x1
+ mov\\t%x0, %1"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+ f_loadd,f_stored,load1,store1,mov_reg,\
+ fconstd")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
+)
+
+(define_split
+ [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
+ (match_operand:GPF_HF 1 "general_operand"))]
+ "can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
+ emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+ DONE;
+ }
)
(define_insn "*movtf_aarch64"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43be7fd361116d305b0300a814ffca82a9c44a88..067cef785331a243632715e0d0fe35a314eeebff 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -44,6 +44,9 @@
;; Iterator for all scalar floating point modes (HF, SF, DF)
(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+;; Iterator for all scalar floating point modes (HF, SF, DF)
+(define_mode_iterator GPF_HF [HF SF DF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-26 10:50 ` Tamar Christina
@ 2017-07-03 6:12 ` Tamar Christina
2017-07-10 7:35 ` Tamar Christina
2017-07-27 16:09 ` James Greenhalgh
2017-08-01 11:47 ` Bin.Cheng
2 siblings, 1 reply; 16+ messages in thread
From: Tamar Christina @ 2017-07-03 6:12 UTC (permalink / raw)
To: James Greenhalgh; +Cc: GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
Ping
________________________________________
From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org> on behalf of Tamar Christina <Tamar.Christina@arm.com>
Sent: Monday, June 26, 2017 11:50:51 AM
To: James Greenhalgh
Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
Subject: Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
Hi all,
Here's the re-spun patch.
Aside from the grouping of the split patterns it now also uses h register for the fmov for HF when available,
otherwise it forces a literal load.
Regression tested on aarch64-none-linux-gnu and no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-06-26 Tamar Christina <tamar.christina@arm.com>
Richard Sandiford <richard.sandiford@linaro.org>
* config/aarch64/aarch64.md (mov<mode>): Generalize.
(*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
Add integer and movi cases.
(movi-split-hf-df-sf split, fp16): New.
(enabled): Added TARGET_FP_F16INST.
* config/aarch64/iterators.md (GPF_HF): New.
________________________________________
From: Tamar Christina
Sent: Wednesday, June 21, 2017 11:48:33 AM
To: James Greenhalgh
Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
Subject: RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
> > movi\\t%0.4h, #0
> > - mov\\t%0.h[0], %w1
> > + fmov\\t%s0, %w1
>
> Should this not be %h0?
The problem is that H registers are only available in ARMv8.2+;
I'm not sure what to do about ARMv8.1, given your other feedback
pointing out that the bit patterns differ between how the value is
stored in s vs. h registers.
>
> > umov\\t%w0, %1.h[0]
> > mov\\t%0.h[0], %1.h[0]
> > + fmov\\t%s0, %1
>
> Likewise, and much more important for correctness as it changes the way the
> bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
> Architecture Reference Manual for ARMv8-A), here.
>
> > + * return aarch64_output_scalar_simd_mov_immediate (operands[1],
> > + SImode);
> > ldr\\t%h0, %1
> > str\\t%h1, %0
> > ldrh\\t%w0, %1
> > strh\\t%w1, %0
> > mov\\t%w0, %w1"
> > - [(set_attr "type"
> "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> > - f_loads,f_stores,load1,store1,mov_reg")
> > - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> > + "&& can_create_pseudo_p ()
> > + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> > + && !aarch64_float_const_representable_p (operands[1])
> > + && aarch64_float_const_rtx_p (operands[1])"
> > + [(const_int 0)]
> > + "{
> > + unsigned HOST_WIDE_INT ival;
> > + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> > + FAIL;
> > +
> > + rtx tmp = gen_reg_rtx (SImode);
> > + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> > + DONE;
> > + }"
> > + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts,
> \
> > + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> > + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> > )
>
> Thanks,
> James
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-07-03 6:12 ` Tamar Christina
@ 2017-07-10 7:35 ` Tamar Christina
0 siblings, 0 replies; 16+ messages in thread
From: Tamar Christina @ 2017-07-10 7:35 UTC (permalink / raw)
To: James Greenhalgh
Cc: GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw, Richard Sandiford
Ping
________________________________________
From: Tamar Christina
Sent: Monday, July 3, 2017 7:12:05 AM
To: James Greenhalgh
Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
Subject: Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
Ping
________________________________________
From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org> on behalf of Tamar Christina <Tamar.Christina@arm.com>
Sent: Monday, June 26, 2017 11:50:51 AM
To: James Greenhalgh
Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
Subject: Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
Hi all,
Here's the re-spun patch.
Aside from the grouping of the split patterns it now also uses h register for the fmov for HF when available,
otherwise it forces a literal load.
Regression tested on aarch64-none-linux-gnu and no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-06-26 Tamar Christina <tamar.christina@arm.com>
Richard Sandiford <richard.sandiford@linaro.org>
* config/aarch64/aarch64.md (mov<mode>): Generalize.
(*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
Add integer and movi cases.
(movi-split-hf-df-sf split, fp16): New.
(enabled): Added TARGET_FP_F16INST.
* config/aarch64/iterators.md (GPF_HF): New.
________________________________________
From: Tamar Christina
Sent: Wednesday, June 21, 2017 11:48:33 AM
To: James Greenhalgh
Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
Subject: RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
> > movi\\t%0.4h, #0
> > - mov\\t%0.h[0], %w1
> > + fmov\\t%s0, %w1
>
> Should this not be %h0?
The problem is that H registers are only available in ARMv8.2+;
I'm not sure what to do about ARMv8.1, given your other feedback
pointing out that the bit patterns differ between how the value is
stored in s vs. h registers.
>
> > umov\\t%w0, %1.h[0]
> > mov\\t%0.h[0], %1.h[0]
> > + fmov\\t%s0, %1
>
> Likewise, and much more important for correctness as it changes the way the
> bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
> Architecture Reference Manual for ARMv8-A), here.
>
> > + * return aarch64_output_scalar_simd_mov_immediate (operands[1],
> > + SImode);
> > ldr\\t%h0, %1
> > str\\t%h1, %0
> > ldrh\\t%w0, %1
> > strh\\t%w1, %0
> > mov\\t%w0, %w1"
> > - [(set_attr "type"
> "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> > - f_loads,f_stores,load1,store1,mov_reg")
> > - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> > + "&& can_create_pseudo_p ()
> > + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> > + && !aarch64_float_const_representable_p (operands[1])
> > + && aarch64_float_const_rtx_p (operands[1])"
> > + [(const_int 0)]
> > + "{
> > + unsigned HOST_WIDE_INT ival;
> > + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> > + FAIL;
> > +
> > + rtx tmp = gen_reg_rtx (SImode);
> > + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> > + DONE;
> > + }"
> > + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts,
> \
> > + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> > + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> > )
>
> Thanks,
> James
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-26 10:50 ` Tamar Christina
2017-07-03 6:12 ` Tamar Christina
@ 2017-07-27 16:09 ` James Greenhalgh
2017-08-01 11:47 ` Bin.Cheng
2 siblings, 0 replies; 16+ messages in thread
From: James Greenhalgh @ 2017-07-27 16:09 UTC (permalink / raw)
To: Tamar Christina; +Cc: GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
On Mon, Jun 26, 2017 at 11:50:51AM +0100, Tamar Christina wrote:
> Hi all,
>
> Here's the re-spun patch.
> Aside from the grouping of the split patterns it now also uses h register for the fmov for HF when available,
> otherwise it forces a literal load.
>
> Regression tested on aarch64-none-linux-gnu and no regressions.
>
> OK for trunk?
OK.
Thanks,
James
>
> Thanks,
> Tamar
>
>
> gcc/
> 2017-06-26 Tamar Christina <tamar.christina@arm.com>
> Richard Sandiford <richard.sandiford@linaro.org>
>
> * config/aarch64/aarch64.md (mov<mode>): Generalize.
> (*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
> Add integer and movi cases.
> (movi-split-hf-df-sf split, fp16): New.
> (enabled): Added TARGET_FP_F16INST.
> * config/aarch64/iterators.md (GPF_HF): New.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-06-26 10:50 ` Tamar Christina
2017-07-03 6:12 ` Tamar Christina
2017-07-27 16:09 ` James Greenhalgh
@ 2017-08-01 11:47 ` Bin.Cheng
2017-08-01 11:51 ` Tamar Christina
2 siblings, 1 reply; 16+ messages in thread
From: Bin.Cheng @ 2017-08-01 11:47 UTC (permalink / raw)
To: Tamar Christina
Cc: James Greenhalgh, GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
On Mon, Jun 26, 2017 at 11:50 AM, Tamar Christina
<Tamar.Christina@arm.com> wrote:
> Hi all,
>
> Here's the re-spun patch.
> Aside from the grouping of the split patterns it now also uses h register for the fmov for HF when available,
> otherwise it forces a literal load.
>
> Regression tested on aarch64-none-linux-gnu and no regressions.
Hi,
There are lots of test failures on aarch64_be-none-elf, I verified two:
gcc.dg/vect/pr61680.c execution test
gcc.dg/vect/pr63148.c execution test
are caused by svn+ssh://gcc.gnu.org/svn/gcc/trunk@250673
Given review comment already pointed out big-endian issue and patch
was updated to address it, I would expect reg-test on a big-endian
target before applying patch, right?
Thanks,
bin
>
> OK for trunk?
>
> Thanks,
> Tamar
>
>
> gcc/
> 2017-06-26 Tamar Christina <tamar.christina@arm.com>
> Richard Sandiford <richard.sandiford@linaro.org>
>
> * config/aarch64/aarch64.md (mov<mode>): Generalize.
> (*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
> Add integer and movi cases.
> (movi-split-hf-df-sf split, fp16): New.
> (enabled): Added TARGET_FP_F16INST.
> * config/aarch64/iterators.md (GPF_HF): New.
> ________________________________________
> From: Tamar Christina
> Sent: Wednesday, June 21, 2017 11:48:33 AM
> To: James Greenhalgh
> Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
> Subject: RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
>
>> > movi\\t%0.4h, #0
>> > - mov\\t%0.h[0], %w1
>> > + fmov\\t%s0, %w1
>>
>> Should this not be %h0?
>
> > The problem is that H registers are only available in ARMv8.2+; I'm
> > not sure what to do about ARMv8.1, given your other feedback pointing
> > out that the bit patterns differ between how the value is stored in
> > s vs. h registers.
>
>>
>> > umov\\t%w0, %1.h[0]
>> > mov\\t%0.h[0], %1.h[0]
>> > + fmov\\t%s0, %1
>>
>> Likewise, and much more important for correctness as it changes the way the
>> bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
>> Architecture Reference Manual for ARMv8-A), here.
>>
>> > + * return aarch64_output_scalar_simd_mov_immediate (operands[1],
>> > + SImode);
>> > ldr\\t%h0, %1
>> > str\\t%h1, %0
>> > ldrh\\t%w0, %1
>> > strh\\t%w1, %0
>> > mov\\t%w0, %w1"
>> > - [(set_attr "type"
>> "neon_move,neon_from_gp,neon_to_gp,neon_move,\
>> > - f_loads,f_stores,load1,store1,mov_reg")
>> > - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
>> > + "&& can_create_pseudo_p ()
>> > + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
>> > + && !aarch64_float_const_representable_p (operands[1])
>> > + && aarch64_float_const_rtx_p (operands[1])"
>> > + [(const_int 0)]
>> > + "{
>> > + unsigned HOST_WIDE_INT ival;
>> > + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
>> > + FAIL;
>> > +
>> > + rtx tmp = gen_reg_rtx (SImode);
>> > + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
>> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
>> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
>> > + DONE;
>> > + }"
>> > + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts,
>> \
>> > + neon_move,f_loads,f_stores,load1,store1,mov_reg")
>> > + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
>> > )
>>
>> Thanks,
>> James
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-08-01 11:47 ` Bin.Cheng
@ 2017-08-01 11:51 ` Tamar Christina
2017-08-01 12:04 ` Bin.Cheng
0 siblings, 1 reply; 16+ messages in thread
From: Tamar Christina @ 2017-08-01 11:51 UTC (permalink / raw)
To: Bin.Cheng
Cc: James Greenhalgh, GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
>
> Given review comment already pointed out big-endian issue and patch was
> updated to address it, I would expect reg-test on a big-endian target before
> applying patch, right?
The patch spent 6 months in external review.
Given that, I simply forgot to rerun the big-endian tests before the commit, as I did the rest.
The failing tests were all added after the submission of this patch. I'll have a look.
> Thanks,
> bin
> >
> > OK for trunk?
> >
> > Thanks,
> > Tamar
> >
> >
> > gcc/
> > 2017-06-26 Tamar Christina <tamar.christina@arm.com>
> > Richard Sandiford <richard.sandiford@linaro.org>
> >
> > * config/aarch64/aarch64.md (mov<mode>): Generalize.
> > (*movhf_aarch64, *movsf_aarch64, *movdf_aarch64):
> > Add integer and movi cases.
> > (movi-split-hf-df-sf split, fp16): New.
> > (enabled): Added TARGET_FP_F16INST.
> > * config/aarch64/iterators.md (GPF_HF): New.
> > ________________________________________
> > From: Tamar Christina
> > Sent: Wednesday, June 21, 2017 11:48:33 AM
> > To: James Greenhalgh
> > Cc: GCC Patches; nd; Marcus Shawcroft; Richard Earnshaw
> > Subject: RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4)
> - HF/DF/SF mode.
> >
> >> > movi\\t%0.4h, #0
> >> > - mov\\t%0.h[0], %w1
> >> > + fmov\\t%s0, %w1
> >>
> >> Should this not be %h0?
> >
> > The problem is that H registers are only available in ARMv8.2+, I'm
> > not sure what to do about ARMv8.1 given your other feedback Pointing
> > out that the bit patterns between how it's stored in s vs h registers
> > differ.
> >
> >>
> >> > umov\\t%w0, %1.h[0]
> >> > mov\\t%0.h[0], %1.h[0]
> >> > + fmov\\t%s0, %1
> >>
> >> Likewise, and much more important for correctness as it changes the
> >> way the bit pattern ends up in the register (see table C2-1 in
> >> release B.a of the ARM Architecture Reference Manual for ARMv8-A),
> here.
> >>
> >> > + * return aarch64_output_scalar_simd_mov_immediate
> (operands[1],
> >> > + SImode);
> >> > ldr\\t%h0, %1
> >> > str\\t%h1, %0
> >> > ldrh\\t%w0, %1
> >> > strh\\t%w1, %0
> >> > mov\\t%w0, %w1"
> >> > - [(set_attr "type"
> >> "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> >> > - f_loads,f_stores,load1,store1,mov_reg")
> >> > - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> >> > + "&& can_create_pseudo_p ()
> >> > + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> >> > + && !aarch64_float_const_representable_p (operands[1])
> >> > + && aarch64_float_const_rtx_p (operands[1])"
> >> > + [(const_int 0)]
> >> > + "{
> >> > + unsigned HOST_WIDE_INT ival;
> >> > + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> >> > + FAIL;
> >> > +
> >> > + rtx tmp = gen_reg_rtx (SImode);
> >> > + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> >> > + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> >> > + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> >> > + DONE;
> >> > + }"
> >> > + [(set_attr "type"
> "neon_move,f_mcr,neon_to_gp,neon_move,fconsts,
> >> \
> >> > + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> >> > + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> >> > )
> >>
> >> Thanks,
> >> James
> >
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.
2017-08-01 11:51 ` Tamar Christina
@ 2017-08-01 12:04 ` Bin.Cheng
0 siblings, 0 replies; 16+ messages in thread
From: Bin.Cheng @ 2017-08-01 12:04 UTC (permalink / raw)
To: Tamar Christina
Cc: James Greenhalgh, GCC Patches, nd, Marcus Shawcroft, Richard Earnshaw
On Tue, Aug 1, 2017 at 12:51 PM, Tamar Christina
<Tamar.Christina@arm.com> wrote:
>>
>> Given review comment already pointed out big-endian issue and patch was
>> updated to address it, I would expect reg-test on a big-endian target before
>> applying patch, right?
>
> The patch spent 6 months in external review.
> Given that, I simply forgot to rerun big endian before the commit as I did the rest.
>
> The failing tests were all added after the submission of this patch. I'll have a look.
I may have bisected to the wrong commit? The patch was committed three days ago;
how could all of the failing tests have been added after it?
Thanks,
bin
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2017-08-01 12:04 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-07 11:38 [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode Tamar Christina
2017-06-12 7:31 ` Tamar Christina
2017-06-14 10:06 ` Richard Sandiford
2017-06-15 13:25 ` Tamar Christina
2017-06-15 14:28 ` Tamar Christina
2017-06-16 7:53 ` Richard Sandiford
2017-06-16 8:42 ` Tamar Christina
2017-06-14 8:43 ` James Greenhalgh
2017-06-21 10:48 ` Tamar Christina
2017-06-26 10:50 ` Tamar Christina
2017-07-03 6:12 ` Tamar Christina
2017-07-10 7:35 ` Tamar Christina
2017-07-27 16:09 ` James Greenhalgh
2017-08-01 11:47 ` Bin.Cheng
2017-08-01 11:51 ` Tamar Christina
2017-08-01 12:04 ` Bin.Cheng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).