* [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
@ 2024-07-26 20:42 Robin Dapp
2024-07-26 22:48 ` Jeff Law
0 siblings, 1 reply; 7+ messages in thread
From: Robin Dapp @ 2024-07-26 20:42 UTC (permalink / raw)
To: gcc-patches
Cc: palmer, kito.cheng, juzhe.zhong, jeffreyalaw, rdapp.gcc, Li, Pan2
Hi,
when the source mode is potentially larger than one vector (e.g. an
LMUL2 mode for VLEN=128) we don't know which vector the subreg actually
refers to. For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI))
could actually be the a full (high) vector register of a two-register
group (at VLEN=128) or the higher part of a single register (at VLEN>128).
In that case we need to use a slidedown instead of moving a register
directly.
Regtested on rv64gcv_zvfh_zvbb -mrvv-max-lmul=m2 at vlen 128 and vlen 256.
This also fixes
gcc.dg/vect/bb-slp-cond-1.c
gcc.dg/vect/bb-slp-pr101668.c
gcc.dg/vect/pr66251.c
and others from the vector test suite when ran with vlen 256.
Regtested on rv64gcv_zvfh_zvbb -mrvv-max-lmul=m2 and vlen 128 as well as vlen
256. Still curious what the CI says.
Regards
Robin
gcc/ChangeLog:
PR target/116086
* config/riscv/riscv-v.cc (legitimize_move): Slide down instead
of moving register directly.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test.
* gcc.target/riscv/rvv/autovec/pr116086-2.c: New test.
* gcc.target/riscv/rvv/autovec/pr116086.c: New test.
---
gcc/config/riscv/riscv-v.cc | 29 +++++++
.../riscv/rvv/autovec/pr116086-2-run.c | 5 ++
.../gcc.target/riscv/rvv/autovec/pr116086-2.c | 18 +++++
.../gcc.target/riscv/rvv/autovec/pr116086.c | 75 +++++++++++++++++++
4 files changed, 127 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e290675bbf0..f475fa32173 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1550,6 +1550,35 @@ legitimize_move (rtx dest, rtx *srcp)
if (riscv_v_ext_vls_mode_p (mode))
{
+ /* When the source mode is larger than one vector (like an LMUL2 mode
+ for VLEN=128) we don't know which vector the subreg actually
+ refers to.
+ For zvl128 the subreg in (subreg:V2DI (reg:V4DI)) could actually be
+ the full (high) vector register of a register group (at VLEN=128) or
+ the higher part of a single register (at VLEN>128). */
+ if (SUBREG_P (src) && SUBREG_BYTE (src).to_constant () > 0)
+ {
+ rtx reg = SUBREG_REG (src);
+ machine_mode reg_mode = GET_MODE (reg);
+
+ if (cmp_lmul_gt_one (reg_mode)
+ && GET_MODE_SIZE (reg_mode).to_constant ()
+ > GET_MODE_SIZE (mode).to_constant ()
+ && GET_MODE_INNER (reg_mode) == GET_MODE_INNER (mode))
+ {
+ int slide = (SUBREG_BYTE (src).to_constant ()
+ / GET_MODE_SIZE (mode).to_constant ()) *
+ GET_MODE_NUNITS (mode).to_constant ();
+ rtx tmp = gen_reg_rtx (reg_mode);
+ rtx slide_ops[] = {tmp, reg, GEN_INT (slide)};
+ insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN,
+ reg_mode);
+ emit_vlmax_insn (icode, BINARY_OP, slide_ops);
+ emit_move_insn (dest, gen_lowpart (mode, tmp));
+ return true;
+ }
+ }
+
if (GET_MODE_NUNITS (mode).to_constant () <= 31)
{
/* For NUNITS <= 31 VLS modes, we don't need extrac
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c
new file mode 100644
index 00000000000..2d523f98f7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */
+
+#include "pr116086-2.c"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c
new file mode 100644
index 00000000000..8b5ea6be955
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */
+
+long a;
+long b;
+long c[80];
+int main() {
+ for (int d = 0; d < 16; d++)
+ c[d] = a;
+ for (int d = 16; d < 80; d++)
+ c[d] = c[d - 2];
+ for (int d = 0; d < 80; d += 8)
+ b += c[d];
+ if (b != 0)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "vmv1r" 0 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c
new file mode 100644
index 00000000000..cc67357b768
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */
+
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+typedef struct
+{
+ uint64_t length;
+ uint64_t state[8];
+ uint32_t curlen;
+ unsigned char buf[128];
+} sha512_state;
+
+static uint64_t load64(const unsigned char* y)
+{
+ uint64_t res = 0;
+ for(int i = 0; i != 8; ++i)
+ res |= (uint64_t)(y[i]) << ((7-i) * 8);
+ return res;
+}
+
+static const uint64_t K[80] =
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+__attribute__ ((noipa))
+static void sha_compress(sha512_state *md, const unsigned char *buf)
+{
+ uint64_t S[8], W[80];
+
+ for(int i = 0; i < 8; i++)
+ S[i] = 0;
+
+ // Copy the state into 1024-bits into W[0..15]
+ for(int i = 0; i < 16; i++)
+ W[i] = load64(buf + (8*i));
+
+ // Fill W[16..79]
+ for(int i = 16; i < 80; i++)
+ W[i] = W[i - 2] + W[i - 7] + W[i - 15] + W[i - 16];
+
+ S[7] = W[72];
+
+ // Feedback
+ for(int i = 0; i < 8; i++)
+ md->state[i] = md->state[i] + S[i];
+}
+
+int main ()
+{
+ sha512_state md;
+ md.curlen = 0;
+ md.length = 0;
+ md.state[0] = 0;
+ md.state[1] = 0;
+ md.state[2] = 0;
+ md.state[3] = 0;
+ md.state[4] = 0;
+ md.state[5] = 0;
+ md.state[6] = 0;
+ md.state[7] = 0;
+
+ for (int i = 0; i < 128; i++)
+ md.buf[i] = 0;
+
+ md.buf[md.curlen++] = (unsigned char)0x80;
+
+ sha_compress (&md, md.buf);
+
+ if (md.state[7] != 0x8000000000000000ULL)
+ __builtin_abort ();
+
+ return 0;
+}
--
2.45.2
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
2024-07-26 20:42 [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086] Robin Dapp
@ 2024-07-26 22:48 ` Jeff Law
2024-07-29 12:35 ` Richard Sandiford
0 siblings, 1 reply; 7+ messages in thread
From: Jeff Law @ 2024-07-26 22:48 UTC (permalink / raw)
To: Robin Dapp, gcc-patches; +Cc: palmer, kito.cheng, juzhe.zhong, Li, Pan2
On 7/26/24 2:42 PM, Robin Dapp wrote:
> Hi,
>
> when the source mode is potentially larger than one vector (e.g. an
> LMUL2 mode for VLEN=128) we don't know which vector the subreg actually
> refers to. For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI))
> could actually be the a full (high) vector register of a two-register
> group (at VLEN=128) or the higher part of a single register (at VLEN>128).
>
> In that case we need to use a slidedown instead of moving a register
> directly.
>
> Regtested on rv64gcv_zvfh_zvbb -mrvv-max-lmul=m2 at vlen 128 and vlen 256.
> This also fixes
> gcc.dg/vect/bb-slp-cond-1.c
> gcc.dg/vect/bb-slp-pr101668.c
> gcc.dg/vect/pr66251.c
> and others from the vector test suite when ran with vlen 256.
>
> Regtested on rv64gcv_zvfh_zvbb -mrvv-max-lmul=m2 and vlen 128 as well as vlen
> 256. Still curious what the CI says.
>
> Regards
> Robin
>
> gcc/ChangeLog:
>
> PR target/116086
>
> * config/riscv/riscv-v.cc (legitimize_move): Slide down instead
> of moving register directly.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test.
> * gcc.target/riscv/rvv/autovec/pr116086-2.c: New test.
> * gcc.target/riscv/rvv/autovec/pr116086.c: New test.
So the representational issues LMUL > 1 brings with GCC have been in the
back of my mind, but never bubbled up enough for me to say anything.
This seems to start and touch on those concerns. While your patch fixes
the immediate issue in the RISC-V backend, I won't be at all surprised
if we find other cases in the generic code that assume they know how to
interpret a SUBREG and get it wrong.
And we may currently be hiding these issues by having GCC and QEMU be
consistent in their handling in the default configurations.
Anyway, the patch is sensible. Essentially using a slidedown is a good
way to avoid a subclass of the potential problems.
Jeff
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
2024-07-26 22:48 ` Jeff Law
@ 2024-07-29 12:35 ` Richard Sandiford
2024-07-29 12:43 ` Richard Sandiford
0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2024-07-29 12:35 UTC (permalink / raw)
To: Jeff Law
Cc: Robin Dapp, gcc-patches, palmer, kito.cheng, juzhe.zhong, Li, Pan2
Jeff Law <jeffreyalaw@gmail.com> writes:
> On 7/26/24 2:42 PM, Robin Dapp wrote:
>> Hi,
>>
>> when the source mode is potentially larger than one vector (e.g. an
>> LMUL2 mode for VLEN=128) we don't know which vector the subreg actually
>> refers to. For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI))
>> could actually be the a full (high) vector register of a two-register
>> group (at VLEN=128) or the higher part of a single register (at VLEN>128).
>>
>> In that case we need to use a slidedown instead of moving a register
>> directly.
>>
>> Regtested on rv64gcv_zvfh_zvbb -mrvv-max-lmul=m2 at vlen 128 and vlen 256.
>> This also fixes
>> gcc.dg/vect/bb-slp-cond-1.c
>> gcc.dg/vect/bb-slp-pr101668.c
>> gcc.dg/vect/pr66251.c
>> and others from the vector test suite when ran with vlen 256.
>>
>> Regtested on rv64gcv_zvfh_zvbb -mrvv-max-lmul=m2 and vlen 128 as well as vlen
>> 256. Still curious what the CI says.
>>
>> Regards
>> Robin
>>
>> gcc/ChangeLog:
>>
>> PR target/116086
>>
>> * config/riscv/riscv-v.cc (legitimize_move): Slide down instead
>> of moving register directly.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test.
>> * gcc.target/riscv/rvv/autovec/pr116086-2.c: New test.
>> * gcc.target/riscv/rvv/autovec/pr116086.c: New test.
> So the representational issues LMUL > 1 brings with GCC have been in the
> back of my mind, but never bubbled up enough for me to say anything.
>
> This seems to start and touch on those concerns. While your patch fixes
> the immediate issue in the RISC-V backend, I won't be at all surprised
> if we find other cases in the generic code that assume they know how to
> interpret a SUBREG and get it wrong.
Yeah, I agree. It seems like this is papering over an issue elsewhere,
and that we're losing our chance to see what it is.
A somewhat similar situation can happen for SVE with subregs like:
(subreg:V4SI (reg:VNx8SI R) 16)
IMO, what ought to happen here is that the RA should spill
the inner register to memory and load the V4SI back from there.
(Or vice versa, for an lvalue.) Obviously that's not very efficient,
and so a patch like the above might be useful as an optimisation.[*]
But it shouldn't be needed for correctness. The target-independent
code should already have the information it needs to realise that
it can't predict the register index at compile time (at least for SVE).
Richard
[*] Big-endian SVE also checks for tricky subregs in the move patterns,
and tries to optimise them to something that doesn't involve a subreg.
But it's only an optimisation.
>
> And we may currently be hiding these issues by having GCC and QEMU be
> consistent in their handling in the default configurations.
>
> Anyway, the patch is sensible. Essentially using a slidedown is a good
> way to avoid a subclass of the potential problems.
>
> Jeff
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
2024-07-29 12:35 ` Richard Sandiford
@ 2024-07-29 12:43 ` Richard Sandiford
2024-07-30 14:07 ` Robin Dapp
0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2024-07-29 12:43 UTC (permalink / raw)
To: Jeff Law
Cc: Robin Dapp, gcc-patches, palmer, kito.cheng, juzhe.zhong, Li, Pan2
Richard Sandiford <richard.sandiford@arm.com> writes:
> A somewhat similar situation can happen for SVE with subregs like:
>
> (subreg:V4SI (reg:VNx8SI R) 16)
>
> IMO, what ought to happen here is that the RA should spill
> the inner register to memory and load the V4SI back from there.
> (Or vice versa, for an lvalue.) Obviously that's not very efficient,
> and so a patch like the above might be useful as an optimisation.[*]
> But it shouldn't be needed for correctness. The target-independent
> code should already have the information it needs to realise that
> it can't predict the register index at compile time (at least for SVE).
Or actually, for that case:
/* For pseudo registers, we want most of the same checks. Namely:
Assume that the pseudo register will be allocated to hard registers
that can hold REGSIZE bytes each. If OSIZE is not a multiple of REGSIZE,
the remainder must correspond to the lowpart of the containing hard
register. If BYTES_BIG_ENDIAN, the lowpart is at the highest offset,
otherwise it is at the lowest offset.
Given that we've already checked the mode and offset alignment,
we only have to check subblock subregs here. */
if (maybe_lt (osize, regsize)
&& ! (lra_in_progress && (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))))
{
/* It is invalid for the target to pick a register size for a mode
that isn't ordered wrt to the size of that mode. */
poly_uint64 block_size = ordered_min (isize, regsize);
unsigned int start_reg;
poly_uint64 offset_within_reg;
if (!can_div_trunc_p (offset, block_size, &start_reg, &offset_within_reg)
...
in validate_subreg should reject the offset.
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
2024-07-29 12:43 ` Richard Sandiford
@ 2024-07-30 14:07 ` Robin Dapp
2024-07-31 8:49 ` Richard Sandiford
0 siblings, 1 reply; 7+ messages in thread
From: Robin Dapp @ 2024-07-30 14:07 UTC (permalink / raw)
To: Richard Sandiford, Jeff Law
Cc: gcc-patches, palmer, kito.cheng, juzhe.zhong, pan2.li, Robin Dapp
> > IMO, what ought to happen here is that the RA should spill
> > the inner register to memory and load the V4SI back from there.
> > (Or vice versa, for an lvalue.) Obviously that's not very efficient,
> > and so a patch like the above might be useful as an optimisation.[*]
> > But it shouldn't be needed for correctness. The target-independent
> > code should already have the information it needs to realise that
> > it can't predict the register index at compile time (at least for SVE).
>
> Or actually, for that case:
>
> /* For pseudo registers, we want most of the same checks. Namely:
>
> Assume that the pseudo register will be allocated to hard registers
> that can hold REGSIZE bytes each. If OSIZE is not a multiple of REGSIZE,
> the remainder must correspond to the lowpart of the containing hard
> register. If BYTES_BIG_ENDIAN, the lowpart is at the highest offset,
> otherwise it is at the lowest offset.
>
> Given that we've already checked the mode and offset alignment,
> we only have to check subblock subregs here. */
> if (maybe_lt (osize, regsize)
> && ! (lra_in_progress && (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))))
> {
> /* It is invalid for the target to pick a register size for a mode
> that isn't ordered wrt to the size of that mode. */
> poly_uint64 block_size = ordered_min (isize, regsize);
> unsigned int start_reg;
> poly_uint64 offset_within_reg;
> if (!can_div_trunc_p (offset, block_size, &start_reg, &offset_within_reg)
> ...
>
Like aarch64 we set REGMODE_NATURAL_SIZE for fixed-size modes to
UNITS_PER_WORD. Isn't that part of the problem?
In extract_bit_field_as_subreg we check lowpart_bit_field_p (= true because
128 is a multiple of UNITS_PER_WORD). This leads to the subreg expression.
If I have REGMODE_NATURAL_SIZE return a VLA number this fails and we extract
via memory - but that of course breaks almost everything else :)
When you say the target-independent code should already have all information it
needs, what are you referring to? Something else than REGMODE_NATURAL_SIZE?
Thanks.
--
Regards
Robin
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
2024-07-30 14:07 ` Robin Dapp
@ 2024-07-31 8:49 ` Richard Sandiford
2024-07-31 9:16 ` Robin Dapp
0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2024-07-31 8:49 UTC (permalink / raw)
To: Robin Dapp
Cc: Jeff Law, gcc-patches, palmer, kito.cheng, juzhe.zhong, pan2.li
"Robin Dapp" <rdapp.gcc@gmail.com> writes:
>> > IMO, what ought to happen here is that the RA should spill
>> > the inner register to memory and load the V4SI back from there.
>> > (Or vice versa, for an lvalue.) Obviously that's not very efficient,
>> > and so a patch like the above might be useful as an optimisation.[*]
>> > But it shouldn't be needed for correctness. The target-independent
>> > code should already have the information it needs to realise that
>> > it can't predict the register index at compile time (at least for SVE).
>>
>> Or actually, for that case:
>>
>> /* For pseudo registers, we want most of the same checks. Namely:
>>
>> Assume that the pseudo register will be allocated to hard registers
>> that can hold REGSIZE bytes each. If OSIZE is not a multiple of REGSIZE,
>> the remainder must correspond to the lowpart of the containing hard
>> register. If BYTES_BIG_ENDIAN, the lowpart is at the highest offset,
>> otherwise it is at the lowest offset.
>>
>> Given that we've already checked the mode and offset alignment,
>> we only have to check subblock subregs here. */
>> if (maybe_lt (osize, regsize)
>> && ! (lra_in_progress && (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))))
>> {
>> /* It is invalid for the target to pick a register size for a mode
>> that isn't ordered wrt to the size of that mode. */
>> poly_uint64 block_size = ordered_min (isize, regsize);
>> unsigned int start_reg;
>> poly_uint64 offset_within_reg;
>> if (!can_div_trunc_p (offset, block_size, &start_reg, &offset_within_reg)
>> ...
>>
>
> Like aarch64 we set REGMODE_NATURAL_SIZE for fixed-size modes to
> UNITS_PER_WORD. Isn't that part of the problem?
>
> In extract_bit_field_as_subreg we check lowpart_bit_field_p (= true because
> 128 is a multiple of UNITS_PER_WORD). This leads to the subreg expression.
>
> If I have REGMODE_NATURAL_SIZE return a VLA number this fails and we extract
> via memory - but that of course breaks almost everything else :)
>
> When you say the target-independent code should already have all information it
> needs, what are you referring to? Something else than REGMODE_NATURAL_SIZE?
In the aarch64 example I mentioned, the REGMODE_NATURAL_SIZE of the inner
mode is variable. (The REGMODE_NATURAL_SIZE of the outer mode is constant,
but it's the inner mode that matters here.)
Thanks,
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086].
2024-07-31 8:49 ` Richard Sandiford
@ 2024-07-31 9:16 ` Robin Dapp
0 siblings, 0 replies; 7+ messages in thread
From: Robin Dapp @ 2024-07-31 9:16 UTC (permalink / raw)
To: Richard Sandiford
Cc: jeffreyalaw, gcc-patches, palmer, kito.cheng, juzhe.zhong,
pan2.li, Robin Dapp
> > Like aarch64 we set REGMODE_NATURAL_SIZE for fixed-size modes to
> > UNITS_PER_WORD. Isn't that part of the problem?
> >
> > In extract_bit_field_as_subreg we check lowpart_bit_field_p (= true because
> > 128 is a multiple of UNITS_PER_WORD). This leads to the subreg expression.
> >
> > If I have REGMODE_NATURAL_SIZE return a VLA number this fails and we extract
> > via memory - but that of course breaks almost everything else :)
> >
> > When you say the target-independent code should already have all information it
> > needs, what are you referring to? Something else than REGMODE_NATURAL_SIZE?
>
> In the aarch64 example I mentioned, the REGMODE_NATURAL_SIZE of the inner
> mode is variable. (The REGMODE_NATURAL_SIZE of the outer mode is constant,
> but it's the inner mode that matters here.)
Yes, because in that example the inner mode is a VLA mode.
I meant, for the riscv case, wouldn't we need to make the REGMODE_NATURAL_SIZE
of the VLS mode variable as well? (As that is what it is actually going to be
in the end, some kind of vector move of a variable-sized vector)
Right now we onlye enable VLS modes whose size is not larger than the base
variable vector size (e.g. [128 128] bits) exactly to avoid the kind of
ambiguity between modes that arises here.
--
Regards
Robin
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2024-07-31 9:16 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-26 20:42 [PATCH] RISC-V: Expand subreg move via slide if necessary [PR116086] Robin Dapp
2024-07-26 22:48 ` Jeff Law
2024-07-29 12:35 ` Richard Sandiford
2024-07-29 12:43 ` Richard Sandiford
2024-07-30 14:07 ` Robin Dapp
2024-07-31 8:49 ` Richard Sandiford
2024-07-31 9:16 ` Robin Dapp
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).