public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [pushed v2 0/5] aarch64: Add support for SME2
@ 2023-12-05 10:24 Richard Sandiford
  2023-12-05 10:24 ` [pushed v2 1/5] aarch64: Add +sme2 Richard Sandiford
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Richard Sandiford @ 2023-12-05 10:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford

This is the version of the SME2 support that I pushed to trunk.
The only significant differences from the original version:

  https://gcc.gnu.org/pipermail/gcc-patches/2023-November/637072.html

are:

- it extends svget2, svset2, svcreate2 and svundef2 to the new svboolx2_t
  type, which I forgot to do last time

- it fixes the incoming and outgoing liveness state for ZA in functions
  that share ZT0 but not ZA (plus tests)

- it has tests for all the new overloaded function "shapes", with some
  fixes & improvements to the error messages

Retested on aarch64-linux-gnu.


Richard Sandiford (5):
  aarch64: Add +sme2
  aarch64: Add svcount_t
  aarch64: Add svboolx2_t
  aarch64: Add ZT0
  aarch64: Add support for SME2 intrinsics

 gcc/config/aarch64/aarch64-c.cc               |    2 +
 gcc/config/aarch64/aarch64-modes.def          |    3 +
 .../aarch64/aarch64-option-extensions.def     |    2 +
 gcc/config/aarch64/aarch64-protos.h           |    1 +
 gcc/config/aarch64/aarch64-sme.md             | 1155 ++++++++++++++++-
 .../aarch64/aarch64-sve-builtins-base.cc      |  168 ++-
 .../aarch64/aarch64-sve-builtins-base.def     |   38 +-
 .../aarch64/aarch64-sve-builtins-functions.h  |  170 ++-
 .../aarch64/aarch64-sve-builtins-shapes.cc    |  877 ++++++++++++-
 .../aarch64/aarch64-sve-builtins-shapes.h     |   37 +-
 .../aarch64/aarch64-sve-builtins-sme.cc       |  175 ++-
 .../aarch64/aarch64-sve-builtins-sme.def      |  122 ++
 gcc/config/aarch64/aarch64-sve-builtins-sme.h |   28 +-
 .../aarch64/aarch64-sve-builtins-sve2.cc      |  107 +-
 .../aarch64/aarch64-sve-builtins-sve2.def     |   74 +-
 .../aarch64/aarch64-sve-builtins-sve2.h       |   21 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    |  793 +++++++++--
 gcc/config/aarch64/aarch64-sve-builtins.def   |   17 +
 gcc/config/aarch64/aarch64-sve-builtins.h     |   52 +-
 gcc/config/aarch64/aarch64-sve.md             |  120 +-
 gcc/config/aarch64/aarch64-sve2.md            |  703 ++++++++++
 gcc/config/aarch64/aarch64.cc                 |  372 ++++--
 gcc/config/aarch64/aarch64.h                  |   37 +-
 gcc/config/aarch64/aarch64.md                 |   11 +-
 gcc/config/aarch64/atomics.md                 |    2 +-
 gcc/config/aarch64/constraints.md             |   30 +-
 gcc/config/aarch64/iterators.md               |  369 +++++-
 gcc/config/aarch64/predicates.md              |   27 +-
 gcc/doc/invoke.texi                           |    3 +-
 gcc/doc/sourcebuild.texi                      |    2 +
 .../aarch64/sme2/aarch64-sme2-acle-asm.exp    |   82 ++
 .../aarch64/sve/acle/general-c++/mangle_1.C   |    2 +
 .../aarch64/sve/acle/general-c++/mangle_2.C   |    2 +
 .../aarch64/sve/acle/general-c++/svcount_1.C  |   10 +
 .../aarch64/sme/acle-asm/clamp_s16.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_s32.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_s64.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_s8.c           |   42 +
 .../aarch64/sme/acle-asm/clamp_u16.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_u32.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_u64.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_u8.c           |   42 +
 .../aarch64/sme/acle-asm/revd_bf16.c          |   76 ++
 .../aarch64/sme/acle-asm/revd_f16.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_f32.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_f64.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_s16.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_s32.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_s64.c           |   76 ++
 .../gcc.target/aarch64/sme/acle-asm/revd_s8.c |   76 ++
 .../aarch64/sme/acle-asm/revd_u16.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_u32.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_u64.c           |   76 ++
 .../gcc.target/aarch64/sme/acle-asm/revd_u8.c |   76 ++
 .../gcc.target/aarch64/sme/clamp_1.c          |   38 +
 .../gcc.target/aarch64/sme/clamp_2.c          |   32 +
 .../gcc.target/aarch64/sme/clamp_3.c          |   26 +
 .../gcc.target/aarch64/sme/clamp_4.c          |   20 +
 .../gcc.target/aarch64/sme/zt0_state_1.c      |   65 +
 .../gcc.target/aarch64/sme/zt0_state_2.c      |   31 +
 .../gcc.target/aarch64/sme/zt0_state_3.c      |    6 +
 .../gcc.target/aarch64/sme/zt0_state_4.c      |   53 +
 .../gcc.target/aarch64/sme/zt0_state_5.c      |  260 ++++
 .../gcc.target/aarch64/sme/zt0_state_6.c      |   54 +
 .../aarch64/sme2/aarch64-sme2-acle-asm.exp    |   81 ++
 .../aarch64/sme2/acle-asm/add_s16_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_s16_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_s32_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_s32_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_s64_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_s64_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_s8_x2.c         |  115 ++
 .../aarch64/sme2/acle-asm/add_s8_x4.c         |  125 ++
 .../aarch64/sme2/acle-asm/add_u16_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_u16_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_u32_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_u32_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_u64_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_u64_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_u8_x2.c         |  115 ++
 .../aarch64/sme2/acle-asm/add_u8_x4.c         |  125 ++
 .../sme2/acle-asm/add_write_za32_s32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/add_write_za32_s32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/add_write_za32_u32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/add_write_za32_u32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/add_write_za64_s64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/add_write_za64_s64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/add_write_za64_u64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/add_write_za64_u64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/add_za32_f32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/add_za32_f32_vg1x4.c        |  137 ++
 .../sme2/acle-asm/add_za32_s32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/add_za32_s32_vg1x4.c        |  137 ++
 .../sme2/acle-asm/add_za32_u32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/add_za32_u32_vg1x4.c        |  137 ++
 .../sme2/acle-asm/add_za64_f64_vg1x2.c        |  126 ++
 .../sme2/acle-asm/add_za64_f64_vg1x4.c        |  141 ++
 .../sme2/acle-asm/add_za64_s64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/add_za64_s64_vg1x4.c        |  139 ++
 .../sme2/acle-asm/add_za64_u64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/add_za64_u64_vg1x4.c        |  139 ++
 .../aarch64/sme2/acle-asm/bfmlslb_f32.c       |   65 +
 .../aarch64/sme2/acle-asm/bfmlslb_lane_f32.c  |   84 ++
 .../aarch64/sme2/acle-asm/bfmlslt_f32.c       |   65 +
 .../aarch64/sme2/acle-asm/bfmlslt_lane_f32.c  |   84 ++
 .../aarch64/sme2/acle-asm/bmopa_za32.c        |   30 +
 .../aarch64/sme2/acle-asm/bmops_za32.c        |   30 +
 .../aarch64/sme2/acle-asm/clamp_f16.c         |   42 +
 .../aarch64/sme2/acle-asm/clamp_f16_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_f16_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_f32.c         |   42 +
 .../aarch64/sme2/acle-asm/clamp_f32_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_f32_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_f64.c         |   42 +
 .../aarch64/sme2/acle-asm/clamp_f64_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_f64_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s16_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s16_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s32_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s32_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s64_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s64_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s8_x2.c       |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s8_x4.c       |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u16_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u16_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u32_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u32_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u64_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u64_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u8_x2.c       |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u8_x4.c       |  104 ++
 .../aarch64/sme2/acle-asm/cntp_c16.c          |   39 +
 .../aarch64/sme2/acle-asm/cntp_c32.c          |   39 +
 .../aarch64/sme2/acle-asm/cntp_c64.c          |   39 +
 .../aarch64/sme2/acle-asm/cntp_c8.c           |   39 +
 .../aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/cvt_f16_f32_x2.c    |   50 +
 .../aarch64/sme2/acle-asm/cvt_f32_s32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_f32_s32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvt_f32_u32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_f32_u32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvt_s32_f32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_s32_f32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvt_u32_f32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_u32_f32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/dot_f32.c           |   44 +
 .../aarch64/sme2/acle-asm/dot_lane_f32.c      |   93 ++
 .../aarch64/sme2/acle-asm/dot_lane_s32.c      |   93 ++
 .../aarch64/sme2/acle-asm/dot_lane_u32.c      |   93 ++
 .../sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c  |  108 ++
 .../sme2/acle-asm/dot_lane_za32_f16_vg1x2.c   |  102 ++
 .../sme2/acle-asm/dot_lane_za32_f16_vg1x4.c   |  108 ++
 .../sme2/acle-asm/dot_lane_za32_s16_vg1x2.c   |  102 ++
 .../sme2/acle-asm/dot_lane_za32_s16_vg1x4.c   |  108 ++
 .../sme2/acle-asm/dot_lane_za32_s8_vg1x2.c    |  102 ++
 .../sme2/acle-asm/dot_lane_za32_s8_vg1x4.c    |  108 ++
 .../sme2/acle-asm/dot_lane_za32_u16_vg1x2.c   |  102 ++
 .../sme2/acle-asm/dot_lane_za32_u16_vg1x4.c   |  108 ++
 .../sme2/acle-asm/dot_lane_za32_u8_vg1x2.c    |  102 ++
 .../sme2/acle-asm/dot_lane_za32_u8_vg1x4.c    |  108 ++
 .../sme2/acle-asm/dot_lane_za64_s16_vg1x2.c   |  104 ++
 .../sme2/acle-asm/dot_lane_za64_s16_vg1x4.c   |  110 ++
 .../sme2/acle-asm/dot_lane_za64_u16_vg1x2.c   |  104 ++
 .../sme2/acle-asm/dot_lane_za64_u16_vg1x4.c   |  110 ++
 .../aarch64/sme2/acle-asm/dot_s32.c           |   44 +
 .../aarch64/sme2/acle-asm/dot_u32.c           |   44 +
 .../sme2/acle-asm/dot_za32_bf16_vg1x2.c       |  243 ++++
 .../sme2/acle-asm/dot_za32_bf16_vg1x4.c       |  254 ++++
 .../sme2/acle-asm/dot_za32_f16_vg1x2.c        |  243 ++++
 .../sme2/acle-asm/dot_za32_f16_vg1x4.c        |  254 ++++
 .../sme2/acle-asm/dot_za32_s16_vg1x2.c        |  243 ++++
 .../sme2/acle-asm/dot_za32_s16_vg1x4.c        |  254 ++++
 .../aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c |  243 ++++
 .../aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c |  254 ++++
 .../sme2/acle-asm/dot_za32_u16_vg1x2.c        |  243 ++++
 .../sme2/acle-asm/dot_za32_u16_vg1x4.c        |  254 ++++
 .../aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c |  243 ++++
 .../aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c |  254 ++++
 .../sme2/acle-asm/dot_za64_s16_vg1x2.c        |  245 ++++
 .../sme2/acle-asm/dot_za64_s16_vg1x4.c        |  256 ++++
 .../sme2/acle-asm/dot_za64_u16_vg1x2.c        |  245 ++++
 .../sme2/acle-asm/dot_za64_u16_vg1x4.c        |  256 ++++
 .../aarch64/sme2/acle-asm/ld1_bf16_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_bf16_x4.c       |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_f16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_f16_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_f32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_f32_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_f64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_f64_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_s16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s16_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_s32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s32_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_s64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s64_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_s8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s8_x4.c         |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_u16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u16_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_u32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u32_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_u64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u64_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/ld1_u8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u8_x4.c         |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_bf16_x2.c     |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_bf16_x4.c     |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_f16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_f16_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_f32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_f32_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_f64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_f64_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_s16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s16_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_s32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s32_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_s64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s64_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_s8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s8_x4.c       |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_u16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u16_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_u32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u32_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_u64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u64_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/ldnt1_u8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u8_x4.c       |  354 +++++
 .../gcc.target/aarch64/sme2/acle-asm/ldr_zt.c |   36 +
 .../aarch64/sme2/acle-asm/luti2_bf16.c        |   48 +
 .../aarch64/sme2/acle-asm/luti2_bf16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/luti2_bf16_x4.c     |   56 +
 .../aarch64/sme2/acle-asm/luti2_f16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_f16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_f16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_f32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_f32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_f32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_s16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_s16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_s16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_s32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_s32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_s32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_s8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti2_s8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/luti2_s8_x4.c       |   56 +
 .../aarch64/sme2/acle-asm/luti2_u16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_u16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_u16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_u32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_u32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_u32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_u8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti2_u8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/luti2_u8_x4.c       |   56 +
 .../aarch64/sme2/acle-asm/luti4_bf16.c        |   48 +
 .../aarch64/sme2/acle-asm/luti4_bf16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/luti4_bf16_x4.c     |   56 +
 .../aarch64/sme2/acle-asm/luti4_f16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_f16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_f16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_f32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_f32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_f32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_s16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_s16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_s16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_s32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_s32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_s32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_s8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti4_s8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/luti4_u16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_u16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_u16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_u32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_u32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_u32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_u8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti4_u8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/max_f16_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_f16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_f32_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_f32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_f64_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_f64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s16_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_s16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s32_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_s32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s64_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_s64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s8_x2.c         |  207 +++
 .../aarch64/sme2/acle-asm/max_s8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/max_u16_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_u16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_u32_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_u32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_u64_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/max_u64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_u8_x2.c         |  207 +++
 .../aarch64/sme2/acle-asm/max_u8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/maxnm_f16_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/maxnm_f16_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/maxnm_f32_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/maxnm_f32_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/maxnm_f64_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/maxnm_f64_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/min_f16_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_f16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_f32_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_f32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_f64_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_f64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s16_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_s16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s32_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_s32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s64_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_s64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s8_x2.c         |  207 +++
 .../aarch64/sme2/acle-asm/min_s8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/min_u16_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_u16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_u32_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_u32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_u64_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/min_u64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_u8_x2.c         |  207 +++
 .../aarch64/sme2/acle-asm/min_u8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/minnm_f16_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/minnm_f16_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/minnm_f32_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/minnm_f32_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/minnm_f64_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/minnm_f64_x4.c      |  249 ++++
 .../sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c  |  148 +++
 .../sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c  |  112 ++
 .../sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c  |  118 ++
 .../sme2/acle-asm/mla_lane_za32_f16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mla_lane_za32_f16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mla_lane_za32_f16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mla_lane_za32_f32_vg1x2.c   |  102 ++
 .../sme2/acle-asm/mla_lane_za32_f32_vg1x4.c   |  108 ++
 .../sme2/acle-asm/mla_lane_za32_s16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mla_lane_za32_s16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mla_lane_za32_s16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mla_lane_za32_s8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mla_lane_za32_s8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mla_lane_za32_s8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mla_lane_za32_u16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mla_lane_za32_u16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mla_lane_za32_u16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mla_lane_za32_u8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mla_lane_za32_u8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mla_lane_za32_u8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mla_lane_za64_f64_vg1x2.c   |  104 ++
 .../sme2/acle-asm/mla_lane_za64_f64_vg1x4.c   |  110 ++
 .../sme2/acle-asm/mla_lane_za64_s16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mla_lane_za64_s16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mla_lane_za64_s16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mla_lane_za64_u16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mla_lane_za64_u16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mla_lane_za64_u16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mla_za32_bf16_vg2x1.c       |  148 +++
 .../sme2/acle-asm/mla_za32_bf16_vg2x2.c       |  247 ++++
 .../sme2/acle-asm/mla_za32_bf16_vg2x4.c       |  258 ++++
 .../sme2/acle-asm/mla_za32_f16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mla_za32_f16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mla_za32_f16_vg2x4.c        |  258 ++++
 .../sme2/acle-asm/mla_za32_f32_vg1x2.c        |  180 +++
 .../sme2/acle-asm/mla_za32_f32_vg1x4.c        |  172 +++
 .../sme2/acle-asm/mla_za32_s16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mla_za32_s16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mla_za32_s16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mla_za32_u16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mla_za32_u16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mla_za32_u16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mla_za64_f64_vg1x2.c        |  182 +++
 .../sme2/acle-asm/mla_za64_f64_vg1x4.c        |  174 +++
 .../sme2/acle-asm/mla_za64_s16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mla_za64_s16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mla_za64_s16_vg4x4.c        |  262 ++++
 .../sme2/acle-asm/mla_za64_u16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mla_za64_u16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mla_za64_u16_vg4x4.c        |  262 ++++
 .../sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c  |  148 +++
 .../sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c  |  112 ++
 .../sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c  |  118 ++
 .../sme2/acle-asm/mls_lane_za32_f16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mls_lane_za32_f16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mls_lane_za32_f16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mls_lane_za32_f32_vg1x2.c   |  102 ++
 .../sme2/acle-asm/mls_lane_za32_f32_vg1x4.c   |  108 ++
 .../sme2/acle-asm/mls_lane_za32_s16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mls_lane_za32_s16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mls_lane_za32_s16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mls_lane_za32_s8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mls_lane_za32_s8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mls_lane_za32_s8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mls_lane_za32_u16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mls_lane_za32_u16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mls_lane_za32_u16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mls_lane_za32_u8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mls_lane_za32_u8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mls_lane_za32_u8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mls_lane_za64_f64_vg1x2.c   |  104 ++
 .../sme2/acle-asm/mls_lane_za64_f64_vg1x4.c   |  110 ++
 .../sme2/acle-asm/mls_lane_za64_s16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mls_lane_za64_s16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mls_lane_za64_s16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mls_lane_za64_u16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mls_lane_za64_u16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mls_lane_za64_u16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mls_za32_bf16_vg2x1.c       |  148 +++
 .../sme2/acle-asm/mls_za32_bf16_vg2x2.c       |  247 ++++
 .../sme2/acle-asm/mls_za32_bf16_vg2x4.c       |  258 ++++
 .../sme2/acle-asm/mls_za32_f16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mls_za32_f16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mls_za32_f16_vg2x4.c        |  258 ++++
 .../sme2/acle-asm/mls_za32_f32_vg1x2.c        |  180 +++
 .../sme2/acle-asm/mls_za32_f32_vg1x4.c        |  172 +++
 .../sme2/acle-asm/mls_za32_s16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mls_za32_s16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mls_za32_s16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mls_za32_u16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mls_za32_u16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mls_za32_u16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mls_za64_f64_vg1x2.c        |  182 +++
 .../sme2/acle-asm/mls_za64_f64_vg1x4.c        |  174 +++
 .../sme2/acle-asm/mls_za64_s16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mls_za64_s16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mls_za64_s16_vg4x4.c        |  262 ++++
 .../sme2/acle-asm/mls_za64_u16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mls_za64_u16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mls_za64_u16_vg4x4.c        |  262 ++++
 .../aarch64/sme2/acle-asm/mopa_za32.c         |   48 +
 .../aarch64/sme2/acle-asm/mops_za32.c         |   48 +
 .../aarch64/sme2/acle-asm/pext_c16.c          |   50 +
 .../aarch64/sme2/acle-asm/pext_c16_x2.c       |   54 +
 .../aarch64/sme2/acle-asm/pext_c32.c          |   50 +
 .../aarch64/sme2/acle-asm/pext_c32_x2.c       |   54 +
 .../aarch64/sme2/acle-asm/pext_c64.c          |   50 +
 .../aarch64/sme2/acle-asm/pext_c64_x2.c       |   54 +
 .../aarch64/sme2/acle-asm/pext_c8.c           |   50 +
 .../aarch64/sme2/acle-asm/pext_c8_x2.c        |   54 +
 .../aarch64/sme2/acle-asm/pfalse_c.c          |   39 +
 .../aarch64/sme2/acle-asm/psel_b16.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_b32.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_b64.c          |   80 ++
 .../aarch64/sme2/acle-asm/psel_b8.c           |   89 ++
 .../aarch64/sme2/acle-asm/psel_c16.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_c32.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_c64.c          |   80 ++
 .../aarch64/sme2/acle-asm/psel_c8.c           |   89 ++
 .../aarch64/sme2/acle-asm/ptrue_c16.c         |   41 +
 .../aarch64/sme2/acle-asm/ptrue_c32.c         |   41 +
 .../aarch64/sme2/acle-asm/ptrue_c64.c         |   41 +
 .../aarch64/sme2/acle-asm/ptrue_c8.c          |   41 +
 .../aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c  |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c  |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c  |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qdmulh_s16_x2.c     |  207 +++
 .../aarch64/sme2/acle-asm/qdmulh_s16_x4.c     |  249 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s32_x2.c     |  207 +++
 .../aarch64/sme2/acle-asm/qdmulh_s32_x4.c     |  249 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s64_x2.c     |  207 +++
 .../aarch64/sme2/acle-asm/qdmulh_s64_x4.c     |  249 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s8_x2.c      |  207 +++
 .../aarch64/sme2/acle-asm/qdmulh_s8_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/qrshr_s16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/qrshr_s16_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshr_s8_x4.c       |   65 +
 .../aarch64/sme2/acle-asm/qrshr_u16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/qrshr_u16_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshr_u8_x4.c       |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_s16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/qrshrn_s16_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_s8_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_u16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/qrshrn_u16_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_u8_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshru_u16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/qrshru_u16_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/qrshru_u8_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshrun_u16_x2.c    |   50 +
 .../aarch64/sme2/acle-asm/qrshrun_u16_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qrshrun_u8_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/read_hor_za16_vg2.c |  140 ++
 .../aarch64/sme2/acle-asm/read_hor_za16_vg4.c |  138 ++
 .../aarch64/sme2/acle-asm/read_hor_za32_vg2.c |  112 ++
 .../aarch64/sme2/acle-asm/read_hor_za32_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_hor_za64_vg2.c |  113 ++
 .../aarch64/sme2/acle-asm/read_hor_za64_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_hor_za8_vg2.c  |  140 ++
 .../aarch64/sme2/acle-asm/read_hor_za8_vg4.c  |  156 +++
 .../aarch64/sme2/acle-asm/read_ver_za16_vg2.c |  140 ++
 .../aarch64/sme2/acle-asm/read_ver_za16_vg4.c |  138 ++
 .../aarch64/sme2/acle-asm/read_ver_za32_vg2.c |  112 ++
 .../aarch64/sme2/acle-asm/read_ver_za32_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_ver_za64_vg2.c |  113 ++
 .../aarch64/sme2/acle-asm/read_ver_za64_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_ver_za8_vg2.c  |  140 ++
 .../aarch64/sme2/acle-asm/read_ver_za8_vg4.c  |  156 +++
 .../aarch64/sme2/acle-asm/read_za16_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/read_za16_vg1x4.c   |  137 ++
 .../aarch64/sme2/acle-asm/read_za32_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/read_za32_vg1x4.c   |  137 ++
 .../aarch64/sme2/acle-asm/read_za64_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/read_za64_vg1x4.c   |  137 ++
 .../aarch64/sme2/acle-asm/read_za8_vg1x2.c    |  122 ++
 .../aarch64/sme2/acle-asm/read_za8_vg1x4.c    |  137 ++
 .../aarch64/sme2/acle-asm/rinta_s32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rinta_s32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rintm_u32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rintm_u32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rintn_u32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rintn_u32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rintp_u32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rintp_u32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rshl_s16_x2.c       |  207 +++
 .../aarch64/sme2/acle-asm/rshl_s16_x4.c       |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_s32_x2.c       |  207 +++
 .../aarch64/sme2/acle-asm/rshl_s32_x4.c       |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_s64_x2.c       |  207 +++
 .../aarch64/sme2/acle-asm/rshl_s64_x4.c       |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_s8_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/rshl_s8_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_u16_x2.c       |  207 +++
 .../aarch64/sme2/acle-asm/rshl_u16_x4.c       |  228 ++++
 .../aarch64/sme2/acle-asm/rshl_u32_x2.c       |  207 +++
 .../aarch64/sme2/acle-asm/rshl_u32_x4.c       |  228 ++++
 .../aarch64/sme2/acle-asm/rshl_u64_x2.c       |  207 +++
 .../aarch64/sme2/acle-asm/rshl_u64_x4.c       |  228 ++++
 .../aarch64/sme2/acle-asm/rshl_u8_x2.c        |  207 +++
 .../aarch64/sme2/acle-asm/rshl_u8_x4.c        |  228 ++++
 .../aarch64/sme2/acle-asm/sel_bf16_x2.c       |   92 ++
 .../aarch64/sme2/acle-asm/sel_bf16_x4.c       |   92 ++
 .../aarch64/sme2/acle-asm/sel_f16_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f16_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f32_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f32_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f64_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f64_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s16_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s16_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s32_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s32_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s64_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s64_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s8_x2.c         |   92 ++
 .../aarch64/sme2/acle-asm/sel_s8_x4.c         |   92 ++
 .../aarch64/sme2/acle-asm/sel_u16_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u16_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u32_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u32_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u64_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u64_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u8_x2.c         |   92 ++
 .../aarch64/sme2/acle-asm/sel_u8_x4.c         |   92 ++
 .../aarch64/sme2/acle-asm/st1_bf16_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/st1_bf16_x4.c       |  354 +++++
 .../aarch64/sme2/acle-asm/st1_f16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_f16_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_f32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_f32_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_f64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_f64_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_s16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s16_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_s32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s32_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_s64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s64_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_s8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s8_x4.c         |  354 +++++
 .../aarch64/sme2/acle-asm/st1_u16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u16_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_u32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u32_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_u64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u64_x4.c        |  354 +++++
 .../aarch64/sme2/acle-asm/st1_u8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u8_x4.c         |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_bf16_x2.c     |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_bf16_x4.c     |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_f16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_f16_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_f32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_f32_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_f64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_f64_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_s16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s16_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_s32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s32_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_s64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s64_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_s8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s8_x4.c       |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_u16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u16_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_u32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u32_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_u64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u64_x4.c      |  354 +++++
 .../aarch64/sme2/acle-asm/stnt1_u8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u8_x4.c       |  354 +++++
 .../gcc.target/aarch64/sme2/acle-asm/str_zt.c |   36 +
 .../sme2/acle-asm/sub_write_za32_s32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/sub_write_za32_s32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/sub_write_za32_u32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/sub_write_za32_u32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/sub_write_za64_s64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/sub_write_za64_s64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/sub_write_za64_u64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/sub_write_za64_u64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/sub_za32_f32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/sub_za32_f32_vg1x4.c        |  137 ++
 .../sme2/acle-asm/sub_za32_s32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/sub_za32_s32_vg1x4.c        |  137 ++
 .../sme2/acle-asm/sub_za32_u32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/sub_za32_u32_vg1x4.c        |  137 ++
 .../sme2/acle-asm/sub_za64_f64_vg1x2.c        |  126 ++
 .../sme2/acle-asm/sub_za64_f64_vg1x4.c        |  141 ++
 .../sme2/acle-asm/sub_za64_s64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/sub_za64_s64_vg1x4.c        |  139 ++
 .../sme2/acle-asm/sub_za64_u64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/sub_za64_u64_vg1x4.c        |  139 ++
 .../sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c  |  102 ++
 .../sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c  |  108 ++
 .../sme2/acle-asm/sudot_za32_s8_vg1x2.c       |  243 ++++
 .../sme2/acle-asm/sudot_za32_s8_vg1x4.c       |  254 ++++
 .../sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c |  108 ++
 .../aarch64/sme2/acle-asm/test_sme2_acle.h    |  124 ++
 .../aarch64/sme2/acle-asm/unpk_s16_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_s16_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_s32_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_s32_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_s8_x2.c        |   50 +
 .../aarch64/sme2/acle-asm/unpk_s8_x4.c        |   76 ++
 .../aarch64/sme2/acle-asm/unpk_u16_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_u16_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_u32_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_u32_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_u8_x2.c        |   50 +
 .../aarch64/sme2/acle-asm/unpk_u8_x4.c        |   76 ++
 .../sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c  |  102 ++
 .../sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c  |  108 ++
 .../sme2/acle-asm/usdot_za32_u8_vg1x2.c       |  243 ++++
 .../sme2/acle-asm/usdot_za32_u8_vg1x4.c       |  254 ++++
 .../sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c |  108 ++
 .../aarch64/sme2/acle-asm/uzp_bf16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzp_bf16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzp_f16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_f16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_f32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_f32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_f64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_f64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_bf16_x2.c      |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_bf16_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_f16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_f16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_f32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_f32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_f64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_f64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s8_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u8_x4.c        |   73 ++
 .../sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c   |  108 ++
 .../sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c   |  108 ++
 .../sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c  |  110 ++
 .../sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c  |  110 ++
 .../aarch64/sme2/acle-asm/whilege_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilege_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilege_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilege_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilege_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilege_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilege_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilege_c8.c        |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_c8.c        |  117 ++
 .../aarch64/sme2/acle-asm/whilele_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilele_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilele_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilele_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilele_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilele_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilele_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilele_c8.c        |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_c8.c        |  117 ++
 .../sme2/acle-asm/write_hor_za16_vg2.c        |  140 ++
 .../sme2/acle-asm/write_hor_za16_vg4.c        |  138 ++
 .../sme2/acle-asm/write_hor_za32_vg2.c        |  112 ++
 .../sme2/acle-asm/write_hor_za32_vg4.c        |  129 ++
 .../sme2/acle-asm/write_hor_za64_vg2.c        |  113 ++
 .../sme2/acle-asm/write_hor_za64_vg4.c        |  129 ++
 .../aarch64/sme2/acle-asm/write_hor_za8_vg2.c |  140 ++
 .../aarch64/sme2/acle-asm/write_hor_za8_vg4.c |  156 +++
 .../sme2/acle-asm/write_ver_za16_vg2.c        |  140 ++
 .../sme2/acle-asm/write_ver_za16_vg4.c        |  138 ++
 .../sme2/acle-asm/write_ver_za32_vg2.c        |  112 ++
 .../sme2/acle-asm/write_ver_za32_vg4.c        |  129 ++
 .../sme2/acle-asm/write_ver_za64_vg2.c        |  113 ++
 .../sme2/acle-asm/write_ver_za64_vg4.c        |  129 ++
 .../aarch64/sme2/acle-asm/write_ver_za8_vg2.c |  140 ++
 .../aarch64/sme2/acle-asm/write_ver_za8_vg4.c |  156 +++
 .../aarch64/sme2/acle-asm/write_za16_vg1x2.c  |  122 ++
 .../aarch64/sme2/acle-asm/write_za16_vg1x4.c  |  137 ++
 .../aarch64/sme2/acle-asm/write_za32_vg1x2.c  |  122 ++
 .../aarch64/sme2/acle-asm/write_za32_vg1x4.c  |  137 ++
 .../aarch64/sme2/acle-asm/write_za64_vg1x2.c  |  122 ++
 .../aarch64/sme2/acle-asm/write_za64_vg1x4.c  |  137 ++
 .../aarch64/sme2/acle-asm/write_za8_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/write_za8_vg1x4.c   |  137 ++
 .../aarch64/sme2/acle-asm/zero_zt.c           |   12 +
 .../aarch64/sme2/acle-asm/zip_bf16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zip_bf16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zip_f16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_f16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_f32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_f32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_f64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_f64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_s16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_s32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_s64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/zip_s8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/zip_u16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_u16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_u32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_u32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_u64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_u64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_u8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/zip_u8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/zipq_bf16_x2.c      |   77 ++
 .../aarch64/sme2/acle-asm/zipq_bf16_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/zipq_f16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_f16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_f32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_f32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_f64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_f64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s8_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u8_x4.c        |   73 ++
 .../aarch64/sve/acle/asm/create2_1.c          |   18 +
 .../gcc.target/aarch64/sve/acle/asm/get2_b.c  |   55 +
 .../aarch64/sve/acle/asm/reinterpret_b.c      |   20 +
 .../gcc.target/aarch64/sve/acle/asm/set2_b.c  |   41 +
 .../aarch64/sve/acle/asm/test_sve_acle.h      |  284 +++-
 .../general-c/binary_int_opt_single_n_1.c     |   35 +
 .../general-c/binary_int_opt_single_n_2.c     |   36 +
 .../acle/general-c/binary_opt_single_n_1.c    |   26 +
 .../acle/general-c/binary_opt_single_n_2.c    |   38 +
 .../sve/acle/general-c/binary_single_1.c      |   34 +
 .../sve/acle/general-c/binary_za_m_1.c        |    1 -
 .../binary_za_slice_int_opt_single_1.c        |   61 +
 .../acle/general-c/binary_za_slice_lane_1.c   |   73 ++
 .../acle/general-c/binary_za_slice_lane_2.c   |   78 ++
 .../acle/general-c/binary_za_slice_lane_3.c   |   78 ++
 .../acle/general-c/binary_za_slice_lane_4.c   |   26 +
 .../general-c/binary_za_slice_opt_single_1.c  |   76 ++
 .../general-c/binary_za_slice_opt_single_2.c  |   29 +
 .../general-c/binary_za_slice_opt_single_3.c  |   16 +
 .../binary_za_slice_uint_opt_single_1.c       |   61 +
 .../aarch64/sve/acle/general-c/binaryxn_1.c   |   23 +
 .../aarch64/sve/acle/general-c/binaryxn_2.c   |   33 +
 .../aarch64/sve/acle/general-c/clamp_1.c      |   30 +
 .../acle/general-c/compare_scalar_count_1.c   |   55 +
 .../aarch64/sve/acle/general-c/create_1.c     |    2 +-
 .../acle/general-c/dot_za_slice_int_lane_1.c  |   59 +
 .../sve/acle/general-c/dot_za_slice_lane_1.c  |   83 ++
 .../sve/acle/general-c/dot_za_slice_lane_2.c  |   83 ++
 .../acle/general-c/dot_za_slice_uint_lane_1.c |   59 +
 .../aarch64/sve/acle/general-c/load_1.c       |    4 +-
 .../general-c/shift_right_imm_narrowxn_1.c    |   89 ++
 .../aarch64/sve/acle/general-c/store_1.c      |    2 +-
 .../aarch64/sve/acle/general-c/store_2.c      |    2 +-
 .../aarch64/sve/acle/general-c/storexn_1.c    |   33 +
 .../aarch64/sve/acle/general-c/svboolx2_1.c   |  135 ++
 .../aarch64/sve/acle/general-c/svcount_1.c    |   10 +
 .../sve/acle/general-c/ternary_qq_lane_1.c    |   30 +-
 .../sve/acle/general-c/ternary_qq_opt_n_2.c   |   12 +-
 .../acle/general-c/ternary_qq_or_011_lane_1.c |   33 +
 .../sve/acle/general-c/unary_convert_1.c      |    8 +-
 .../sve/acle/general-c/unary_convertxn_1.c    |   28 +
 .../sve/acle/general-c/unary_za_slice_1.c     |   54 +
 .../sve/acle/general-c/unary_za_slice_2.c     |   27 +
 .../sve/acle/general-c/unary_za_slice_3.c     |   16 +
 .../aarch64/sve/acle/general-c/unaryxn_1.c    |   15 +
 .../aarch64/sve/acle/general-c/write_za_1.c   |   50 +
 .../sve/acle/general-c/write_za_slice_1.c     |   38 +
 .../aarch64/sve/acle/general/attributes_7.c   |    1 +
 .../gcc.target/aarch64/sve/pcs/annotate_1.c   |    4 +
 .../gcc.target/aarch64/sve/pcs/annotate_2.c   |    4 +
 .../gcc.target/aarch64/sve/pcs/args_12.c      |  214 +++
 .../gcc.target/aarch64/sve/pcs/struct_3_128.c |    6 +-
 gcc/testsuite/lib/target-supports.exp         |   14 +-
 900 files changed, 123399 insertions(+), 379 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/svcount_1.C
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_lane_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_lane_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmopa_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmops_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pfalse_c.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_zt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx2_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svcount_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/args_12.c

-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [pushed v2 1/5] aarch64: Add +sme2
  2023-12-05 10:24 [pushed v2 0/5] aarch64: Add support for SME2 Richard Sandiford
@ 2023-12-05 10:24 ` Richard Sandiford
  2023-12-05 10:25 ` [pushed v2 2/5] aarch64: Add svcount_t Richard Sandiford
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Sandiford @ 2023-12-05 10:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford

gcc/
	* doc/invoke.texi: Document +sme2.
	* doc/sourcebuild.texi: Document aarch64_sme2.
	* config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION):
	Add sme2.
	* config/aarch64/aarch64.h (AARCH64_ISA_SME2, TARGET_SME2): New macros.

gcc/testsuite/
	* lib/target-supports.exp (check_effective_target_aarch64_sme2): New
	target test.
	(check_effective_target_aarch64_asm_sme2_ok): Likewise.
---
 gcc/config/aarch64/aarch64-option-extensions.def |  2 ++
 gcc/config/aarch64/aarch64.h                     |  4 ++++
 gcc/doc/invoke.texi                              |  3 ++-
 gcc/doc/sourcebuild.texi                         |  2 ++
 gcc/testsuite/lib/target-supports.exp            | 14 +++++++++++++-
 5 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 1480e498bbb..c156d2ee76a 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -157,4 +157,6 @@ AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "")
 
 AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "")
 
+AARCH64_OPT_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
+
 #undef AARCH64_OPT_EXTENSION
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index f9139a8e28f..854eb7bedc9 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -227,6 +227,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 #define AARCH64_ISA_SME		   (aarch64_isa_flags & AARCH64_FL_SME)
 #define AARCH64_ISA_SME_I16I64	   (aarch64_isa_flags & AARCH64_FL_SME_I16I64)
 #define AARCH64_ISA_SME_F64F64	   (aarch64_isa_flags & AARCH64_FL_SME_F64F64)
+#define AARCH64_ISA_SME2	   (aarch64_isa_flags & AARCH64_FL_SME2)
 #define AARCH64_ISA_V8_3A	   (aarch64_isa_flags & AARCH64_FL_V8_3A)
 #define AARCH64_ISA_DOTPROD	   (aarch64_isa_flags & AARCH64_FL_DOTPROD)
 #define AARCH64_ISA_AES	           (aarch64_isa_flags & AARCH64_FL_AES)
@@ -332,6 +333,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 /* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64.  */
 #define TARGET_SME_F64F64 (AARCH64_ISA_SME_F64F64)
 
+/* SME2 instructions, enabled through +sme2.  */
+#define TARGET_SME2 (AARCH64_ISA_SME2)
+
 /* ARMv8.3-A features.  */
 #define TARGET_ARMV8_3	(AARCH64_ISA_V8_3A)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 806babc3dfa..f93128dbe2b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21279,7 +21279,8 @@ Enable the Scalable Matrix Extension.
 Enable the FEAT_SME_I16I64 extension to SME.
 @item sme-f64f64
 Enable the FEAT_SME_F64F64 extension to SME.
-
+@item sme2
+Enable the Scalable Matrix Extension 2.  This also enables SME instructions.
 @end table
 
 Feature @option{crypto} implies @option{aes}, @option{sha2}, and @option{simd},
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 123e73508b6..c9909026854 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2318,6 +2318,8 @@ Binutils installed on test system supports relocation types required by -fpic
 for AArch64 small memory model.
 @item aarch64_sme
 AArch64 target that generates instructions for SME.
+@item aarch64_sme2
+AArch64 target that generates instructions for SME2.
 @item aarch64_sve_hw
 AArch64 target that is able to generate and execute SVE code (regardless of
 whether it does so by default).
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 986004bf13c..3fcce6be49d 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4398,6 +4398,18 @@ proc check_effective_target_aarch64_sme { } {
     }]
 }
 
+# Return 1 if this is an AArch64 target that generates instructions for SME2.
+proc check_effective_target_aarch64_sme2 { } {
+    if { ![istarget aarch64*-*-*] } {
+	return 0
+    }
+    return [check_no_compiler_messages aarch64_sme2 assembly {
+	#if !defined (__ARM_FEATURE_SME2)
+	#error FOO
+	#endif
+    }]
+}
+
 # Return 1 if this is a compiler supporting ARC atomic operations
 proc check_effective_target_arc_atomic { } {
     return [check_no_compiler_messages arc_atomic assembly {
@@ -11628,7 +11640,7 @@ proc check_effective_target_aarch64_tiny { } {
 
 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
 			  "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
-			  "sme" "sme-i16i64" } {
+			  "sme" "sme-i16i64" "sme2" } {
     eval [string map [list FUNC $aarch64_ext] {
 	proc check_effective_target_aarch64_asm_FUNC_ok { } {
 	  if { [istarget aarch64*-*-*] } {
-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [pushed v2 2/5] aarch64: Add svcount_t
  2023-12-05 10:24 [pushed v2 0/5] aarch64: Add support for SME2 Richard Sandiford
  2023-12-05 10:24 ` [pushed v2 1/5] aarch64: Add +sme2 Richard Sandiford
@ 2023-12-05 10:25 ` Richard Sandiford
  2023-12-05 10:25 ` [pushed v2 3/5] aarch64: Add svboolx2_t Richard Sandiford
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Sandiford @ 2023-12-05 10:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford

Some SME2 instructions interpret predicates as counters, rather than
as bit-per-byte masks.  The SME2 ACLE defines an svcount_t type for
this interpretation.

I don't think we have a better way of representing counters than
the VNx16BI that we use for masks.  The patch therefore doesn't
add a new mode for this representation.  It's just something that
is interpreted in context, a bit like signed vs. unsigned integers.

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc
	(svreinterpret_impl::fold): Handle reinterprets between svbool_t
	and svcount_t.
	(svreinterpret_impl::expand): Likewise.
	* config/aarch64/aarch64-sve-builtins-base.def (svreinterpret): Add
	b<->c forms.
	* config/aarch64/aarch64-sve-builtins.cc (TYPES_reinterpret_b): New
	type suffix list.
	(wrap_type_in_struct, register_type_decl): New functions, split out
	from...
	(register_tuple_type): ...here.
	(register_builtin_types): Handle svcount_t.
	(handle_arm_sve_h): Don't create tuples of svcount_t.
	* config/aarch64/aarch64-sve-builtins.def (svcount_t): New type.
	(c): New type suffix.
	* config/aarch64/aarch64-sve-builtins.h (TYPE_count): New type class.

gcc/testsuite/
	* g++.target/aarch64/sve/acle/general-c++/mangle_1.C: Add test
	for svcount_t.
	* g++.target/aarch64/sve/acle/general-c++/mangle_2.C: Likewise.
	* g++.target/aarch64/sve/acle/general-c++/svcount_1.C: New test.
	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_DUAL_P)
	(TEST_DUAL_P_REV): New macros.
	* gcc.target/aarch64/sve/acle/asm/reinterpret_b.c: New test.
	* gcc.target/aarch64/sve/acle/general-c/load_1.c: Test passing
	an svcount_t.
	* gcc.target/aarch64/sve/acle/general-c/svcount_1.c: New test.
	* gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c: Test
	reinterprets involving svcount_t.
	* gcc.target/aarch64/sve/acle/general/attributes_7.c: Test svcount_t.
	* gcc.target/aarch64/sve/pcs/annotate_1.c: Likewise.
	* gcc.target/aarch64/sve/pcs/annotate_2.c: Likewise.
	* gcc.target/aarch64/sve/pcs/args_12.c: New test.
---
 .../aarch64/aarch64-sve-builtins-base.cc      |   8 +-
 .../aarch64/aarch64-sve-builtins-base.def     |   1 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    | 157 ++++++++-----
 gcc/config/aarch64/aarch64-sve-builtins.def   |   2 +
 gcc/config/aarch64/aarch64-sve-builtins.h     |   4 +-
 .../aarch64/sve/acle/general-c++/mangle_1.C   |   2 +
 .../aarch64/sve/acle/general-c++/mangle_2.C   |   2 +
 .../aarch64/sve/acle/general-c++/svcount_1.C  |  10 +
 .../aarch64/sve/acle/asm/reinterpret_b.c      |  20 ++
 .../aarch64/sve/acle/asm/test_sve_acle.h      |  15 ++
 .../aarch64/sve/acle/general-c/load_1.c       |   4 +-
 .../aarch64/sve/acle/general-c/svcount_1.c    |  10 +
 .../sve/acle/general-c/unary_convert_1.c      |   8 +-
 .../aarch64/sve/acle/general/attributes_7.c   |   1 +
 .../gcc.target/aarch64/sve/pcs/annotate_1.c   |   4 +
 .../gcc.target/aarch64/sve/pcs/annotate_2.c   |   4 +
 .../gcc.target/aarch64/sve/pcs/args_12.c      | 214 ++++++++++++++++++
 17 files changed, 402 insertions(+), 64 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/svcount_1.C
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svcount_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/args_12.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index a219c88085a..89035135a38 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2153,8 +2153,9 @@ public:
 
     /* Punt to rtl if the effect of the reinterpret on registers does not
        conform to GCC's endianness model.  */
-    if (!targetm.can_change_mode_class (f.vector_mode (0),
-					f.vector_mode (1), FP_REGS))
+    if (GET_MODE_CLASS (f.vector_mode (0)) != MODE_VECTOR_BOOL
+	&& !targetm.can_change_mode_class (f.vector_mode (0),
+					   f.vector_mode (1), FP_REGS))
       return NULL;
 
     /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
@@ -2168,6 +2169,9 @@ public:
   expand (function_expander &e) const override
   {
     machine_mode mode = e.tuple_mode (0);
+    /* Handle svbool_t <-> svcount_t.  */
+    if (mode == e.tuple_mode (1))
+      return e.args[0];
     return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
   }
 };
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index ac53f35220d..a742c7bbc56 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -198,6 +198,7 @@ DEF_SVE_FUNCTION (svrecpe, unary, all_float, none)
 DEF_SVE_FUNCTION (svrecps, binary, all_float, none)
 DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz)
 DEF_SVE_FUNCTION_GS (svreinterpret, reinterpret, reinterpret, x1234, none)
+DEF_SVE_FUNCTION (svreinterpret, reinterpret, reinterpret_b, none)
 DEF_SVE_FUNCTION (svrev, unary, all_data, none)
 DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none)
 DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index a40d448685d..e32f0f8f903 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -430,6 +430,12 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_reinterpret1 (D, u32), \
   TYPES_reinterpret1 (D, u64)
 
+/* _b_c
+   _c_b.  */
+#define TYPES_reinterpret_b(S, D) \
+  D (b, c), \
+  D (c, b)
+
 /* { _b8 _b16 _b32 _b64 } x { _s32 _s64 }
 			    { _u32 _u64 } */
 #define TYPES_while1(D, bn) \
@@ -579,6 +585,7 @@ DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
 DEF_SVE_TYPES_ARRAY (cvt_narrow);
 DEF_SVE_TYPES_ARRAY (inc_dec_n);
 DEF_SVE_TYPES_ARRAY (reinterpret);
+DEF_SVE_TYPES_ARRAY (reinterpret_b);
 DEF_SVE_TYPES_ARRAY (while);
 DEF_SVE_TYPES_ARRAY (all_za);
 DEF_SVE_TYPES_ARRAY (d_za);
@@ -3789,6 +3796,49 @@ function_expander::expand ()
   return base->expand (*this);
 }
 
+/* Return a structure type that contains a single field of type FIELD_TYPE.
+   The field is called __val, but that's an internal detail rather than
+   an exposed part of the API.  */
+static tree
+wrap_type_in_struct (tree field_type)
+{
+  tree field = build_decl (input_location, FIELD_DECL,
+			   get_identifier ("__val"), field_type);
+  tree struct_type = lang_hooks.types.make_type (RECORD_TYPE);
+  DECL_FIELD_CONTEXT (field) = struct_type;
+  TYPE_FIELDS (struct_type) = field;
+  make_type_sizeless (struct_type);
+  layout_type (struct_type);
+  return struct_type;
+}
+
+/* Register a built-in TYPE_DECL called NAME for TYPE.  This is used/needed
+   when TYPE is a structure type.  */
+static void
+register_type_decl (tree type, const char *name)
+{
+  tree decl = build_decl (input_location, TYPE_DECL,
+			  get_identifier (name), type);
+  TYPE_NAME (type) = decl;
+  TYPE_STUB_DECL (type) = decl;
+  lang_hooks.decls.pushdecl (decl);
+  /* ??? Undo the effect of set_underlying_type for C.  The C frontend
+     doesn't recognize DECL as a built-in because (as intended) the decl has
+     a real location instead of BUILTINS_LOCATION.  The frontend therefore
+     treats the decl like a normal C "typedef struct foo foo;", expecting
+     the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead
+     of the named one we attached above.  It then sets DECL_ORIGINAL_TYPE
+     on the supposedly unnamed decl, creating a circularity that upsets
+     dwarf2out.
+
+     We don't want to follow the normal C model and create "struct foo"
+     tags for tuple types since (a) the types are supposed to be opaque
+     and (b) they couldn't be defined as a real struct anyway.  Treating
+     the TYPE_DECLs as "typedef struct foo foo;" without creating
+     "struct foo" would lead to confusing error messages.  */
+  DECL_ORIGINAL_TYPE (decl) = NULL_TREE;
+}
+
 /* Register the built-in SVE ABI types, such as __SVBool_t.  */
 static void
 register_builtin_types ()
@@ -3799,48 +3849,63 @@ register_builtin_types ()
 
   for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i)
     {
-      tree eltype = scalar_types[i];
       tree vectype;
       unsigned int num_zr = 0, num_pr = 0;
-      if (eltype == boolean_type_node)
+      if (vector_type_index (i) == VECTOR_TYPE_svcount_t)
 	{
-	  vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR,
-						      VNx16BImode);
-	  gcc_assert (TYPE_MODE (vectype) == VNx16BImode
-		      && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype)
-		      && TYPE_ALIGN (vectype) == 16
-		      && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)),
-				   BYTES_PER_SVE_VECTOR));
+	  vectype = abi_vector_types[VECTOR_TYPE_svbool_t];
+	  vectype = wrap_type_in_struct (vectype);
 	  num_pr = 1;
 	}
       else
 	{
-	  scalar_mode elmode = SCALAR_TYPE_MODE (eltype);
-	  unsigned int elbytes = GET_MODE_SIZE (elmode);
-	  poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes);
-	  machine_mode mode
-	    = aarch64_sve_data_mode (elmode, nunits).require ();
-	  vectype = build_vector_type_for_mode (eltype, mode);
-	  gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype))
-		      && TYPE_MODE (vectype) == mode
-		      && TYPE_MODE_RAW (vectype) == mode
-		      && TYPE_ALIGN (vectype) == 128
-		      && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)),
-				   BITS_PER_SVE_VECTOR));
-	  num_zr = 1;
+	  tree eltype = scalar_types[i];
+	  if (eltype == boolean_type_node)
+	    {
+	      vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR,
+							  VNx16BImode);
+	      num_pr = 1;
+	    }
+	  else
+	    {
+	      scalar_mode elmode = SCALAR_TYPE_MODE (eltype);
+	      unsigned int elbytes = GET_MODE_SIZE (elmode);
+	      poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes);
+	      machine_mode mode
+		= aarch64_sve_data_mode (elmode, nunits).require ();
+	      vectype = build_vector_type_for_mode (eltype, mode);
+	      auto size = wi::to_poly_offset (TYPE_SIZE (vectype));
+	      gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype))
+			  && TYPE_MODE (vectype) == mode
+			  && TYPE_MODE_RAW (vectype) == mode
+			  && TYPE_ALIGN (vectype) == 128
+			  && known_eq (size, BITS_PER_SVE_VECTOR));
+	      num_zr = 1;
+	    }
+	  vectype = build_distinct_type_copy (vectype);
+	  gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
+	  SET_TYPE_STRUCTURAL_EQUALITY (vectype);
+	  TYPE_ARTIFICIAL (vectype) = 1;
+	  TYPE_INDIVISIBLE_P (vectype) = 1;
+	  make_type_sizeless (vectype);
+	}
+      if (num_pr)
+	{
+	  auto size = wi::to_poly_offset (TYPE_SIZE (vectype));
+	  gcc_assert (TYPE_MODE (vectype) == VNx16BImode
+		      && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype)
+		      && TYPE_ALIGN (vectype) == 16
+		      && known_eq (size, BYTES_PER_SVE_VECTOR));
 	}
-      vectype = build_distinct_type_copy (vectype);
-      gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
-      SET_TYPE_STRUCTURAL_EQUALITY (vectype);
-      TYPE_ARTIFICIAL (vectype) = 1;
-      TYPE_INDIVISIBLE_P (vectype) = 1;
       add_sve_type_attribute (vectype, num_zr, num_pr,
 			      vector_types[i].mangled_name,
 			      vector_types[i].acle_name);
-      make_type_sizeless (vectype);
       abi_vector_types[i] = vectype;
-      lang_hooks.types.register_builtin_type (vectype,
-					      vector_types[i].abi_name);
+      if (TREE_CODE (vectype) == RECORD_TYPE)
+	register_type_decl (vectype, vector_types[i].abi_name);
+      else
+	lang_hooks.types.register_builtin_type (vectype,
+						vector_types[i].abi_name);
     }
 }
 
@@ -3884,8 +3949,6 @@ register_vector_type (vector_type_index type)
 static void
 register_tuple_type (unsigned int num_vectors, vector_type_index type)
 {
-  tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
-
   /* Work out the structure name.  */
   char buffer[sizeof ("svbfloat16x4_t")];
   const char *vector_type_name = vector_types[type].acle_name;
@@ -3912,37 +3975,13 @@ register_tuple_type (unsigned int num_vectors, vector_type_index type)
 	      && TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
 	      && TYPE_ALIGN (array_type) == 128);
 
-  tree field = build_decl (input_location, FIELD_DECL,
-			   get_identifier ("__val"), array_type);
-  DECL_FIELD_CONTEXT (field) = tuple_type;
-  TYPE_FIELDS (tuple_type) = field;
+  tree tuple_type = wrap_type_in_struct (array_type);
   add_sve_type_attribute (tuple_type, num_vectors, 0, NULL, buffer);
-  make_type_sizeless (tuple_type);
-  layout_type (tuple_type);
   gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type))
 	      && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type)
 	      && TYPE_ALIGN (tuple_type) == 128);
 
-  tree decl = build_decl (input_location, TYPE_DECL,
-			  get_identifier (buffer), tuple_type);
-  TYPE_NAME (tuple_type) = decl;
-  TYPE_STUB_DECL (tuple_type) = decl;
-  lang_hooks.decls.pushdecl (decl);
-  /* ??? Undo the effect of set_underlying_type for C.  The C frontend
-     doesn't recognize DECL as a built-in because (as intended) the decl has
-     a real location instead of BUILTINS_LOCATION.  The frontend therefore
-     treats the decl like a normal C "typedef struct foo foo;", expecting
-     the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead
-     of the named one we attached above.  It then sets DECL_ORIGINAL_TYPE
-     on the supposedly unnamed decl, creating a circularity that upsets
-     dwarf2out.
-
-     We don't want to follow the normal C model and create "struct foo"
-     tags for tuple types since (a) the types are supposed to be opaque
-     and (b) they couldn't be defined as a real struct anyway.  Treating
-     the TYPE_DECLs as "typedef struct foo foo;" without creating
-     "struct foo" would lead to confusing error messages.  */
-  DECL_ORIGINAL_TYPE (decl) = NULL_TREE;
+  register_type_decl (tuple_type, buffer);
 
   acle_vector_types[num_vectors - 1][type] = tuple_type;
 }
@@ -3992,7 +4031,7 @@ handle_arm_sve_h ()
     {
       vector_type_index type = vector_type_index (type_i);
       register_vector_type (type);
-      if (type != VECTOR_TYPE_svbool_t)
+      if (scalar_types[type_i] != boolean_type_node)
 	for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
 	  register_tuple_type (count, type);
     }
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def
index 5824dc797f9..297904f3e47 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins.def
@@ -84,6 +84,7 @@ DEF_SVE_MODE (u64offset, none, svuint64_t, bytes)
 DEF_SVE_MODE (vnum, none, none, vectors)
 
 DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node)
+DEF_SVE_TYPE (svcount_t, 11, __SVCount_t, boolean_type_node)
 DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, bfloat16_type_node)
 DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node)
 DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node)
@@ -106,6 +107,7 @@ DEF_SVE_TYPE_SUFFIX (b16, svbool_t, bool, 16, VNx8BImode)
 DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode)
 DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode)
 DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode)
+DEF_SVE_TYPE_SUFFIX (c, svcount_t, count, 8, VNx16BImode)
 DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode)
 DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode)
 DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 1cd31d2d733..51774825c23 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -152,11 +152,13 @@ enum predication_index
 };
 
 /* Classifies element types, based on type suffixes with the bit count
-   removed.  */
+   removed.  "count" isn't really an element type, but we pretend it is
+   for consistency.  */
 enum type_class_index
 {
   TYPE_bool,
   TYPE_bfloat,
+  TYPE_count,
   TYPE_float,
   TYPE_signed,
   TYPE_unsigned,
diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C
index 36dab3c9b71..2ad0c7f9838 100644
--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C
+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C
@@ -15,6 +15,7 @@ void f10(svfloat16_t) {}
 void f11(svfloat32_t) {}
 void f12(svfloat64_t) {}
 void f13(svbfloat16_t) {}
+void f14(svcount_t) {}
 
 /* { dg-final { scan-assembler "_Z2f1u10__SVBool_t:" } } */
 /* { dg-final { scan-assembler "_Z2f2u10__SVInt8_t:" } } */
@@ -29,3 +30,4 @@ void f13(svbfloat16_t) {}
 /* { dg-final { scan-assembler "_Z3f11u13__SVFloat32_t:" } } */
 /* { dg-final { scan-assembler "_Z3f12u13__SVFloat64_t:" } } */
 /* { dg-final { scan-assembler "_Z3f13u14__SVBfloat16_t:" } } */
+/* { dg-final { scan-assembler "_Z3f14u11__SVCount_t:" } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C
index ad4aaee291f..c8bfcc5a9c2 100644
--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C
+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C
@@ -13,6 +13,7 @@ void f10(__SVFloat16_t) {}
 void f11(__SVFloat32_t) {}
 void f12(__SVFloat64_t) {}
 void f13(__SVBfloat16_t) {}
+void f14(__SVCount_t) {}
 
 /* { dg-final { scan-assembler "_Z2f1u10__SVBool_t:" } } */
 /* { dg-final { scan-assembler "_Z2f2u10__SVInt8_t:" } } */
@@ -27,3 +28,4 @@ void f13(__SVBfloat16_t) {}
 /* { dg-final { scan-assembler "_Z3f11u13__SVFloat32_t:" } } */
 /* { dg-final { scan-assembler "_Z3f12u13__SVFloat64_t:" } } */
 /* { dg-final { scan-assembler "_Z3f13u14__SVBfloat16_t:" } } */
+/* { dg-final { scan-assembler "_Z3f14u11__SVCount_t:" } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/svcount_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/svcount_1.C
new file mode 100644
index 00000000000..9eac65aafff
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/svcount_1.C
@@ -0,0 +1,10 @@
+#include <arm_sve.h>
+
+svbool_t f1 (svcount_t x) { return x; } // { dg-error {cannot convert 'svcount_t' to 'svbool_t' in return} }
+svcount_t f2 (svbool_t x) { return x; } // { dg-error {cannot convert 'svbool_t' to 'svcount_t' in return} }
+void f3 (svbool_t *p, svcount_t x) { *p = x; } // { dg-error {cannot convert 'svcount_t' to 'svbool_t' in assignment} }
+void f4 (svcount_t *p, svbool_t x) { *p = x; } // { dg-error {cannot convert 'svbool_t' to 'svcount_t' in assignment} }
+svbool_t *f5 (svcount_t *p) { return p; } // { dg-error {cannot convert} }
+svcount_t *f6 (svbool_t *p) { return p; } // { dg-error {cannot convert} }
+svbool_t f7 (svcount_t x) { return (svbool_t) x; } // { dg-error {invalid cast from type 'svcount_t' to type 'svbool_t'} }
+svcount_t f8 (svbool_t x) { return (svcount_t) x; } // { dg-error {invalid cast from type 'svbool_t' to type 'svcount_t'} }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_b.c
new file mode 100644
index 00000000000..57736ecb6c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_b.c
@@ -0,0 +1,20 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** reinterpret_b_c_tied1:
+**	ret
+*/
+TEST_DUAL_P_REV (reinterpret_b_c_tied1, svbool_t, svcount_t,
+		 p0_res = svreinterpret_b_c (p0),
+		 p0_res = svreinterpret_b (p0))
+
+/*
+** reinterpret_b_c_untied:
+**	mov	p0\.b, p2\.b
+**	ret
+*/
+TEST_DUAL_P (reinterpret_b_c_untied, svbool_t, svcount_t,
+	     p0 = svreinterpret_b_c (p2),
+	     p0 = svreinterpret_b (p2))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 84925b9bd48..5ce0be5947b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -85,6 +85,21 @@
     return z0_res;						\
   }
 
+#define TEST_DUAL_P(NAME, TYPE1, TYPE2, CODE1, CODE2)		\
+  PROTO (NAME, TYPE1, (TYPE1 p0, TYPE1 p1, TYPE2 p2, TYPE2 p3))	\
+  {								\
+    INVOKE (CODE1, CODE2);					\
+    return p0;							\
+  }
+
+#define TEST_DUAL_P_REV(NAME, TYPE1, TYPE2, CODE1, CODE2)	\
+  PROTO (NAME, TYPE1, (TYPE2 p0, TYPE2 p1, TYPE1 p2, TYPE1 p3))	\
+  {								\
+    TYPE1 p0_res;						\
+    INVOKE (CODE1, CODE2);					\
+    return p0_res;						\
+  }
+
 #define TEST_TRIPLE_Z(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)	\
   PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3,	\
 		       TYPE3 z4, TYPE3 z5,			\
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c
index 784fdc317e6..564295a87f3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c
@@ -6,12 +6,14 @@
 struct s { signed char x; };
 
 svuint8_t
-f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
+f1 (svbool_t pg, svcount_t pn,
+    signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
     float *f32_ptr, _Complex float *cf32_ptr, int **ptr_ptr)
 {
   svld1 (pg); /* { dg-error {too few arguments to function 'svld1'} } */
   svld1 (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1'} } */
   svld1 (0, s8_ptr); /* { dg-error {passing 'int' to argument 1 of 'svld1', which expects 'svbool_t'} } */
+  svld1 (pn, s8_ptr); /* { dg-error {passing 'svcount_t' to argument 1 of 'svld1', which expects 'svbool_t'} } */
   svld1 (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1', which expects a pointer type} } */
   svld1 (pg, (int32_t *) 0);
   svld1 (pg, void_ptr); /* { dg-error {passing 'void \*' to argument 2 of 'svld1', but 'void' is not a valid SVE element type} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svcount_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svcount_1.c
new file mode 100644
index 00000000000..920d37e4ce7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svcount_1.c
@@ -0,0 +1,10 @@
+#include <arm_sve.h>
+
+svbool_t f1 (svcount_t x) { return x; } /* { dg-error {incompatible types} } */
+svcount_t f2 (svbool_t x) { return x; } /* { dg-error {incompatible types} } */
+void f3 (svbool_t *p, svcount_t x) { *p = x; } /* { dg-error {incompatible types} } */
+void f4 (svcount_t *p, svbool_t x) { *p = x; } /* { dg-error {incompatible types} } */
+svbool_t *f5 (svcount_t *p) { return p; } /* { dg-error {incompatible return type} } */
+svcount_t *f6 (svbool_t *p) { return p; } /* { dg-error {incompatible return type} } */
+svbool_t f7 (svcount_t x) { return (svbool_t) x; } /* { dg-error {conversion to non-scalar} } */
+svcount_t f8 (svbool_t x) { return (svcount_t) x; } /* { dg-error {conversion to non-scalar} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c
index f59ad590ba4..b7258e434db 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c
@@ -4,7 +4,7 @@ void
 test (svbool_t pg, svint8_t s8, svuint8_t u8,
       svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
       svint64_t s64, svuint64_t u64, svfloat16_t f16, svfloat32_t f32,
-      svfloat64_t f64)
+      svfloat64_t f64, svcount_t pn)
 {
   svcvt_f64_x (pg); /* { dg-error {too few arguments to function 'svcvt_f64_x'} } */
   svcvt_f64_x (pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_x'} } */
@@ -70,4 +70,10 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8,
   svcvt_u16_x (pg, f16);
   svcvt_u16_x (pg, f32); /* { dg-error {'svcvt_u16_x' has no form that takes 'svfloat32_t' arguments} } */
   svcvt_u16_x (pg, f64); /* { dg-error {'svcvt_u16_x' has no form that takes 'svfloat64_t' arguments} } */
+
+  svreinterpret_b (pg); /* { dg-error {'svreinterpret_b' has no form that takes 'svbool_t' arguments} } */
+  svreinterpret_b (pn);
+
+  svreinterpret_c (pg);
+  svreinterpret_c (pn); /* { dg-error {'svreinterpret_c' has no form that takes 'svcount_t' arguments} } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_7.c
index 5658a206fa3..edfadb8da38 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_7.c
@@ -27,6 +27,7 @@ typedef svint32x2_t bad_type_6 __attribute__ ((arm_sve_vector_bits (N))); // { d
 typedef svint8_t bad_type_7 __attribute__ ((arm_sve_vector_bits (N))) __attribute__ ((arm_sve_vector_bits (N))); // { dg-error {'arm_sve_vector_bits' applied to type 'svint8_t __attribute__\(\(arm_sve_vector_bits\([0-9]+\)\)\)', which already has a size} }
 typedef fixed_bool_t bad_type_8 __attribute__ ((arm_sve_vector_bits (N))) __attribute__ ((arm_sve_vector_bits (N))); // { dg-error {'arm_sve_vector_bits' applied to type 'fixed_bool_t' {aka 'svbool_t __attribute__\(\(arm_sve_vector_bits\([0-9]+\)\)\)'}, which already has a size} }
 typedef gnu_int8_t bad_type_9 __attribute__ ((arm_sve_vector_bits (N))) __attribute__ ((arm_sve_vector_bits (N))); // { dg-error {'arm_sve_vector_bits' applied to non-SVE type 'gnu_int8_t'} }
+typedef svcount_t bad_type_10 __attribute__ ((arm_sve_vector_bits (N))); // { dg-error {'arm_sve_vector_bits' applied to non-vector type 'svcount_t'} }
 
 void
 f (int c)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c
index 12ae7678948..c3ac692d7ff 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c
@@ -4,6 +4,8 @@
 
 svbool_t ret_b (void) { return svptrue_b8 (); }
 
+svcount_t ret_c (svcount_t *ptr) { return *ptr; }
+
 svint8_t ret_s8 (void) { return svdup_s8 (0); }
 svint16_t ret_s16 (void) { return svdup_s16 (0); }
 svint32_t ret_s32 (void) { return svdup_s32 (0); }
@@ -58,6 +60,8 @@ svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); }
 
 /* { dg-final { scan-assembler {\t\.variant_pcs\tret_b\n} } } */
 
+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_c\n} } } */
+
 /* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8\n} } } */
 /* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16\n} } } */
 /* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c
index 9f0741e3c26..c3508735fc4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c
@@ -4,6 +4,8 @@
 
 void fn_b (svbool_t x) {}
 
+void fn_c (svcount_t x) {}
+
 void fn_s8 (svint8_t x) {}
 void fn_s16 (svint16_t x) {}
 void fn_s32 (svint32_t x) {}
@@ -58,6 +60,8 @@ void fn_f64x4 (svfloat64x4_t x) {}
 
 /* { dg-final { scan-assembler {\t\.variant_pcs\tfn_b\n} } } */
 
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_c\n} } } */
+
 /* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */
 /* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */
 /* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_12.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_12.c
new file mode 100644
index 00000000000..a589484b394
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_12.c
@@ -0,0 +1,214 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fno-stack-clash-protection -g" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** callee_1:
+**	mov	p0\.b, p3\.b
+**	ret
+*/
+svcount_t __attribute__ ((noipa))
+callee_1 (svcount_t p0, svcount_t p1, svcount_t p2, svcount_t p3)
+{
+  return p3;
+}
+
+/*
+** callee_2:
+**	str	p0, \[x0\]
+**	str	p1, \[x1\]
+**	str	p2, \[x2\]
+**	str	p3, \[x3\]
+**	ret
+*/
+void __attribute__ ((noipa))
+callee_2 (svcount_t p0, svcount_t p1, svcount_t p2, svcount_t p3,
+	  svcount_t *ptr0, svcount_t *ptr1, svcount_t *ptr2, svcount_t *ptr3)
+{
+  *ptr0 = p0;
+  *ptr1 = p1;
+  *ptr2 = p2;
+  *ptr3 = p3;
+}
+
+/*
+** callee_3:
+**	str	p3, \[x0\]
+**	ret
+*/
+void __attribute__ ((noipa))
+callee_3 (svbool_t p0, svbool_t p1, svbool_t p2, svcount_t p3, svcount_t *ptr)
+{
+  *ptr = p3;
+}
+
+/*
+** callee_4:
+**	str	p3, \[x0\]
+**	ret
+*/
+void __attribute__ ((noipa))
+callee_4 (svcount_t p0, svcount_t p1, svcount_t p2, svbool_t p3, svbool_t *ptr)
+{
+  *ptr = p3;
+}
+
+/*
+** callee_5:
+**	ldr	p0, \[x0\]
+**	ret
+*/
+svcount_t __attribute__ ((noipa))
+callee_5 (svcount_t p0, svcount_t p1, svcount_t p2, svcount_t p3,
+	  svcount_t p4)
+{
+  return p4;
+}
+
+/*
+** callee_6:
+**	ldr	p0, \[x0\]
+**	ret
+*/
+svcount_t __attribute__ ((noipa))
+callee_6 (svcount_t p0, svcount_t p1, svcount_t p2, svcount_t p3,
+	  svcount_t p4, int x1, int x2, int x3, int x4, int x5, int x6, int x7,
+	  int x8)
+{
+  return p4;
+}
+
+/*
+** callee_7:
+**	ldr	(x[0-9]+), \[sp\]
+**	ldr	p0, \[\1\]
+**	ret
+*/
+svcount_t __attribute__ ((noipa))
+callee_7 (svcount_t p0, svcount_t p1, svcount_t p2, svcount_t p3,
+	  int x0, int x1, int x2, int x3, int x4, int x5, int x6, int x7,
+	  svcount_t p4)
+{
+  return p4;
+}
+
+/*
+** caller_1:
+**	...
+**	ldr	p0, \[x0\]
+**	ldr	p1, \[x1\]
+**	ldr	p2, \[x2\]
+**	ldr	p3, \[x3\]
+**	bl	callee_1
+**	...
+**	str	p0, .*
+**	...
+*/
+void __attribute__ ((noipa))
+caller_1 (volatile svcount_t *ptr0, volatile svcount_t *ptr1,
+	  volatile svcount_t *ptr2, volatile svcount_t *ptr3,
+	  svcount_t *ptr4)
+{
+  svcount_t p0 = *ptr0;
+  svcount_t p1 = *ptr1;
+  svcount_t p2 = *ptr2;
+  svcount_t p3 = *ptr3;
+  *ptr4 = callee_1 (p0, p1, p2, p3);
+}
+
+/*
+** caller_3:
+**	...
+**	ldr	p0, \[x1\]
+**	ldr	p1, \[x2\]
+**	ldr	p2, \[x3\]
+**	ldr	p3, \[x4\]
+**	bl	callee_3
+**	...
+*/
+void __attribute__ ((noipa))
+caller_3 (svcount_t *ptr,
+	  volatile svbool_t *ptr0, volatile svbool_t *ptr1,
+	  volatile svbool_t *ptr2, volatile svcount_t *ptr3)
+{
+  svbool_t p0 = *ptr0;
+  svbool_t p1 = *ptr1;
+  svbool_t p2 = *ptr2;
+  svcount_t p3 = *ptr3;
+  callee_3 (p0, p1, p2, p3, ptr);
+}
+
+/*
+** caller_4:
+**	...
+**	ldr	p0, \[x1\]
+**	ldr	p1, \[x2\]
+**	ldr	p2, \[x3\]
+**	ldr	p3, \[x4\]
+**	bl	callee_4
+**	...
+*/
+void __attribute__ ((noipa))
+caller_4 (svbool_t *ptr,
+	  volatile svcount_t *ptr0, volatile svcount_t *ptr1,
+	  volatile svcount_t *ptr2, volatile svbool_t *ptr3)
+{
+  svcount_t p0 = *ptr0;
+  svcount_t p1 = *ptr1;
+  svcount_t p2 = *ptr2;
+  svbool_t p3 = *ptr3;
+  callee_4 (p0, p1, p2, p3, ptr);
+}
+
+/*
+** caller_5:
+**	...
+**	ldr	p0, \[x1\]
+**	ldr	p1, \[x2\]
+**	ldr	p2, \[x3\]
+**	ldr	p3, \[x4\]
+**	...
+**	mov	x0, sp
+**	...
+**	str	p[0-9]+, \[(?:x0|sp)\]
+**	...
+**	bl	callee_5
+**	...
+**	str	p0, .*
+**	...
+*/
+void __attribute__ ((noipa))
+caller_5 (svcount_t *ptr,
+	  volatile svcount_t *ptr0, volatile svcount_t *ptr1,
+	  volatile svcount_t *ptr2, volatile svcount_t *ptr3,
+	  volatile svcount_t *ptr4)
+{
+  svcount_t p0 = *ptr0;
+  svcount_t p1 = *ptr1;
+  svcount_t p2 = *ptr2;
+  svcount_t p3 = *ptr3;
+  svcount_t p4 = *ptr4;
+  *ptr = callee_5 (p0, p1, p2, p3, p4);
+}
+
+/*
+** caller_7:
+**	...
+**	ldr	(p[0-9]+), \[x2\]
+**	...
+**	str	\1, \[(x[0-9]+)\]
+**	...
+**	str	\2, \[sp\]
+**	...
+**	bl	callee_7
+**	...
+*/
+void __attribute__ ((noipa))
+caller_7 (svcount_t *ptr, volatile svcount_t *ptr0, volatile svcount_t *ptr1)
+{
+  svcount_t p0 = *ptr0;
+  svcount_t p1 = *ptr1;
+  *ptr = callee_7 (p0, p0, p0, p0, 0, 0, 0, 0, 0, 0, 0, 0, p1);
+}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [pushed v2 3/5] aarch64: Add svboolx2_t
  2023-12-05 10:24 [pushed v2 0/5] aarch64: Add support for SME2 Richard Sandiford
  2023-12-05 10:24 ` [pushed v2 1/5] aarch64: Add +sme2 Richard Sandiford
  2023-12-05 10:25 ` [pushed v2 2/5] aarch64: Add svcount_t Richard Sandiford
@ 2023-12-05 10:25 ` Richard Sandiford
  2023-12-05 10:25 ` [pushed v2 4/5] aarch64: Add ZT0 Richard Sandiford
  2023-12-05 10:25 ` [pushed v2 5/5] aarch64: Add support for SME2 intrinsics Richard Sandiford
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Sandiford @ 2023-12-05 10:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford

SME2 has some instructions that operate on pairs of predicates.
The SME2 ACLE defines an svboolx2_t type for the associated
intrinsics.

The patch uses a double-width predicate mode, VNx32BI, to represent
the contents, similarly to how data vector tuples work.  At present
there doesn't seem to be any need to define pairs for VNx2BI,
VNx4BI and VNx8BI.

We already supported pairs of svbool_ts at the PCS level, as part
of a more general framework.  All that changes on the PCS side is
that we now have an associated mode.

gcc/
	* config/aarch64/aarch64-modes.def (VNx32BI): New mode.
	* config/aarch64/aarch64-protos.h (aarch64_split_double_move): Declare.
	* config/aarch64/aarch64-sve-builtins.cc
	(register_tuple_type): Handle tuples of predicates.
	(handle_arm_sve_h): Define svboolx2_t as a pair of two svbool_ts.
	* config/aarch64/aarch64-sve.md (movvnx32bi): New insn.
	* config/aarch64/aarch64.cc
	(pure_scalable_type_info::piece::get_rtx): Use VNx32BI for pairs
	of predicates.
	(pure_scalable_type_info::add_piece): Don't try to form pairs of
	predicates.
	(VEC_STRUCT): Generalize comment.
	(aarch64_classify_vector_mode): Handle VNx32BI.
	(aarch64_array_mode): Likewise.  Return BLKmode for arrays of
	predicates that have no associated mode, rather than allowing
	an integer mode to be chosen.
	(aarch64_hard_regno_nregs): Handle VNx32BI.
	(aarch64_hard_regno_mode_ok): Likewise.
	(aarch64_split_double_move): New function, split out from...
	(aarch64_split_128bit_move): ...here.
	(aarch64_ptrue_reg): Tighten assert to aarch64_sve_pred_mode_p.
	(aarch64_pfalse_reg): Likewise.
	(aarch64_sve_same_pred_for_ptest_p): Likewise.
	(aarch64_sme_mode_switch_regs::add_reg): Handle VNx32BI.
	(aarch64_expand_mov_immediate): Restrict handling of boolean vector
	constants to single-predicate modes.
	(aarch64_classify_address): Handle VNx32BI, ensuring that both halves
	can be addressed.
	(aarch64_class_max_nregs): Handle VNx32BI.
	(aarch64_member_type_forces_blk): Don't force BLKmode for svboolx2_t.
	(aarch64_simd_valid_immediate): Allow all-zeros and all-ones for
	VNx32BI.
	(aarch64_mov_operand_p): Restrict predicate constant canonicalization
	to single-predicate modes.
	(aarch64_evpc_ext): Generalize exclusion to all predicate modes.
	(aarch64_evpc_rev_local, aarch64_evpc_dup): Likewise.
	* config/aarch64/constraints.md (Up2): New constraint.

gcc/testsuite/
	* gcc.target/aarch64/sve/pcs/struct_3_128.c (test_nonpst3): Adjust
	stack offsets.
	(ret_nonpst3): Remove XFAIL.
	* gcc.target/aarch64/sve/acle/general-c/svboolx2_1.c: New test.
---
 gcc/config/aarch64/aarch64-modes.def          |   3 +
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    |  18 ++-
 gcc/config/aarch64/aarch64-sve.md             |  22 +++
 gcc/config/aarch64/aarch64.cc                 | 136 ++++++++++++------
 gcc/config/aarch64/constraints.md             |   4 +
 .../aarch64/sve/acle/general-c/svboolx2_1.c   | 135 +++++++++++++++++
 .../gcc.target/aarch64/sve/pcs/struct_3_128.c |   6 +-
 8 files changed, 272 insertions(+), 53 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx2_1.c

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index a3efc5b8484..ffca5517dec 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -48,16 +48,19 @@ ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
 
 /* Vector modes.  */
 
+VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4);
 VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
 VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
 VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
 VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
 
+ADJUST_NUNITS (VNx32BI, aarch64_sve_vg * 16);
 ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
 ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
 ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
 ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
 
+ADJUST_ALIGNMENT (VNx32BI, 2);
 ADJUST_ALIGNMENT (VNx16BI, 2);
 ADJUST_ALIGNMENT (VNx8BI, 2);
 ADJUST_ALIGNMENT (VNx4BI, 2);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ce7046b050e..25a9103f0e7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -947,6 +947,7 @@ rtx aarch64_simd_expand_builtin (int, tree, rtx);
 void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
 rtx aarch64_endian_lane_rtx (machine_mode, unsigned int);
 
+void aarch64_split_double_move (rtx, rtx, machine_mode);
 void aarch64_split_128bit_move (rtx, rtx);
 
 bool aarch64_split_128bit_move_p (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index e32f0f8f903..7e4b9e67ed8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -3949,6 +3949,9 @@ register_vector_type (vector_type_index type)
 static void
 register_tuple_type (unsigned int num_vectors, vector_type_index type)
 {
+  tree vector_type = acle_vector_types[0][type];
+  bool is_pred = GET_MODE_CLASS (TYPE_MODE (vector_type)) == MODE_VECTOR_BOOL;
+
   /* Work out the structure name.  */
   char buffer[sizeof ("svbfloat16x4_t")];
   const char *vector_type_name = vector_types[type].acle_name;
@@ -3969,17 +3972,19 @@ register_tuple_type (unsigned int num_vectors, vector_type_index type)
 
      Using arrays simplifies the handling of svget and svset for variable
      arguments.  */
-  tree vector_type = acle_vector_types[0][type];
   tree array_type = build_array_type_nelts (vector_type, num_vectors);
   gcc_assert (VECTOR_MODE_P (TYPE_MODE (array_type))
 	      && TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
-	      && TYPE_ALIGN (array_type) == 128);
+	      && TYPE_ALIGN (array_type) == (is_pred ? 16 : 128));
 
   tree tuple_type = wrap_type_in_struct (array_type);
-  add_sve_type_attribute (tuple_type, num_vectors, 0, NULL, buffer);
+  if (is_pred)
+    add_sve_type_attribute (tuple_type, 0, num_vectors, NULL, buffer);
+  else
+    add_sve_type_attribute (tuple_type, num_vectors, 0, NULL, buffer);
   gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type))
 	      && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type)
-	      && TYPE_ALIGN (tuple_type) == 128);
+	      && TYPE_ALIGN (tuple_type) == TYPE_ALIGN (array_type));
 
   register_type_decl (tuple_type, buffer);
 
@@ -4031,9 +4036,10 @@ handle_arm_sve_h ()
     {
       vector_type_index type = vector_type_index (type_i);
       register_vector_type (type);
-      if (scalar_types[type_i] != boolean_type_node)
+      if (type != VECTOR_TYPE_svcount_t)
 	for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
-	  register_tuple_type (count, type);
+	  if (type != VECTOR_TYPE_svbool_t || count == 2)
+	    register_tuple_type (count, type);
     }
 
   /* Define the enums.  */
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3f48e4cdf26..3729c67eb69 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -33,6 +33,7 @@
 ;; ---- Moves of single vectors
 ;; ---- Moves of multiple vectors
 ;; ---- Moves of predicates
+;; ---- Moves of multiple predicates
 ;; ---- Moves relating to the FFR
 ;;
 ;; == Loads
@@ -1069,6 +1070,27 @@ (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
   }
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Moves of multiple predicates
+;; -------------------------------------------------------------------------
+
+(define_insn_and_split "movvnx32bi"
+  [(set (match_operand:VNx32BI 0 "nonimmediate_operand")
+	(match_operand:VNx32BI 1 "aarch64_mov_operand"))]
+  "TARGET_SVE"
+  {@ [ cons: =0 , 1   ]
+     [ Upa      , Upa ] #
+     [ Upa      , m   ] #
+     [ m        , Upa ] #
+  }
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_double_move (operands[0], operands[1], VNx16BImode);
+    DONE;
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Moves relating to the FFR
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 48b7811c100..b29d56b3743 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -846,7 +846,7 @@ pure_scalable_type_info::piece::get_rtx (unsigned int first_zr,
   if (num_zr > 0 && num_pr == 0)
     return gen_rtx_REG (mode, first_zr);
 
-  if (num_zr == 0 && num_pr == 1)
+  if (num_zr == 0 && num_pr <= 2)
     return gen_rtx_REG (mode, first_pr);
 
   gcc_unreachable ();
@@ -1069,6 +1069,7 @@ pure_scalable_type_info::add_piece (const piece &p)
       gcc_assert (VECTOR_MODE_P (p.mode) && VECTOR_MODE_P (prev.mode));
       unsigned int nelems1, nelems2;
       if (prev.orig_mode == p.orig_mode
+	  && GET_MODE_CLASS (p.orig_mode) != MODE_VECTOR_BOOL
 	  && known_eq (prev.offset + GET_MODE_SIZE (prev.mode), p.offset)
 	  && constant_multiple_p (GET_MODE_NUNITS (prev.mode),
 				  GET_MODE_NUNITS (p.orig_mode), &nelems1)
@@ -1370,8 +1371,7 @@ aarch64_sve_pred_mode_p (machine_mode mode)
 const unsigned int VEC_ADVSIMD  = 1;
 const unsigned int VEC_SVE_DATA = 2;
 const unsigned int VEC_SVE_PRED = 4;
-/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
-   a structure of 2, 3 or 4 vectors.  */
+/* Indicates a structure of 2, 3 or 4 vectors or predicates.  */
 const unsigned int VEC_STRUCT   = 8;
 /* Can be used in combination with VEC_SVE_DATA to indicate that the
    vector has fewer significant bytes than a full SVE vector.  */
@@ -1534,6 +1534,9 @@ aarch64_classify_vector_mode (machine_mode mode, bool any_target_p = false)
     case E_V2DFmode:
       return (TARGET_FLOAT || any_target_p) ? VEC_ADVSIMD : 0;
 
+    case E_VNx32BImode:
+      return TARGET_SVE ? VEC_SVE_PRED | VEC_STRUCT : 0;
+
     default:
       return 0;
     }
@@ -1661,12 +1664,24 @@ aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
 static opt_machine_mode
 aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
 {
-  if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
-      && IN_RANGE (nelems, 2, 4))
+  if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+    {
+      /* Use VNx32BI for pairs of predicates, but explicitly reject giving
+	 a mode to other array sizes.  Using integer modes requires a round
+	 trip through memory and generates terrible code.  */
+      if (nelems == 1)
+	return mode;
+      if (mode == VNx16BImode && nelems == 2)
+	return VNx32BImode;
+      return BLKmode;
+    }
+
+  auto flags = aarch64_classify_vector_mode (mode);
+  if (flags == VEC_SVE_DATA && IN_RANGE (nelems, 2, 4))
     return aarch64_sve_data_mode (GET_MODE_INNER (mode),
 				  GET_MODE_NUNITS (mode) * nelems);
-  if (aarch64_classify_vector_mode (mode) == VEC_ADVSIMD
-      && IN_RANGE (nelems, 2, 4))
+
+  if (flags == VEC_ADVSIMD && IN_RANGE (nelems, 2, 4))
     return aarch64_advsimd_vector_array_mode (mode, nelems);
 
   return opt_machine_mode ();
@@ -1886,13 +1901,17 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
 	  return GET_MODE_SIZE (mode).to_constant () / 8;
 	return CEIL (lowest_size, UNITS_PER_VREG);
       }
+
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
+      return mode == VNx32BImode ? 2 : 1;
+
     case FFR_REGS:
     case PR_AND_FFR_REGS:
     case FAKE_REGS:
       return 1;
+
     default:
       return CEIL (lowest_size, UNITS_PER_WORD);
     }
@@ -1916,9 +1935,12 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
     return mode == DImode;
 
   unsigned int vec_flags = aarch64_classify_vector_mode (mode);
-  if (vec_flags & VEC_SVE_PRED)
+  if (vec_flags == VEC_SVE_PRED)
     return pr_or_ffr_regnum_p (regno);
 
+  if (vec_flags == (VEC_SVE_PRED | VEC_STRUCT))
+    return PR_REGNUM_P (regno);
+
   if (pr_or_ffr_regnum_p (regno))
     return false;
 
@@ -3000,6 +3022,33 @@ aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
     emit_move_insn (dest, tmp);
 }
 
+/* Split a move from SRC to DST into two moves of mode SINGLE_MODE.  */
+
+void
+aarch64_split_double_move (rtx dst, rtx src, machine_mode single_mode)
+{
+  machine_mode mode = GET_MODE (dst);
+
+  rtx dst0 = simplify_gen_subreg (single_mode, dst, mode, 0);
+  rtx dst1 = simplify_gen_subreg (single_mode, dst, mode,
+				  GET_MODE_SIZE (single_mode));
+  rtx src0 = simplify_gen_subreg (single_mode, src, mode, 0);
+  rtx src1 = simplify_gen_subreg (single_mode, src, mode,
+				  GET_MODE_SIZE (single_mode));
+
+  /* At most one pairing may overlap.  */
+  if (reg_overlap_mentioned_p (dst0, src1))
+    {
+      aarch64_emit_move (dst1, src1);
+      aarch64_emit_move (dst0, src0);
+    }
+  else
+    {
+      aarch64_emit_move (dst0, src0);
+      aarch64_emit_move (dst1, src1);
+    }
+}
+
 /* Split a 128-bit move operation into two 64-bit move operations,
    taking care to handle partial overlap of register to register
    copies.  Special cases are needed when moving between GP regs and
@@ -3009,9 +3058,6 @@ aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
 void
 aarch64_split_128bit_move (rtx dst, rtx src)
 {
-  rtx dst_lo, dst_hi;
-  rtx src_lo, src_hi;
-
   machine_mode mode = GET_MODE (dst);
 
   gcc_assert (mode == TImode || mode == TFmode || mode == TDmode);
@@ -3026,8 +3072,8 @@ aarch64_split_128bit_move (rtx dst, rtx src)
       /* Handle FP <-> GP regs.  */
       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 	{
-	  src_lo = gen_lowpart (word_mode, src);
-	  src_hi = gen_highpart (word_mode, src);
+	  rtx src_lo = gen_lowpart (word_mode, src);
+	  rtx src_hi = gen_highpart (word_mode, src);
 
 	  emit_insn (gen_aarch64_movlow_di (mode, dst, src_lo));
 	  emit_insn (gen_aarch64_movhigh_di (mode, dst, src_hi));
@@ -3035,8 +3081,8 @@ aarch64_split_128bit_move (rtx dst, rtx src)
 	}
       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 	{
-	  dst_lo = gen_lowpart (word_mode, dst);
-	  dst_hi = gen_highpart (word_mode, dst);
+	  rtx dst_lo = gen_lowpart (word_mode, dst);
+	  rtx dst_hi = gen_highpart (word_mode, dst);
 
 	  emit_insn (gen_aarch64_movdi_low (mode, dst_lo, src));
 	  emit_insn (gen_aarch64_movdi_high (mode, dst_hi, src));
@@ -3044,22 +3090,7 @@ aarch64_split_128bit_move (rtx dst, rtx src)
 	}
     }
 
-  dst_lo = gen_lowpart (word_mode, dst);
-  dst_hi = gen_highpart (word_mode, dst);
-  src_lo = gen_lowpart (word_mode, src);
-  src_hi = gen_highpart_mode (word_mode, mode, src);
-
-  /* At most one pairing may overlap.  */
-  if (reg_overlap_mentioned_p (dst_lo, src_hi))
-    {
-      aarch64_emit_move (dst_hi, src_hi);
-      aarch64_emit_move (dst_lo, src_lo);
-    }
-  else
-    {
-      aarch64_emit_move (dst_lo, src_lo);
-      aarch64_emit_move (dst_hi, src_hi);
-    }
+  aarch64_split_double_move (dst, src, word_mode);
 }
 
 /* Return true if we should split a move from 128-bit value SRC
@@ -3325,7 +3356,7 @@ aarch64_ptrue_all (unsigned int elt_size)
 rtx
 aarch64_ptrue_reg (machine_mode mode)
 {
-  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+  gcc_assert (aarch64_sve_pred_mode_p (mode));
   rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
   return gen_lowpart (mode, reg);
 }
@@ -3335,7 +3366,7 @@ aarch64_ptrue_reg (machine_mode mode)
 rtx
 aarch64_pfalse_reg (machine_mode mode)
 {
-  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+  gcc_assert (aarch64_sve_pred_mode_p (mode));
   rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode));
   return gen_lowpart (mode, reg);
 }
@@ -3351,7 +3382,7 @@ bool
 aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
 {
   machine_mode mode = GET_MODE (pred1[0]);
-  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+  gcc_assert (aarch64_sve_pred_mode_p (mode)
 	      && mode == GET_MODE (pred2[0])
 	      && aarch64_sve_ptrue_flag (pred1[1], SImode)
 	      && aarch64_sve_ptrue_flag (pred2[1], SImode));
@@ -4824,7 +4855,9 @@ aarch64_sme_mode_switch_regs::add_reg (machine_mode mode, unsigned int regno)
       machine_mode submode = mode;
       if (vec_flags & VEC_STRUCT)
 	{
-	  if (vec_flags & VEC_SVE_DATA)
+	  if (vec_flags & VEC_SVE_PRED)
+	    submode = VNx16BImode;
+	  else if (vec_flags & VEC_SVE_DATA)
 	    submode = SVE_BYTE_MODE;
 	  else if (vec_flags & VEC_PARTIAL)
 	    submode = V8QImode;
@@ -4833,7 +4866,7 @@ aarch64_sme_mode_switch_regs::add_reg (machine_mode mode, unsigned int regno)
 	}
       save_location loc;
       loc.reg = gen_rtx_REG (submode, regno);
-      if (vec_flags == VEC_SVE_PRED)
+      if (vec_flags & VEC_SVE_PRED)
 	{
 	  gcc_assert (PR_REGNUM_P (regno));
 	  loc.group = MEM_SVE_PRED;
@@ -5845,7 +5878,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 
   if (!CONST_INT_P (imm))
     {
-      if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+      if (aarch64_sve_pred_mode_p (mode))
 	{
 	  /* Only the low bit of each .H, .S and .D element is defined,
 	     so we can set the upper bits to whatever we like.  If the
@@ -10311,6 +10344,15 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	  if (vec_flags == VEC_SVE_PRED)
 	    return offset_9bit_signed_scaled_p (mode, offset);
 
+	  if (vec_flags == (VEC_SVE_PRED | VEC_STRUCT))
+	    {
+	      poly_int64 end_offset = (offset
+				       + GET_MODE_SIZE (mode)
+				       - BYTES_PER_SVE_PRED);
+	      return (offset_9bit_signed_scaled_p (VNx16BImode, end_offset)
+		      && offset_9bit_signed_scaled_p (VNx16BImode, offset));
+	    }
+
 	  if (load_store_pair_p)
 	    return ((known_eq (GET_MODE_SIZE (mode), 4)
 		     || known_eq (GET_MODE_SIZE (mode), 8)
@@ -12611,10 +12653,12 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
 	      ? CEIL (lowest_size, UNITS_PER_VREG)
 	      : CEIL (lowest_size, UNITS_PER_WORD));
 
-    case STACK_REG:
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
+      return mode == VNx32BImode ? 2 : 1;
+
+    case STACK_REG:
     case FFR_REGS:
     case PR_AND_FFR_REGS:
     case FAKE_REGS:
@@ -20252,11 +20296,11 @@ aarch64_member_type_forces_blk (const_tree field_or_array, machine_mode mode)
      an ARRAY_TYPE.  In both cases we're interested in the TREE_TYPE.  */
   const_tree type = TREE_TYPE (field_or_array);
 
-  /* Assign BLKmode to anything that contains multiple SVE predicates.
+  /* Assign BLKmode to anything that contains more than 2 SVE predicates.
      For structures, the "multiple" case is indicated by MODE being
      VOIDmode.  */
   unsigned int num_zr, num_pr;
-  if (aarch64_sve::builtin_type_p (type, &num_zr, &num_pr) && num_pr != 0)
+  if (aarch64_sve::builtin_type_p (type, &num_zr, &num_pr) && num_pr > 2)
     {
       if (TREE_CODE (field_or_array) == ARRAY_TYPE)
 	return !simple_cst_equal (TYPE_SIZE (field_or_array),
@@ -21496,6 +21540,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   if ((vec_flags & VEC_ADVSIMD) && !TARGET_SIMD)
     return false;
 
+  if (vec_flags == (VEC_SVE_PRED | VEC_STRUCT))
+    return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode);
+
   if (vec_flags & VEC_SVE_PRED)
     return aarch64_sve_pred_valid_immediate (op, info);
 
@@ -21669,7 +21716,8 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
 	 force everything to have a canonical form.  */
       if (!lra_in_progress
 	  && !reload_completed
-	  && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL
+	  && aarch64_sve_pred_mode_p (GET_MODE (x))
+	  && known_eq (GET_MODE_SIZE (GET_MODE (x)), BYTES_PER_SVE_PRED)
 	  && GET_MODE (x) != VNx16BImode)
 	return false;
 
@@ -24272,7 +24320,7 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d)
 
   /* The first element always refers to the first vector.
      Check if the extracted indices are increasing by one.  */
-  if (d->vec_flags == VEC_SVE_PRED
+  if ((d->vec_flags & VEC_SVE_PRED)
       || !d->perm[0].is_constant (&location)
       || !d->perm.series_p (0, 1, location, 1))
     return false;
@@ -24316,7 +24364,7 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
   unsigned int i, size, unspec;
   machine_mode pred_mode;
 
-  if (d->vec_flags == VEC_SVE_PRED
+  if ((d->vec_flags & VEC_SVE_PRED)
       || !d->one_vector_p
       || !d->perm[0].is_constant (&diff)
       || !diff)
@@ -24397,7 +24445,7 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d)
   machine_mode vmode = d->vmode;
   rtx lane;
 
-  if (d->vec_flags == VEC_SVE_PRED
+  if ((d->vec_flags & VEC_SVE_PRED)
       || d->perm.encoding ().encoded_nelts () != 1
       || !d->perm[0].is_constant (&elt))
     return false;
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 38ed927ec14..78a62af1abf 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -42,6 +42,10 @@ (define_register_constraint "w" "FP_REGS"
 (define_register_constraint "Upa" "PR_REGS"
   "SVE predicate registers p0 - p15.")
 
+(define_register_constraint "Up2" "PR_REGS"
+  "An even SVE predicate register, p0 - p14."
+  "regno % 2 == 0")
+
 (define_register_constraint "Upl" "PR_LO_REGS"
   "SVE predicate registers p0 - p7.")
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx2_1.c
new file mode 100644
index 00000000000..877b1849986
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx2_1.c
@@ -0,0 +1,135 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** ret_p0:
+**	ret
+*/
+svboolx2_t
+ret_p0 (svboolx2_t p0)
+{
+  return p0;
+}
+
+/*
+** ret_p1:
+**	mov	p0\.b, p1\.b
+**	mov	p1\.b, p2\.b
+**	ret
+*/
+svboolx2_t
+ret_p1 (svbool_t p0, svboolx2_t p1)
+{
+  return p1;
+}
+
+/*
+** ret_p2:
+** (
+**	mov	p0\.b, p2\.b
+**	mov	p1\.b, p3\.b
+** |
+**	mov	p1\.b, p3\.b
+**	mov	p0\.b, p2\.b
+** )
+**	ret
+*/
+svboolx2_t
+ret_p2 (svboolx2_t p0, svboolx2_t p2)
+{
+  return p2;
+}
+
+/*
+** ret_mem:
+** (
+**	ldr	p0, \[x0\]
+**	ldr	p1, \[x0, #1, mul vl\]
+** |
+**	ldr	p1, \[x0, #1, mul vl\]
+**	ldr	p0, \[x0\]
+** )
+**	ret
+*/
+svboolx2_t
+ret_mem (svboolx2_t p0, svbool_t p2, svboolx2_t mem)
+{
+  return mem;
+}
+
+/*
+** load:
+** (
+**	ldr	p0, \[x0\]
+**	ldr	p1, \[x0, #1, mul vl\]
+** |
+**	ldr	p1, \[x0, #1, mul vl\]
+**	ldr	p0, \[x0\]
+** )
+**	ret
+*/
+svboolx2_t
+load (svboolx2_t *ptr)
+{
+  return *ptr;
+}
+
+/*
+** store:
+** (
+**	str	p1, \[x0\]
+**	str	p2, \[x0, #1, mul vl\]
+** |
+**	str	p2, \[x0, #1, mul vl\]
+**	str	p1, \[x0\]
+** )
+**	ret
+*/
+void
+store (svbool_t p0, svboolx2_t p1, svboolx2_t *ptr)
+{
+  *ptr = p1;
+}
+
+/*
+** upa_p1:
+**	ret
+*/
+void
+upa_p1 (svbool_t p0, svboolx2_t p1)
+{
+  asm volatile ("" :: "Upa" (p1));
+}
+
+/*
+** up2_p1:
+** (
+**	mov	p0\.b, p1\.b
+**	mov	p1\.b, p2\.b
+** |
+**	mov	p3\.b, p2\.b
+**	mov	p2\.b, p1\.b
+** )
+**	ret
+*/
+void
+up2_p1 (svbool_t p0, svboolx2_t p1)
+{
+  asm volatile ("" :: "Up2" (p1));
+}
+
+/*
+** p1_to_p2:
+**	mov	p3\.b, p2\.b
+**	mov	p2\.b, p1\.b
+**	ret
+*/
+void
+p1_to_p2 (svbool_t p0, svboolx2_t p1)
+{
+  register svboolx2_t p2 asm ("p2") = p1;
+  asm volatile ("" :: "Up2" (p2));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/struct_3_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/struct_3_128.c
index f6d78469aa5..b8fe86058a9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/struct_3_128.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/struct_3_128.c
@@ -908,8 +908,8 @@ SEL2 (union, nonpst3)
 /*
 ** test_nonpst3:
 **	sub	sp, sp, #16
-**	str	w0, \[sp, #?8\]
-**	ldr	p0, \[sp, #4, mul vl\]
+**	str	w0, \[sp, #?12\]
+**	ldr	p0, \[sp, #6, mul vl\]
 **	add	sp, sp, #?16
 **	ret
 */
@@ -921,7 +921,7 @@ test_nonpst3 (union nonpst3 x)
 }
 
 /*
-** ret_nonpst3: { xfail *-*-* }
+** ret_nonpst3:
 **	mov	w0, #?(?:0xffff|65535)
 **	ret
 */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [pushed v2 4/5] aarch64: Add ZT0
  2023-12-05 10:24 [pushed v2 0/5] aarch64: Add support for SME2 Richard Sandiford
                   ` (2 preceding siblings ...)
  2023-12-05 10:25 ` [pushed v2 3/5] aarch64: Add svboolx2_t Richard Sandiford
@ 2023-12-05 10:25 ` Richard Sandiford
  2023-12-05 10:25 ` [pushed v2 5/5] aarch64: Add support for SME2 intrinsics Richard Sandiford
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Sandiford @ 2023-12-05 10:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford

SME2 adds a 512-bit lookup table called ZT0.  It is enabled
and disabled by PSTATE.ZA, just like ZA itself.  This patch
adds support for the register, including saving and restoring
contents.

The code reuses the V8DI that was added for LS64, including
the associated memory classification rules.  (The ZT0 range
is more restricted than the LS64 range, but that's enforced
by predicates and constraints.)

gcc/
	* config/aarch64/aarch64.md (ZT0_REGNUM): New constant.
	(LAST_FAKE_REGNUM): Bump to include it.
	* config/aarch64/aarch64.h (FIXED_REGISTERS): Add an entry for ZT0.
	(CALL_REALLY_USED_REGISTERS, REGISTER_NAMES): Likewise.
	(REG_CLASS_CONTENTS): Likewise.
	(machine_function): Add zt0_save_buffer.
	(CUMULATIVE_ARGS): Add shared_zt0_flags.
	* config/aarch64/aarch64.cc (aarch64_check_state_string): Handle zt0.
	(aarch64_fntype_pstate_za, aarch64_fndecl_pstate_za): Likewise.
	(aarch64_function_arg): Add the shared ZT0 flags as an extra
	limb of the parallel.
	(aarch64_init_cumulative_args): Initialize shared_zt0_flags.
	(aarch64_extra_live_on_entry): Handle ZT0_REGNUM.
	(aarch64_epilogue_uses): Likewise.
	(aarch64_get_zt0_save_buffer, aarch64_save_zt0): New functions.
	(aarch64_restore_zt0): Likewise.
	(aarch64_start_call_args): Reject calls to functions that share
	ZT0 from functions that have no ZT0 state.  Save ZT0 around shared-ZA
	calls that do not share ZT0.
	(aarch64_expand_call): Handle ZT0.  Reject calls to functions that
	share ZT0 but not ZA from functions with ZA state.
	(aarch64_end_call_args): Restore ZT0 after calls to shared-ZA functions
	that do not share ZT0.
	(aarch64_set_current_function): Require +sme2 for functions that
	have ZT0 state.
	(aarch64_function_attribute_inlinable_p): Don't allow functions to
	be inlined if they have local zt0 state.
	(AARCH64_IPA_CLOBBERS_ZT0): New constant.
	(aarch64_update_ipa_fn_target_info): Record asms that clobber ZT0.
	(aarch64_can_inline_p): Don't inline callees that clobber ZT0
	into functions that have ZT0 state.
	(aarch64_comp_type_attributes): Check for compatible ZT0 sharing.
	(aarch64_optimize_mode_switching): Use mode switching if the
	function has ZT0 state.
	(aarch64_mode_emit_local_sme_state): Save and restore ZT0 around
	calls to private-ZA functions.
	(aarch64_mode_needed_local_sme_state): Require ZA to be active
	for instructions that access ZT0.
	(aarch64_mode_entry): Mark ZA as dead on entry if the function
	only shares state other than "za" itself.
	(aarch64_mode_exit): Likewise mark ZA as dead on return.
	(aarch64_md_asm_adjust): Extend handling of ZA clobbers to ZT0.
	* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
	Define __ARM_STATE_ZT0.
	* config/aarch64/aarch64-sme.md (UNSPECV_ASM_UPDATE_ZT0): New unspecv.
	(aarch64_asm_update_zt0): New insn.
	(UNSPEC_RESTORE_ZT0): New unspec.
	(aarch64_sme_ldr_zt0, aarch64_restore_zt0): New insns.
	(aarch64_sme_str_zt0): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sme/zt0_state_1.c: New test.
	* gcc.target/aarch64/sme/zt0_state_2.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_3.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_4.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_5.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_6.c: Likewise.
---
 gcc/config/aarch64/aarch64-c.cc               |   1 +
 gcc/config/aarch64/aarch64-sme.md             |  63 +++++
 gcc/config/aarch64/aarch64.cc                 | 209 ++++++++++++--
 gcc/config/aarch64/aarch64.h                  |  14 +-
 gcc/config/aarch64/aarch64.md                 |   7 +-
 .../gcc.target/aarch64/sme/zt0_state_1.c      |  65 +++++
 .../gcc.target/aarch64/sme/zt0_state_2.c      |  31 +++
 .../gcc.target/aarch64/sme/zt0_state_3.c      |   6 +
 .../gcc.target/aarch64/sme/zt0_state_4.c      |  53 ++++
 .../gcc.target/aarch64/sme/zt0_state_5.c      | 260 ++++++++++++++++++
 .../gcc.target/aarch64/sme/zt0_state_6.c      |  54 ++++
 11 files changed, 728 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_6.c

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 2a8ca46987a..017380b7563 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -74,6 +74,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
   builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
 
   builtin_define ("__ARM_STATE_ZA");
+  builtin_define ("__ARM_STATE_ZT0");
 
   /* Define keyword attributes like __arm_streaming as macros that expand
      to the associated [[...]] attribute.  Use __extension__ in the attribute
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index da0745f6570..505805e2ecf 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -27,7 +27,9 @@
 ;;
 ;; == Loads, stores and moves
 ;; ---- Single-vector loads
+;; ---- Table loads
 ;; ---- Single-vector stores
+;; ---- Table stores
 ;; ---- Single-vector moves
 ;; ---- Zeroing
 ;;
@@ -209,6 +211,7 @@ (define_c_enum "unspec" [
 
 (define_c_enum "unspecv" [
   UNSPECV_ASM_UPDATE_ZA
+  UNSPECV_ASM_UPDATE_ZT0
 ])
 
 ;; Use the ABI-defined routine to commit an uncommitted lazy save.
@@ -400,6 +403,19 @@ (define_insn "aarch64_asm_update_za"
   [(set_attr "type" "no_insn")]
 )
 
+;; A similar pattern for ZT0.
+(define_insn "aarch64_asm_update_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(unspec_volatile:V8DI
+	  [(reg:V8DI ZT0_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand 0 "const_int_operand")]
+	  UNSPECV_ASM_UPDATE_ZT0))]
+  ""
+  ""
+  [(set_attr "type" "no_insn")]
+)
+
 ;; This pseudo-instruction is emitted as part of a call to a private-ZA
 ;; function from a function with ZA state.  It marks a natural place to set
 ;; up a lazy save, if that turns out to be necessary.  The save itself
@@ -544,6 +560,38 @@ (define_insn "@aarch64_sme_ldrn<mode>"
   "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Table loads
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LDR
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+  UNSPEC_RESTORE_ZT0
+])
+
+(define_insn "aarch64_sme_ldr_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q"))
+   (use (reg:DI SME_STATE_REGNUM))]
+  "TARGET_SME2"
+  "ldr\tzt0, %0"
+)
+
+;; This version is used after calls to private-ZA functions.  Since ZT0_REGNUM
+;; represents the current function's state, it isn't clobbered by private-ZA
+;; functions, so we need to make it depend on the ZA reinitialization code.
+(define_insn "aarch64_restore_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(unspec:V8DI
+	  [(reg:DI SME_STATE_REGNUM)
+	   (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")]
+	  UNSPEC_RESTORE_ZT0))]
+  "TARGET_SME2"
+  "ldr\tzt0, %0"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Single-vector stores
 ;; -------------------------------------------------------------------------
@@ -614,6 +662,21 @@ (define_insn "@aarch64_sme_strn<mode>"
   "str\tza[%w0, %1], [%2, #%1, mul vl]"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Table stores
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - STR
+;; -------------------------------------------------------------------------
+
+(define_insn "aarch64_sme_str_zt0"
+  [(set (match_operand:V8DI 0 "aarch64_sync_memory_operand" "=Q")
+	(reg:V8DI ZT0_REGNUM))
+   (use (reg:DI SME_STATE_REGNUM))]
+  "TARGET_SME2"
+  "str\tzt0, %0"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Single-vector moves
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b29d56b3743..8f34ca14635 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -537,7 +537,8 @@ aarch64_check_state_string (tree name, tree value)
     }
 
   const char *state_name = TREE_STRING_POINTER (value);
-  if (strcmp (state_name, "za") != 0)
+  if (strcmp (state_name, "za") != 0
+      && strcmp (state_name, "zt0") != 0)
     {
       error ("unrecognized state string %qs", state_name);
       return false;
@@ -2083,7 +2084,8 @@ aarch64_fntype_shared_flags (const_tree fntype, const char *state_name)
 static aarch64_feature_flags
 aarch64_fntype_pstate_za (const_tree fntype)
 {
-  if (aarch64_fntype_shared_flags (fntype, "za"))
+  if (aarch64_fntype_shared_flags (fntype, "za")
+      || aarch64_fntype_shared_flags (fntype, "zt0"))
     return AARCH64_FL_ZA_ON;
 
   return 0;
@@ -2138,7 +2140,8 @@ aarch64_fndecl_has_state (tree fndecl, const char *state_name)
 static aarch64_feature_flags
 aarch64_fndecl_pstate_za (const_tree fndecl)
 {
-  if (aarch64_fndecl_has_new_state (fndecl, "za"))
+  if (aarch64_fndecl_has_new_state (fndecl, "za")
+      || aarch64_fndecl_has_new_state (fndecl, "zt0"))
     return AARCH64_FL_ZA_ON;
 
   return aarch64_fntype_pstate_za (TREE_TYPE (fndecl));
@@ -6956,9 +6959,11 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 						  pcum->pcs_variant);
       rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum);
       rtx shared_za_flags = gen_int_mode (pcum->shared_za_flags, SImode);
-      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, abi_cookie,
+      rtx shared_zt0_flags = gen_int_mode (pcum->shared_zt0_flags, SImode);
+      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, abi_cookie,
 						    sme_mode_switch_args,
-						    shared_za_flags));
+						    shared_za_flags,
+						    shared_zt0_flags));
     }
 
   aarch64_layout_arg (pcum_v, arg);
@@ -6996,6 +7001,8 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
   pcum->silent_p = silent_p;
   pcum->shared_za_flags
     = (fntype ? aarch64_fntype_shared_flags (fntype, "za") : 0U);
+  pcum->shared_zt0_flags
+    = (fntype ? aarch64_fntype_shared_flags (fntype, "zt0") : 0U);
   pcum->num_sme_mode_switch_args = 0;
 
   if (!silent_p
@@ -9130,6 +9137,13 @@ aarch64_extra_live_on_entry (bitmap regs)
       auto za_flags = aarch64_cfun_shared_flags ("za");
       if (za_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT))
 	bitmap_set_bit (regs, ZA_REGNUM);
+
+      /* Since ZT0 is call-clobbered, it is only live on input if
+	 it is explicitly shared, and is not a pure output.  */
+      auto zt0_flags = aarch64_cfun_shared_flags ("zt0");
+      if (zt0_flags != 0
+	  && zt0_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT))
+	bitmap_set_bit (regs, ZT0_REGNUM);
     }
 }
 
@@ -9158,6 +9172,8 @@ aarch64_epilogue_uses (int regno)
     return 1;
   if (regno == ZA_REGNUM && aarch64_cfun_shared_flags ("za") != 0)
     return 1;
+  if (regno == ZT0_REGNUM && aarch64_cfun_shared_flags ("zt0") != 0)
+    return 1;
   return 0;
 }
 
@@ -10828,6 +10844,40 @@ aarch64_restore_za (rtx tpidr2_block)
   emit_insn (gen_aarch64_tpidr2_restore ());
 }
 
+/* Return the ZT0 save buffer, creating one if necessary.  */
+
+static rtx
+aarch64_get_zt0_save_buffer ()
+{
+  if (!cfun->machine->zt0_save_buffer)
+    cfun->machine->zt0_save_buffer = assign_stack_local (V8DImode, 64, 128);
+  return cfun->machine->zt0_save_buffer;
+}
+
+/* Save ZT0 to the current function's save buffer.  */
+
+static void
+aarch64_save_zt0 ()
+{
+  rtx mem = aarch64_get_zt0_save_buffer ();
+  mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+  emit_insn (gen_aarch64_sme_str_zt0 (mem));
+}
+
+/* Restore ZT0 from the current function's save buffer.  FROM_LAZY_SAVE_P
+   is true if the load is happening after a call to a private-ZA function,
+   false if it can be treated as a normal load.  */
+
+static void
+aarch64_restore_zt0 (bool from_lazy_save_p)
+{
+  rtx mem = aarch64_get_zt0_save_buffer ();
+  mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+  emit_insn (from_lazy_save_p
+	     ? gen_aarch64_restore_zt0 (mem)
+	     : gen_aarch64_sme_ldr_zt0 (mem));
+}
+
 /* Implement TARGET_START_CALL_ARGS.  */
 
 static void
@@ -10848,6 +10898,10 @@ aarch64_start_call_args (cumulative_args_t ca_v)
       && !aarch64_cfun_has_state ("za"))
     error ("call to a function that shares %qs state from a function"
 	   " that has no %qs state", "za", "za");
+  else if ((ca->shared_zt0_flags & (AARCH64_STATE_IN | AARCH64_STATE_OUT))
+	   && !aarch64_cfun_has_state ("zt0"))
+    error ("call to a function that shares %qs state from a function"
+	   " that has no %qs state", "zt0", "zt0");
   else if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON))
     error ("call to a function that shares SME state from a function"
 	   " that has no SME state");
@@ -10857,6 +10911,13 @@ aarch64_start_call_args (cumulative_args_t ca_v)
      The code itself is inserted by the mode-switching pass.  */
   if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
     emit_insn (gen_aarch64_start_private_za_call ());
+
+  /* If this is a call to a shared-ZA function that doesn't share ZT0,
+     save and restore ZT0 around the call.  */
+  if (aarch64_cfun_has_state ("zt0")
+      && (ca->isa_mode & AARCH64_FL_ZA_ON)
+      && ca->shared_zt0_flags == 0)
+    aarch64_save_zt0 ();
 }
 
 /* This function is used by the call expanders of the machine description.
@@ -10869,8 +10930,8 @@ aarch64_start_call_args (cumulative_args_t ca_v)
        The second element is a PARALLEL that lists all the argument
        registers that need to be saved and restored around a change
        in PSTATE.SM, or const0_rtx if no such switch is needed.
-       The third element is a const_int that contains the sharing flags
-       for ZA.
+       The third and fourth elements are const_ints that contain the
+       sharing flags for ZA and ZT0 respectively.
    SIBCALL indicates whether this function call is normal call or sibling call.
    It will generate different pattern accordingly.  */
 
@@ -10884,16 +10945,28 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
   rtx callee_abi = cookie;
   rtx sme_mode_switch_args = const0_rtx;
   unsigned int shared_za_flags = 0;
+  unsigned int shared_zt0_flags = 0;
   if (GET_CODE (cookie) == PARALLEL)
     {
       callee_abi = XVECEXP (cookie, 0, 0);
       sme_mode_switch_args = XVECEXP (cookie, 0, 1);
       shared_za_flags = INTVAL (XVECEXP (cookie, 0, 2));
+      shared_zt0_flags = INTVAL (XVECEXP (cookie, 0, 3));
     }
 
   gcc_assert (CONST_INT_P (callee_abi));
   auto callee_isa_mode = aarch64_callee_isa_mode (callee_abi);
 
+  if (aarch64_cfun_has_state ("za")
+      && (callee_isa_mode & AARCH64_FL_ZA_ON)
+      && !shared_za_flags)
+    {
+      sorry ("call to a function that shares state other than %qs"
+	     " from a function that has %qs state", "za", "za");
+      inform (input_location, "use %<__arm_preserves(\"za\")%> if the"
+	      " callee preserves ZA");
+    }
+
   gcc_assert (MEM_P (mem));
   callee = XEXP (mem, 0);
   mode = GET_MODE (callee);
@@ -10926,6 +10999,8 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
      we want to know whether the call committed a lazy save.  */
   if (TARGET_ZA && !shared_za_flags)
     return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM));
+  if (shared_zt0_flags & AARCH64_STATE_OUT)
+    return_values.safe_push (gen_rtx_REG (V8DImode, ZT0_REGNUM));
 
   /* Create the new return value, if necessary.  */
   if (orig_num_return_values != return_values.length ())
@@ -11011,10 +11086,12 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
     }
 
   /* Add any ZA-related information.
+
      ZA_REGNUM represents the current function's ZA state, rather than
      the contents of the ZA register itself.  We ensure that the function's
      ZA state is preserved by private-ZA call sequences, so the call itself
-     does not use or clobber ZA_REGNUM.  */
+     does not use or clobber ZA_REGNUM.  The same thing applies to
+     ZT0_REGNUM.  */
   if (TARGET_ZA)
     {
       /* The callee requires ZA to be active if the callee is shared-ZA,
@@ -11034,10 +11111,14 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
 		 gen_rtx_REG (VNx16BImode, LOWERING_REGNUM));
 
       /* If the callee is a shared-ZA function, record whether it uses the
-	 current value of ZA.  */
+	 current value of ZA and ZT0.  */
       if (shared_za_flags & AARCH64_STATE_IN)
 	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
 		 gen_rtx_REG (VNx16BImode, ZA_REGNUM));
+
+      if (shared_zt0_flags & AARCH64_STATE_IN)
+	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
+		 gen_rtx_REG (V8DImode, ZT0_REGNUM));
     }
 }
 
@@ -11053,6 +11134,13 @@ aarch64_end_call_args (cumulative_args_t ca_v)
      The code itself is inserted by the mode-switching pass.  */
   if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
     emit_insn (gen_aarch64_end_private_za_call ());
+
+  /* If this is a call to a shared-ZA function that doesn't share ZT0,
+     save and restore ZT0 around the call.  */
+  if (aarch64_cfun_has_state ("zt0")
+      && (ca->isa_mode & AARCH64_FL_ZA_ON)
+      && ca->shared_zt0_flags == 0)
+    aarch64_restore_zt0 (false);
 }
 
 /* Emit call insn with PAT and do aarch64-specific handling.  */
@@ -18604,6 +18692,20 @@ aarch64_set_current_function (tree fndecl)
 		       : AARCH64_FL_DEFAULT_ISA_MODE);
   auto isa_flags = TREE_TARGET_OPTION (new_tree)->x_aarch64_isa_flags;
 
+  static bool reported_zt0_p;
+  if (!reported_zt0_p
+      && !(isa_flags & AARCH64_FL_SME2)
+      && fndecl
+      && aarch64_fndecl_has_state (fndecl, "zt0"))
+    {
+      error ("functions with %qs state require the ISA extension %qs",
+	     "zt0", "sme2");
+      inform (input_location, "you can enable %qs using the command-line"
+	      " option %<-march%>, or by using the %<target%>"
+	      " attribute or pragma", "sme2");
+      reported_zt0_p = true;
+    }
+
   /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
      the default have been handled by aarch64_save_restore_target_globals from
      aarch64_pragma_target_parse.  */
@@ -19215,9 +19317,10 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
 static bool
 aarch64_function_attribute_inlinable_p (const_tree fndecl)
 {
-  /* A function that has local ZA state cannot be inlined into its caller,
-     since we only support managing ZA switches at function scope.  */
-  return !aarch64_fndecl_has_new_state (fndecl, "za");
+  /* A function that has local SME state cannot be inlined into its caller,
+     since we only support managing PSTATE.ZA switches at function scope.  */
+  return (!aarch64_fndecl_has_new_state (fndecl, "za")
+	  && !aarch64_fndecl_has_new_state (fndecl, "zt0"));
 }
 
 /* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
@@ -19248,9 +19351,10 @@ aarch64_tribools_ok_for_inlining_p (int caller, int callee,
    Not meaningful for streaming-compatible functions.  */
 constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0;
 
-/* Set if the function clobbers ZA.  Not meaningful for functions that
+/* Set if the function clobbers ZA and ZT0.  Not meaningful for functions that
    have ZA state.  */
 constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1;
+constexpr auto AARCH64_IPA_CLOBBERS_ZT0 = 1U << 2;
 
 /* Implement TARGET_NEED_IPA_FN_TARGET_INFO.  */
 
@@ -19278,6 +19382,8 @@ aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
 	  const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op));
 	  if (strcmp (clobber, "za") == 0)
 	    info |= AARCH64_IPA_CLOBBERS_ZA;
+	  if (strcmp (clobber, "zt0") == 0)
+	    info |= AARCH64_IPA_CLOBBERS_ZT0;
 	}
     }
   if (auto *call = dyn_cast<const gcall *> (stmt))
@@ -19353,21 +19459,25 @@ aarch64_can_inline_p (tree caller, tree callee)
       && callee_has_property (AARCH64_IPA_SM_FIXED))
     return false;
 
-  /* aarch64_function_attribute_inlinable_p prevents new-ZA functions
-     from being inlined into others.  We also need to prevent inlining
-     of shared-ZA functions into functions without ZA state, since this
-     is an error condition.
+  /* aarch64_function_attribute_inlinable_p prevents new-ZA and new-ZT0
+     functions from being inlined into others.  We also need to prevent
+     inlining of shared-ZA functions into functions without ZA state,
+     since this is an error condition.
 
      The only other problematic case for ZA is inlining a function that
-     directly clobbers ZA into a function that has ZA state.  */
+     directly clobbers ZA or ZT0 into a function that has ZA or ZT0 state.  */
   auto caller_za = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON);
   auto callee_za = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON);
   if (!caller_za && callee_za)
     return false;
-  if (caller_za
-      && !callee_za
+  if (!callee_za
+      && aarch64_fndecl_has_state (caller, "za")
       && callee_has_property (AARCH64_IPA_CLOBBERS_ZA))
     return false;
+  if (!callee_za
+      && aarch64_fndecl_has_state (caller, "zt0")
+      && callee_has_property (AARCH64_IPA_CLOBBERS_ZT0))
+    return false;
 
   /* Allow non-strict aligned functions inlining into strict
      aligned ones.  */
@@ -27467,6 +27577,9 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2)
   if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "za")
       != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "za"))
     return 0;
+  if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "zt0")
+      != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "zt0"))
+    return 0;
   return 1;
 }
 
@@ -27934,7 +28047,9 @@ aarch64_optimize_mode_switching (aarch64_mode_entity entity)
 {
   bool have_sme_state = (aarch64_cfun_incoming_pstate_za () != 0
 			 || (aarch64_cfun_has_new_state ("za")
-			     && df_regs_ever_live_p (ZA_REGNUM)));
+			     && df_regs_ever_live_p (ZA_REGNUM))
+			 || (aarch64_cfun_has_new_state ("zt0")
+			     && df_regs_ever_live_p (ZT0_REGNUM)));
 
   if (have_sme_state && nonlocal_goto_handler_labels)
     {
@@ -28021,6 +28136,11 @@ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 	     In that case, ZA still contains the current function's ZA state,
 	     and we just need to cancel the lazy save.  */
 	  emit_insn (gen_aarch64_clear_tpidr2 ());
+
+	  /* Restore the ZT0 state, if we have some.  */
+	  if (aarch64_cfun_has_state ("zt0"))
+	    aarch64_restore_zt0 (true);
+
 	  return;
 	}
 
@@ -28029,6 +28149,10 @@ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 	  /* Retrieve the current function's ZA state from the lazy save
 	     buffer.  */
 	  aarch64_restore_za (aarch64_get_tpidr2_ptr ());
+
+	  /* Restore the ZT0 state, if we have some.  */
+	  if (aarch64_cfun_has_state ("zt0"))
+	    aarch64_restore_zt0 (true);
 	  return;
 	}
 
@@ -28045,6 +28169,11 @@ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 
 	     Both cases leave ZA zeroed.  */
 	  emit_insn (gen_aarch64_smstart_za ());
+
+	  /* Restore the ZT0 state, if we have some.  */
+	  if (prev_mode == aarch64_local_sme_state::OFF
+	      && aarch64_cfun_has_state ("zt0"))
+	    aarch64_restore_zt0 (true);
 	  return;
 	}
 
@@ -28063,6 +28192,10 @@ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 	  || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD
 	  || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER)
 	{
+	  /* Save the ZT0 state, if we have some.  */
+	  if (aarch64_cfun_has_state ("zt0"))
+	    aarch64_save_zt0 ();
+
 	  /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual
 	     case of setting up a lazy save buffer before a call.
 	     A transition from INACTIVE_CALLER is similar, except that
@@ -28090,6 +28223,13 @@ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
   if (mode == aarch64_local_sme_state::INACTIVE_CALLER
       || mode == aarch64_local_sme_state::OFF)
     {
+      /* Save the ZT0 state, if we have some.  */
+      if ((prev_mode == aarch64_local_sme_state::ACTIVE_LIVE
+	   || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD)
+	  && mode == aarch64_local_sme_state::OFF
+	  && aarch64_cfun_has_state ("zt0"))
+	aarch64_save_zt0 ();
+
       /* The transition to INACTIVE_CALLER is used before returning from
 	 new("za") functions.  Any state in ZA belongs to the current
 	 function rather than a caller, but that state is no longer
@@ -28238,8 +28378,10 @@ aarch64_mode_needed_local_sme_state (rtx_insn *insn, HARD_REG_SET live)
 	    : aarch64_local_sme_state::OFF);
 
   /* Force ZA to contain the current function's ZA state if INSN wants
-     to access it.  */
-  if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM))
+     to access it.  Do the same for accesses to ZT0, since ZA and ZT0
+     are both controlled by PSTATE.ZA.  */
+  if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM)
+      || aarch64_insn_references_sme_state_p (insn, ZT0_REGNUM))
     return (TEST_HARD_REG_BIT (live, ZA_REGNUM)
 	    ? aarch64_local_sme_state::ACTIVE_LIVE
 	    : aarch64_local_sme_state::ACTIVE_DEAD);
@@ -28457,6 +28599,8 @@ aarch64_mode_entry (int entity)
     case aarch64_mode_entity::LOCAL_SME_STATE:
       return int (aarch64_cfun_shared_flags ("za") != 0
 		  ? aarch64_local_sme_state::ACTIVE_LIVE
+		  : aarch64_cfun_incoming_pstate_za () != 0
+		  ? aarch64_local_sme_state::ACTIVE_DEAD
 		  : aarch64_local_sme_state::INACTIVE_CALLER);
     }
   gcc_unreachable ();
@@ -28475,6 +28619,8 @@ aarch64_mode_exit (int entity)
     case aarch64_mode_entity::LOCAL_SME_STATE:
       return int (aarch64_cfun_shared_flags ("za") != 0
 		  ? aarch64_local_sme_state::ACTIVE_LIVE
+		  : aarch64_cfun_incoming_pstate_za () != 0
+		  ? aarch64_local_sme_state::ACTIVE_DEAD
 		  : aarch64_local_sme_state::INACTIVE_CALLER);
     }
   gcc_unreachable ();
@@ -28524,27 +28670,34 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
      write directly.   Use a separate insn to model the effect.
 
      We must ensure that ZA is active on entry, which is enforced by using
-     SME_STATE_REGNUM.  The asm must ensure that ZA is active on return.  */
+     SME_STATE_REGNUM.  The asm must ensure that ZA is active on return.
+
+     The same thing applies to ZT0.  */
   if (TARGET_ZA)
     for (unsigned int i = clobbers.length (); i-- > 0; )
       {
 	rtx x = clobbers[i];
-	if (REG_P (x) && REGNO (x) == ZA_REGNUM)
+	if (REG_P (x)
+	    && (REGNO (x) == ZA_REGNUM || REGNO (x) == ZT0_REGNUM))
 	  {
 	    auto id = cfun->machine->next_asm_update_za_id++;
 
 	    start_sequence ();
 	    if (seq)
 	      emit_insn (seq);
-	    emit_insn (gen_aarch64_asm_update_za (gen_int_mode (id, SImode)));
+	    rtx id_rtx = gen_int_mode (id, SImode);
+	    emit_insn (REGNO (x) == ZA_REGNUM
+		       ? gen_aarch64_asm_update_za (id_rtx)
+		       : gen_aarch64_asm_update_zt0 (id_rtx));
 	    seq = get_insns ();
 	    end_sequence ();
 
-	    uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM));
+	    auto mode = REGNO (x) == ZA_REGNUM ? VNx16QImode : V8DImode;
+	    uses.safe_push (gen_rtx_REG (mode, REGNO (x)));
 	    uses.safe_push (gen_rtx_REG (DImode, SME_STATE_REGNUM));
 
 	    clobbers.ordered_remove (i);
-	    CLEAR_HARD_REG_BIT (clobbered_regs, ZA_REGNUM);
+	    CLEAR_HARD_REG_BIT (clobbered_regs, REGNO (x));
 	  }
       }
   return seq;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 854eb7bedc9..5599c98ee94 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -485,7 +485,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
     0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
     1, 1,			/* FFR and FFRT */	\
-    1, 1, 1, 1, 1, 1, 1		/* Fake registers */	\
+    1, 1, 1, 1, 1, 1, 1, 1	/* Fake registers */	\
   }
 
 /* X30 is marked as caller-saved which is in line with regular function call
@@ -509,7 +509,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     1, 1, 1, 1,   1, 1, 1, 1,	/* P0 - P7 */		\
     1, 1, 1, 1,   1, 1, 1, 1,	/* P8 - P15 */		\
     1, 1,			/* FFR and FFRT */	\
-    0, 0, 0, 0, 0, 0, 0		/* Fake registers */	\
+    0, 0, 0, 0, 0, 0, 0, 0	/* Fake registers */	\
   }
 
 #define REGISTER_NAMES						\
@@ -527,7 +527,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15",	\
     "ffr", "ffrt",						\
     "lowering", "tpidr2_block", "sme_state", "tpidr2_setup",	\
-    "za_free", "za_saved", "za"					\
+    "za_free", "za_saved", "za", "zt0"				\
   }
 
 /* Generate the register aliases for core register N */
@@ -773,7 +773,7 @@ enum reg_class
   { 0x00000000, 0x00000000, 0x000ffff0 },	/* PR_REGS */		\
   { 0x00000000, 0x00000000, 0x00300000 },	/* FFR_REGS */		\
   { 0x00000000, 0x00000000, 0x003ffff0 },	/* PR_AND_FFR_REGS */	\
-  { 0x00000000, 0x00000000, 0x1fc00000 },	/* FAKE_REGS */		\
+  { 0x00000000, 0x00000000, 0x3fc00000 },	/* FAKE_REGS */		\
   { 0xffffffff, 0xffffffff, 0x000fffff }	/* ALL_REGS */		\
 }
 
@@ -982,6 +982,9 @@ typedef struct GTY (()) machine_function
      or null if none.  */
   rtx za_save_buffer;
 
+  /* A stack slot that stores the contents of the function's ZT0 state.  */
+  rtx zt0_save_buffer;
+
   bool label_is_assembled;
 
   /* True if we've expanded at least one call to a function that changes
@@ -1063,8 +1066,9 @@ typedef struct
 				   raise an error for invalid calls.  */
 
   /* AARCH64_STATE_* flags that describe whether the function shares ZA
-     with its callers.  */
+     and ZT0 with its callers.  */
   unsigned int shared_za_flags;
+  unsigned int shared_zt0_flags;
 
   /* A list of registers that need to be saved and restored around a
      change to PSTATE.SM.  An auto_vec would be more convenient, but those
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2036dccd250..9e9ccefbfed 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -156,9 +156,12 @@ (define_constants
     ;; The contents persist even when the architected ZA is off.  Private-ZA
     ;; functions have no effect on its contents.
     (ZA_REGNUM 92)
-    ;; ----------------------------------------------------------------
+
+    ;; Similarly represents the contents of the current function's ZT0 state.
+    (ZT0_REGNUM 93)
+
     (FIRST_FAKE_REGNUM	LOWERING_REGNUM)
-    (LAST_FAKE_REGNUM	ZA_REGNUM)
+    (LAST_FAKE_REGNUM	ZT0_REGNUM)
     ;; ----------------------------------------------------------------
 
     ;; The pair of scratch registers used for stack probing with -fstack-check.
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
new file mode 100644
index 00000000000..05da587d4b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
@@ -0,0 +1,65 @@
+// { dg-options "" }
+
+#pragma GCC target "+sme2"
+
+void share_za_zt0_a() __arm_inout("za", "zt0");
+void share_za_zt0_b() __arm_inout("za", "zt0");
+
+void share_za_preserve_zt0() __arm_inout("za") __arm_preserves("zt0");
+void share_zt0_preserve_za() __arm_inout("zt0") __arm_preserves("za");
+
+__arm_new("za", "zt0") void new_za_zt0_a() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0", "za") void new_za_zt0_b() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0") void new_za_zt0_c();
+__arm_new("za") void new_za_zt0_c() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("za") void new_za_zt0_d();
+__arm_new("zt0") void new_za_zt0_d() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0", "za") void new_za_zt0_e();
+void new_za_zt0_e() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0") void new_zt0_a() {
+  share_za_zt0_a(); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
+}
+
+__arm_new("zt0") void new_zt0_b();
+void new_zt0_b() {
+  share_za_preserve_zt0(); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
+}
+
+__arm_new("zt0") void new_zt0_c();
+void new_zt0_c() {
+  share_zt0_preserve_za();
+}
+
+__arm_new("za") void new_za_a() {
+  share_za_zt0_a(); // { dg-error {call to a function that shares 'zt0' state from a function that has no 'zt0' state} }
+}
+
+__arm_new("za") void new_za_b();
+void new_za_b() {
+  share_za_preserve_zt0();
+}
+
+__arm_new("za") void new_za_c();
+void new_za_c() {
+  share_zt0_preserve_za(); // { dg-error {call to a function that shares 'zt0' state from a function that has no 'zt0' state} }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
new file mode 100644
index 00000000000..17cd84437d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
@@ -0,0 +1,31 @@
+// { dg-options "" }
+
+void invalid_a() __arm_inout("za");
+void invalid_a() __arm_inout("za", "zt0"); // { dg-error {conflicting types} }
+
+void invalid_b() __arm_inout("za", "zt0");
+void invalid_b() __arm_inout("zt0"); // { dg-error {conflicting types} }
+
+void invalid_c() __arm_in("zt0") __arm_inout("za");
+void invalid_c() __arm_inout("zt0", "za"); // { dg-error {conflicting types} }
+
+void invalid_d() __arm_inout("zt0");
+void invalid_d() __arm_out("zt0"); // { dg-error {conflicting types} }
+
+void invalid_e() __arm_in("zt0");
+void invalid_e() __arm_out("zt0"); // { dg-error {conflicting types} }
+
+void invalid_f() __arm_in("zt0");
+void invalid_f() __arm_preserves("zt0"); // { dg-error {conflicting types} }
+
+void valid_a() __arm_inout("zt0") __arm_inout("za");
+void valid_a() __arm_inout("zt0", "za");
+
+void valid_b() __arm_inout("za") __arm_inout("zt0");
+void valid_b() __arm_inout("zt0") __arm_inout("za");
+
+void valid_c() __arm_inout("za", "zt0");
+void valid_c() __arm_inout("zt0", "za");
+
+void valid_d() __arm_inout("zt0", "za");
+void valid_d() __arm_inout("za", "zt0");
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
new file mode 100644
index 00000000000..2489ea21de9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
@@ -0,0 +1,6 @@
+// { dg-options "" }
+
+#pragma GCC target "+sme2"
+
+void foo() __arm_inout("zt0");
+void bar() __arm_inout("za", "zt0") { foo(); } // { dg-message {call to a function that shares state other than 'za' from a function that has 'za' state} }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
new file mode 100644
index 00000000000..29999003d8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
@@ -0,0 +1,53 @@
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+sme2"
+
+void inout_za() __arm_inout("za");
+void inout_za_zt0() __arm_inout("za", "zt0");
+
+void inout_za_out_zt0() __arm_inout("za") __arm_out("zt0");
+void inout_za_in_zt0() __arm_inout("za") __arm_in("zt0");
+
+/*
+** test1:
+**	str	x30, \[sp, #?-16\]!
+**	bl	inout_za_zt0
+**	ldr	x30, \[sp\], #?16
+**	ret
+*/
+void test1() __arm_inout("za", "zt0")
+{
+  inout_za_zt0();
+}
+
+/*
+** test2:
+**	...
+**	str	zt0, \[(?:x[0-9]+|sp)\]
+**	...
+**	bl	inout_za
+**	...
+**	ldr	zt0, \[(?:x[0-9]+|sp)\]
+**	...
+**	ret
+*/
+void test2() __arm_inout("za", "zt0")
+{
+  inout_za();
+}
+
+/*
+** test3:
+**	...
+**	bl	inout_za
+**	bl	inout_za_out_zt0
+**	[^\n]+
+**	ret
+*/
+void test3() __arm_inout("za", "zt0")
+{
+  inout_za_in_zt0();
+  inout_za();
+  inout_za_out_zt0();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
new file mode 100644
index 00000000000..e18b395476c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
@@ -0,0 +1,260 @@
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+sme2"
+
+void private_zt0();
+void out_zt0() __arm_out("zt0");
+void in_zt0() __arm_in("zt0");
+void inout_zt0() __arm_inout("zt0");
+void preserves_zt0() __arm_preserves("zt0");
+
+/*
+** test1:
+**	ret
+*/
+__arm_new("zt0") void test1()
+{
+}
+
+/*
+** test2:
+**	ldr	w0, \[x0\]
+**	ret
+*/
+__arm_new("zt0") int test2(int *ptr)
+{
+  return *ptr;
+}
+
+/*
+** test3:
+**	stp	[^\n]+
+**	mov	x29, sp
+**	bl	private_zt0
+** (
+**	mov	w0, 0
+**	ldp	[^\n]+
+** |
+**	ldp	[^\n]+
+**	mov	w0, 0
+** )
+**	ret
+*/
+__arm_new("zt0") int test3()
+{
+  private_zt0();
+  return 0;
+}
+
+/*
+** test4:
+**	...
+**	mrs	x0, tpidr2_el0
+**	cbz	x0, [^\n]+
+**	bl	__arm_tpidr2_save
+**	msr	tpidr2_el0, xzr
+**	smstart	za
+**	bl	in_zt0
+**	smstop	za
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test4()
+{
+  in_zt0(); // Uses zeroed contents.
+}
+
+/*
+** test5:
+**	...
+**	mrs	x0, tpidr2_el0
+**	cbz	x0, [^\n]+
+**	bl	__arm_tpidr2_save
+**	msr	tpidr2_el0, xzr
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test5()
+{
+  private_zt0();
+  out_zt0();
+  in_zt0();
+  private_zt0();
+}
+
+// Despite the long test, there shouldn't be too much scope for variation
+// here.  The point is both to test correctness and code quality.
+/*
+** test6:
+**	stp	[^\n]+
+**	mov	x29, sp
+**	mrs	x0, tpidr2_el0
+**	cbz	x0, [^\n]+
+**	bl	__arm_tpidr2_save
+**	msr	tpidr2_el0, xzr
+**	smstart	za
+**	bl	out_zt0
+**	...
+**	str	zt0, [^\n]+
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	...
+**	ldr	zt0, [^\n]+
+**	bl	in_zt0
+**	smstop	za
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test6()
+{
+  out_zt0();
+  private_zt0();
+  in_zt0();
+}
+
+// Rely on previous tests for the part leading up to the smstart.
+/*
+** test7:
+**	...
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	smstop	za
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test7()
+{
+  out_zt0();
+  in_zt0();
+  private_zt0();
+  out_zt0();
+  in_zt0();
+}
+
+/*
+** test8:
+**	...
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test8()
+{
+  out_zt0();
+  in_zt0();
+  private_zt0();
+  out_zt0();
+  in_zt0();
+  private_zt0();
+}
+
+/*
+** test9:
+**	...
+**	str	zt0, [^\n]+
+**	smstop	za
+**	bl	private_zt0
+**	bl	private_zt0
+**	bl	private_zt0
+**	bl	private_zt0
+**	smstart	za
+**	...
+**	ldr	zt0, [^\n]+
+**	bl	in_zt0
+**	smstop	za
+**	...
+*/
+__arm_new("zt0") void test9()
+{
+  out_zt0();
+  private_zt0();
+  private_zt0();
+  private_zt0();
+  private_zt0();
+  in_zt0();
+}
+
+/*
+** test10:
+**	ldr	(w[0-9]+), \[x0\]
+**	cbz	\1, [^\n]+
+**	ldr	[^\n]+
+**	add	[^\n]+
+**	str	[^\n]+
+**	ret
+**	...
+*/
+__arm_new("zt0") void test10(volatile int *ptr)
+{
+  if (__builtin_expect (*ptr != 0, 1))
+    *ptr = *ptr + 1;
+  else
+    inout_zt0();
+}
+
+/*
+** test11:
+**	...
+**	ldr	w[0-9]+, [^\n]+
+**	add	(w[0-9]+), [^\n]+
+**	str	\1, [^\n]+
+**	...
+**	ret
+**	mrs	x[0-9]+, tpidr2_el0
+**	...
+**	smstart	za
+**	bl	inout_zt0
+**	ldr	(w[0-9]+), [^\n]+
+**	cbnz	\2, [^\n]+
+**	smstop	za
+**	...
+*/
+__arm_new("zt0") void test11(volatile int *ptr)
+{
+  if (__builtin_expect (*ptr == 0, 0))
+    do
+      inout_zt0();
+    while (*ptr);
+  else
+    *ptr += 1;
+}
+
+__arm_new("zt0") void test12(volatile int *ptr)
+{
+  do
+    {
+      inout_zt0();
+      private_zt0();
+    }
+  while (*ptr);
+  out_zt0();
+  in_zt0();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_6.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_6.c
new file mode 100644
index 00000000000..c62a8049f99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_6.c
@@ -0,0 +1,54 @@
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+sme2"
+
+void inout_zt0() __arm_inout("zt0");
+void out_zt0() __arm_out("zt0");
+void normal();
+
+/*
+** test1:
+**	str	x30, \[sp, #?-16\]!
+**	bl	inout_zt0
+**	ldr	x30, \[sp\], #?16
+**	ret
+*/
+void test1() __arm_inout("zt0")
+{
+  inout_zt0();
+}
+
+/*
+** test2:
+**	str	x30, \[sp, #?-80\]!
+**	add	(x[0-9]+), sp, #?16
+**	str	zt0, \[\1\]
+**	smstop	za
+**	bl	normal
+**	smstart	za
+**	add	(x[0-9]+), sp, #?16
+**	ldr	zt0, \[\1\]
+**	ldr	x30, \[sp\], #?80
+**	ret
+*/
+void test2() __arm_inout("zt0")
+{
+  normal();
+}
+
+/*
+** test3:
+**	...
+**	smstop	za
+**	bl	normal
+**	smstart	za
+**	bl	out_zt0
+**	ldr	[^\n]+
+**	ret
+*/
+void test3() __arm_inout("zt0")
+{
+  normal();
+  out_zt0();
+}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [pushed v2 5/5] aarch64: Add support for SME2 intrinsics
  2023-12-05 10:24 [pushed v2 0/5] aarch64: Add support for SME2 Richard Sandiford
                   ` (3 preceding siblings ...)
  2023-12-05 10:25 ` [pushed v2 4/5] aarch64: Add ZT0 Richard Sandiford
@ 2023-12-05 10:25 ` Richard Sandiford
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Sandiford @ 2023-12-05 10:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Sandiford

This patch adds support for the SME2 <arm_sme.h> intrinsics.  The
convention I've used is to put stuff in aarch64-sve-builtins-sme.*
if it relates to ZA, ZT0, the streaming vector length, or other
such SME state.  Things that operate purely on predicates and
vectors go in aarch64-sve-builtins-sve2.* instead.  Some of these
will later be picked up for SVE2p1.

We previously used Uph internally as a constraint for 16-bit
immediates to atomic instructions.  However, we need a user-facing
constraint for the upper predicate registers (already available as
PR_HI_REGS), and Uph makes a natural pair with the existing Upl.

gcc/
	* config/aarch64/aarch64.h (TARGET_STREAMING_SME2): New macro.
	(P_ALIASES): Likewise.
	(REGISTER_NAMES): Add pn aliases of the predicate registers.
	(W8_W11_REGNUM_P): New macro.
	(W8_W11_REGS): New register class.
	(REG_CLASS_NAMES, REG_CLASS_CONTENTS): Update accordingly.
	* config/aarch64/aarch64.cc (aarch64_print_operand): Add support
	for %K, which prints a predicate as a counter.  Handle tuples of
	predicates.
	(aarch64_regno_regclass): Handle W8_W11_REGS.
	(aarch64_class_max_nregs): Likewise.
	* config/aarch64/constraints.md (Uci, Uw2, Uw4): New constraints.
	(x, y): Move further up file.
	(Uph): Redefine as the high predicate registers, renaming the old
	constraint to...
	(Uih): ...this.
	* config/aarch64/predicates.md (const_0_to_7_operand): New predicate.
	(const_0_to_4_step_4_operand, const_0_to_6_step_2_operand): Likewise.
	(const_0_to_12_step_4_operand, const_0_to_14_step_2_operand): Likewise.
	(aarch64_simd_shift_imm_qi): Use const_0_to_7_operand.
	* config/aarch64/iterators.md (VNx16SI_ONLY, VNx8SI_ONLY)
	(VNx8DI_ONLY, SVE_FULL_BHSIx2, SVE_FULL_HF, SVE_FULL_SIx2_SDIx4)
	(SVE_FULL_BHS, SVE_FULLx24, SVE_DIx24, SVE_BHSx24, SVE_Ix24)
	(SVE_Fx24, SVE_SFx24, SME_ZA_BIx24, SME_ZA_BHIx124, SME_ZA_BHIx24)
	(SME_ZA_HFx124, SME_ZA_HFx24, SME_ZA_HIx124, SME_ZA_HIx24)
	(SME_ZA_SDIx24, SME_ZA_SDFx24): New mode iterators.
	(UNSPEC_REVD, UNSPEC_CNTP_C, UNSPEC_PEXT, UNSPEC_PEXTx2): New unspecs.
	(UNSPEC_PSEL, UNSPEC_PTRUE_C, UNSPEC_SQRSHR, UNSPEC_SQRSHRN)
	(UNSPEC_SQRSHRU, UNSPEC_SQRSHRUN, UNSPEC_UQRSHR, UNSPEC_UQRSHRN)
	(UNSPEC_UZP, UNSPEC_UZPQ, UNSPEC_ZIP, UNSPEC_ZIPQ, UNSPEC_BFMLSLB)
	(UNSPEC_BFMLSLT, UNSPEC_FCVTN, UNSPEC_FDOT, UNSPEC_SQCVT): Likewise.
	(UNSPEC_SQCVTN, UNSPEC_SQCVTU, UNSPEC_SQCVTUN, UNSPEC_UQCVT): Likewise.
	(UNSPEC_SME_ADD, UNSPEC_SME_ADD_WRITE, UNSPEC_SME_BMOPA): Likewise.
	(UNSPEC_SME_BMOPS, UNSPEC_SME_FADD, UNSPEC_SME_FDOT, UNSPEC_SME_FVDOT)
	(UNSPEC_SME_FMLA, UNSPEC_SME_FMLS, UNSPEC_SME_FSUB, UNSPEC_SME_READ)
	(UNSPEC_SME_SDOT, UNSPEC_SME_SVDOT, UNSPEC_SME_SMLA, UNSPEC_SME_SMLS)
	(UNSPEC_SME_SUB, UNSPEC_SME_SUB_WRITE, UNSPEC_SME_SUDOT): Likewise.
	(UNSPEC_SME_SUVDOT, UNSPEC_SME_UDOT, UNSPEC_SME_UVDOT): Likewise.
	(UNSPEC_SME_UMLA, UNSPEC_SME_UMLS, UNSPEC_SME_USDOT): Likewise.
	(UNSPEC_SME_USVDOT, UNSPEC_SME_WRITE): Likewise.
	(Vetype, VNARROW, V2XWIDE, Ventype, V_INT_EQUIV, v_int_equiv)
	(VSINGLE, vsingle, b): Add tuple modes.
	(v2xwide, za32_offset_range, za64_offset_range, za32_long)
	(za32_last_offset, vg_modifier, z_suffix, aligned_operand)
	(aligned_fpr): New mode attributes.
	(SVE_INT_BINARY_MULTI, SVE_INT_BINARY_SINGLE, SVE_INT_BINARY_MULTI)
	(SVE_FP_BINARY_MULTI): New int iterators.
	(SVE_BFLOAT_TERNARY_LONG): Add UNSPEC_BFMLSLB and UNSPEC_BFMLSLT.
	(SVE_BFLOAT_TERNARY_LONG_LANE): Likewise.
	(SVE_WHILE_ORDER, SVE2_INT_SHIFT_IMM_NARROWxN, SVE_QCVTxN)
	(SVE2_SFx24_UNARY, SVE2_x24_PERMUTE, SVE2_x24_PERMUTEQ)
	(UNSPEC_REVD_ONLY, SME2_INT_MOP, SME2_BMOP, SME_BINARY_SLICE_SDI)
	(SME_BINARY_SLICE_SDF, SME_BINARY_WRITE_SLICE_SDI, SME_INT_DOTPROD)
	(SME_INT_DOTPROD_LANE, SME_FP_DOTPROD, SME_FP_DOTPROD_LANE)
	(SME_INT_TERNARY_SLICE, SME_FP_TERNARY_SLICE, BHSD_BITS)
	(LUTI_BITS): New int iterators.
	(optab, sve_int_op): Handle the new unspecs.
	(sme_int_op, has_16bit_form): New int attributes.
	(bits_etype): Handle 64.
	* config/aarch64/aarch64.md (UNSPEC_LD1_SVE_COUNT): New unspec.
	(UNSPEC_ST1_SVE_COUNT, UNSPEC_LDNT1_SVE_COUNT): Likewise.
	(UNSPEC_STNT1_SVE_COUNT): Likewise.
	* config/aarch64/atomics.md (cas_short_expected_imm): Use Uih
	rather than Uph for HImode immediates.
	* config/aarch64/aarch64-sve.md (@aarch64_ld1<SVE_FULLx24:mode>)
	(@aarch64_ldnt1<SVE_FULLx24:mode>, @aarch64_st1<SVE_FULLx24:mode>)
	(@aarch64_stnt1<SVE_FULLx24:mode>): New patterns.
	(@aarch64_<sur>dot_prod_lane<vsi2qi>): Extend to...
	(@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>)
	(@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>):
	...these new patterns.
	(SVE_WHILE_B, SVE_WHILE_B_X2, SVE_WHILE_C): New constants.  Add
	SVE_WHILE_B to existing while patterns.
	* config/aarch64/aarch64-sve2.md (@aarch64_sve_ptrue_c<BHSD_BITS>)
	(@aarch64_sve_pext<BHSD_BITS>, @aarch64_sve_pext<BHSD_BITS>x2)
	(@aarch64_sve_psel<BHSD_BITS>, *aarch64_sve_psel<BHSD_BITS>_plus)
	(@aarch64_sve_cntp_c<BHSD_BITS>, <frint_pattern><mode>2)
	(<optab><mode>3, *<optab><mode>3, @aarch64_sve_single_<optab><mode>)
	(@aarch64_sve_<sve_int_op><mode>): New patterns.
	(@aarch64_sve_single_<sve_int_op><mode>, @aarch64_sve_<su>clamp<mode>)
	(*aarch64_sve_<su>clamp<mode>_x, @aarch64_sve_<su>clamp_single<mode>)
	(@aarch64_sve_fclamp<mode>, *aarch64_sve_fclamp<mode>_x)
	(@aarch64_sve_fclamp_single<mode>, <optab><mode><v2xwide>2)
	(@aarch64_sve_<sur>dotvnx4sivnx8hi): New patterns.
	(@aarch64_sve_<maxmin_uns_op><mode>): Likewise.
	(*aarch64_sve_<maxmin_uns_op><mode>): Likewise.
	(@aarch64_sve_single_<maxmin_uns_op><mode>): Likewise.
	(aarch64_sve_fdotvnx4sfvnx8hf): Likewise.
	(aarch64_fdot_prod_lanevnx4sfvnx8hf): Likewise.
	(@aarch64_sve_<optab><VNx16QI_ONLY:mode><VNx16SI_ONLY:mode>): Likewise.
	(@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>): Likewise.
	(@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8DI_ONLY:mode>): Likewise.
	(truncvnx8sf<mode>2, @aarch64_sve_cvtn<mode>): Likewise.
	(<optab><v_int_equiv><mode>2, <optab><mode><v_int_equiv>2): Likewise.
	(@aarch64_sve_sel<mode>): Likewise.
	(@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2): Likewise.
	(@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>): Likewise.
	(@aarch64_pred_<optab><mode>, @cond_<optab><mode>): Likewise.
	(@aarch64_sve_<optab><mode>): Likewise.
	* config/aarch64/aarch64-sme.md (@aarch64_sme_<optab><mode><mode>)
	(*aarch64_sme_<optab><mode><mode>_plus, @aarch64_sme_read<mode>)
	(*aarch64_sme_read<mode>_plus, @aarch64_sme_write<mode>): New patterns.
	(*aarch64_sme_write<mode>_plus, aarch64_sme_zero_zt0): Likewise.
	(@aarch64_sme_<optab><mode>, *aarch64_sme_<optab><mode>_plus)
	(@aarch64_sme_single_<optab><mode>): Likewise.
	(*aarch64_sme_single_<optab><mode>_plus): Likewise.
	(@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>)
	(*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus)
	(@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>)
	(*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus)
	(@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>)
	(*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus)
	(@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>)
	(*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus)
	(@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>)
	(*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus)
	(@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>)
	(*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus)
	(@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>)
	(*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus)
	(@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>)
	(*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>)
	(@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>)
	(*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus)
	(@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>)
	(*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus)
	(@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>)
	(*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus)
	(@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>)
	(*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>)
	(@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>)
	(@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>)
	(@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>)
	(*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus)
	(@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>)
	(*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus)
	(@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>)
	(*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus)
	(@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>)
	(*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus)
	(@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>)
	(*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus)
	(@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>)
	(*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>)
	(@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>)
	(*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus)
	(@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>)
	(*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>)
	(@aarch64_sme_lut<LUTI_BITS><mode>): Likewise.
	(UNSPEC_SME_LUTI): New unspec.
	* config/aarch64/aarch64-sve-builtins.def (single): New mode suffix.
	(c8, c16, c32, c64): New type suffixes.
	(vg1x2, vg1x4, vg2, vg2x1, vg2x2, vg2x4, vg4, vg4x1, vg4x2)
	(vg4x4): New group suffixes.
	* config/aarch64/aarch64-sve-builtins.h (CP_READ_ZT0)
	(CP_WRITE_ZT0): New constants.
	(get_svbool_t): Delete.
	(function_resolver::report_mismatched_num_vectors): New member
	function.
	(function_resolver::resolve_conversion): Likewise.
	(function_resolver::infer_predicate_type): Likewise.
	(function_resolver::infer_64bit_scalar_integer_pair): Likewise.
	(function_resolver::require_matching_predicate_type): Likewise.
	(function_resolver::require_nonscalar_type): Likewise.
	(function_resolver::finish_opt_single_resolution): Likewise.
	(function_resolver::require_derived_vector_type): Add an
	expected_num_vectors parameter.
	(function_expander::map_to_rtx_codes): Add an extra parameter
	for unconditional FP unspecs.
	(function_instance::gp_type_index): New member function.
	(function_instance::gp_type): Likewise.
	(function_instance::gp_mode): Handle multi-vector operations.
	* config/aarch64/aarch64-sve-builtins.cc (TYPES_all_count)
	(TYPES_all_pred_count, TYPES_c, TYPES_bhs_data, TYPES_bhs_widen)
	(TYPES_hs_data, TYPES_cvt_h_s_float, TYPES_cvt_s_s, TYPES_qcvt_x2)
	(TYPES_qcvt_x4, TYPES_qrshr_x2, TYPES_qrshru_x2, TYPES_qrshr_x4)
	(TYPES_qrshru_x4, TYPES_while_x, TYPES_while_x_c, TYPES_s_narrow_fsu)
	(TYPES_za_s_b_signed, TYPES_za_s_b_unsigned, TYPES_za_s_b_integer)
	(TYPES_za_s_h_integer, TYPES_za_s_h_data, TYPES_za_s_unsigned)
	(TYPES_za_s_float, TYPES_za_s_data, TYPES_za_d_h_integer): New type
	macros.
	(groups_x2, groups_x12, groups_x4, groups_x24, groups_x124)
	(groups_vg1x2, groups_vg1x4, groups_vg1x24, groups_vg2, groups_vg4)
	(groups_vg24): New group arrays.
	(function_instance::reads_global_state_p): Handle CP_READ_ZT0.
	(function_instance::modifies_global_state_p): Handle CP_WRITE_ZT0.
	(add_shared_state_attribute): Handle zt0 state.
	(function_builder::add_overloaded_functions): Skip MODE_single
	for non-tuple groups.
	(function_resolver::report_mismatched_num_vectors): New function.
	(function_resolver::resolve_to): Add a fallback error message for
	the general two-type case.
	(function_resolver::resolve_conversion): New function.
	(function_resolver::infer_predicate_type): Likewise.
	(function_resolver::infer_64bit_scalar_integer_pair): Likewise.
	(function_resolver::require_matching_predicate_type): Likewise.
	(function_resolver::require_matching_vector_type): Specifically
	diagnose mismatched vector counts.
	(function_resolver::require_derived_vector_type): Add an
	expected_num_vectors parameter.  Extend to handle cases where
	tuples are expected.
	(function_resolver::require_nonscalar_type): New function.
	(function_resolver::check_gp_argument): Use gp_type_index rather
	than hard-coding VECTOR_TYPE_svbool_t.
	(function_resolver::finish_opt_single_resolution): New function.
	(function_checker::require_immediate_either_or): Remove hard-coded
	constants.
	(function_expander::direct_optab_handler): New function.
	(function_expander::use_pred_x_insn): Only add a strictness flag
	if the insn has an operand for it.
	(function_expander::map_to_rtx_codes): Take an unconditional
	FP unspec as an extra parameter.  Handle tuples and MODE_single.
	(function_expander::map_to_unspecs): Handle tuples and MODE_single.
	* config/aarch64/aarch64-sve-builtins-functions.h (read_zt0)
	(write_zt0): New typedefs.
	(full_width_access::memory_vector): Use the function's
	vectors_per_tuple.
	(rtx_code_function_base): Add an optional unconditional FP unspec.
	(rtx_code_function::expand): Update accordingly.
	(rtx_code_function_rotated::expand): Likewise.
	(unspec_based_function_exact_insn::expand): Use tuple_mode instead
	of vector_mode.
	(unspec_based_uncond_function): New typedef.
	(cond_or_uncond_unspec_function): New class.
	(sme_1mode_function::expand): Handle single forms.
	(sme_2mode_function_t): Likewise, adding a template parameter for them.
	(sme_2mode_function): Update accordingly.
	(sme_2mode_lane_function): New typedef.
	(multireg_permute): New class.
	(class integer_conversion): Likewise.
	(while_comparison::expand): Handle svcount_t and svboolx2_t results.
	* config/aarch64/aarch64-sve-builtins-shapes.h
	(binary_int_opt_single_n, binary_opt_single_n, binary_single)
	(binary_za_slice_lane, binary_za_slice_int_opt_single)
	(binary_za_slice_opt_single, binary_za_slice_uint_opt_single)
	(binaryx, clamp, compare_scalar_count, count_pred_c)
	(dot_za_slice_int_lane, dot_za_slice_lane, dot_za_slice_uint_lane)
	(extract_pred, inherent_zt, ldr_zt, read_za, read_za_slice)
	(select_pred, shift_right_imm_narrowxn, storexn, str_zt)
	(unary_convertxn, unary_za_slice, unaryxn, write_za)
	(write_za_slice): Declare.
	* config/aarch64/aarch64-sve-builtins-shapes.cc
	(za_group_is_pure_overload): New function.
	(apply_predication): Use the function's gp_type for the predicate,
	instead of hard-coding the use of svbool_t.
	(parse_element_type): Add support for "c" (svcount_t).
	(parse_type): Add support for "c0" and "c1" (conversion destination
	and source types).
	(binary_za_slice_lane_base): New class.
	(binary_za_slice_opt_single_base): Likewise.
	(load_contiguous_base::resolve): Pass the group suffix to r.resolve.
	(luti_lane_zt_base): New class.
	(binary_int_opt_single_n, binary_opt_single_n, binary_single)
	(binary_za_slice_lane, binary_za_slice_int_opt_single)
	(binary_za_slice_opt_single, binary_za_slice_uint_opt_single)
	(binaryx, clamp): New shapes.
	(compare_scalar_def::build): Allow the return type to be a tuple.
	(compare_scalar_def::expand): Pass the group suffix to r.resolve.
	(compare_scalar_count, count_pred_c, dot_za_slice_int_lane)
	(dot_za_slice_lane, dot_za_slice_uint_lane, extract_pred, inherent_zt)
	(ldr_zt, read_za, read_za_slice, select_pred, shift_right_imm_narrowxn)
	(storexn, str_zt): New shapes.
	(ternary_qq_lane_def, ternary_qq_opt_n_def): Replace with...
	(ternary_qq_or_011_lane_def, ternary_qq_opt_n_or_011_def): ...these
	new classes.  Allow a second suffix that specifies the type of the
	second vector argument, and that is used to derive the third.
	(unary_def::build): Extend to handle tuple types.
	(unary_convert_def::build): Use the new c0 and c1 format specifiers.
	(unary_convertxn, unary_za_slice, unaryxn, write_za): New shapes.
	(write_za_slice): Likewise.
	* config/aarch64/aarch64-sve-builtins-base.cc (svbic_impl::expand)
	(svext_bhw_impl::expand): Update call to map_to_rtx_codes.
	(svcntp_impl::expand): Handle svcount_t variants.
	(svcvt_impl::expand): Handle unpredicated conversions separately,
	dealing with tuples.
	(svdot_impl::expand): Handle 2-way dot products.
	(svdotprod_lane_impl::expand): Likewise.
	(svld1_impl::fold): Punt on tuple loads.
	(svld1_impl::expand): Handle tuple loads.
	(svldnt1_impl::expand): Likewise.
	(svpfalse_impl::fold): Punt on svcount_t forms.
	(svptrue_impl::fold): Likewise.
	(svptrue_impl::expand): Handle svcount_t forms.
	(svrint_impl): New class.
	(svsel_impl::fold): Punt on tuple forms.
	(svsel_impl::expand): Handle tuple forms.
	(svst1_impl::fold): Punt on tuple loads.
	(svst1_impl::expand): Handle tuple loads.
	(svstnt1_impl::expand): Likewise.
	(svwhilelx_impl::fold): Punt on tuple forms.
	(svdot_lane): Use UNSPEC_FDOT.
	(svmax, svmaxnm, svmin, svminnm): Add unconditional FP unspecs.
	(rinta, rinti, rintm, rintn, rintp, rintx, rintz): Use svrint_impl.
	* config/aarch64/aarch64-sve-builtins-base.def (svcreate2, svget2)
	(svset2, svundef2): Add _b variants.
	(svcvt): Use unary_convertxn.
	(svdot): Use ternary_qq_opt_n_or_011.
	(svdot_lane): Use ternary_qq_or_011_lane.
	(svmax, svmaxnm, svmin, svminnm): Use binary_opt_single_n.
	(svpfalse): Add a form that returns svcount_t results.
	(svrinta, svrintm, svrintn, svrintp): Use unaryxn.
	(svsel): Use binaryxn.
	(svst1, svstnt1): Use storexn.
	* config/aarch64/aarch64-sve-builtins-sme.h
	(svadd_za, svadd_write_za, svbmopa_za, svbmops_za, svdot_za)
	(svdot_lane_za, svldr_zt, svluti2_lane_zt, svluti4_lane_zt)
	(svmla_za, svmla_lane_za, svmls_za, svmls_lane_za, svread_za)
	(svstr_zt, svsub_za, svsub_write_za, svsudot_za, svsudot_lane_za)
	(svsuvdot_lane_za, svusdot_za, svusdot_lane_za, svusvdot_lane_za)
	(svvdot_lane_za, svwrite_za, svzero_zt): Declare.
	* config/aarch64/aarch64-sve-builtins-sme.cc (load_store_za_base):
	Rename to...
	(load_store_za_zt0_base): ...this and extend to tuples.
	(load_za_base, store_za_base): Update accordingly.
	(expand_ldr_str_zt0): New function.
	(svldr_zt_impl, svluti_lane_zt_impl, svread_za_impl, svstr_zt_impl)
	(svsudot_za_impl, svwrite_za_impl, svzero_zt_impl): New classes.
	(svadd_za, svadd_write_za, svbmopa_za, svbmops_za, svdot_za)
	(svdot_lane_za, svldr_zt, svluti2_lane_zt, svluti4_lane_zt)
	(svmla_za, svmla_lane_za, svmls_za, svmls_lane_za, svread_za)
	(svstr_zt, svsub_za, svsub_write_za, svsudot_za, svsudot_lane_za)
	(svsuvdot_lane_za, svusdot_za, svusdot_lane_za, svusvdot_lane_za)
	(svvdot_lane_za, svwrite_za, svzero_zt): New functions.
	* config/aarch64/aarch64-sve-builtins-sme.def: Add SME2 intrinsics.
	* config/aarch64/aarch64-sve-builtins-sve2.h
	(svbfmlslb, svbfmlslb_lane, svbfmlslt, svbfmlslt_lane, svclamp)
	(svcvtn, svpext, svpsel, svqcvt, svqcvtn, svqrshr, svqrshrn)
	(svqrshru, svqrshrun, svrevd, svunpk, svuzp, svuzpq, svzip)
	(svzipq): Declare.
	* config/aarch64/aarch64-sve-builtins-sve2.cc (svclamp_impl)
	(svcvtn_impl, svpext_impl, svpsel_impl): New classes.
	(svqrshl_impl::fold): Update for change to svrshl shape.
	(svrshl_impl::fold): Punt on tuple forms.
	(svsqadd_impl::expand): Update call to map_to_rtx_codes.
	(svunpk_impl): New class.
	(svbfmlslb, svbfmlslb_lane, svbfmlslt, svbfmlslt_lane, svclamp)
	(svcvtn, svpext, svpsel, svqcvt, svqcvtn, svqrshr, svqrshrn)
	(svqrshru, svqrshrun, svrevd, svunpk, svuzp, svuzpq, svzip)
	(svzipq): New functions.
	* config/aarch64/aarch64-sve-builtins-sve2.def: Add SME2 intrinsics.
	* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
	or undefine __ARM_FEATURE_SME2.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Provide a way
	for test functions to share ZT0.
	(ATTR): Update accordingly.
	(TEST_LOAD_COUNT, TEST_STORE_COUNT, TEST_PN, TEST_COUNT_PN)
	(TEST_EXTRACT_PN, TEST_SELECT_P, TEST_COMPARE_S_X2, TEST_COMPARE_S_C)
	(TEST_CREATE_B, TEST_GET_B, TEST_SET_B, TEST_XN, TEST_XN_SINGLE)
	(TEST_XN_SINGLE_Z15, TEST_XN_SINGLE_AWKWARD, TEST_X2_NARROW)
	(TEST_X4_NARROW): New macros.
	* gcc.target/aarch64/sve/acle/asm/create2_1.c: Add _b tests.
	* gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Remove
	test for svmopa that becomes valid with SME2.
	* gcc.target/aarch64/sve/acle/general-c/create_1.c: Adjust for
	existence of svboolx2_t version of svcreate2.
	* gcc.target/aarch64/sve/acle/general-c/store_1.c: Adjust error
	messages to account for svcount_t predication.
	* gcc.target/aarch64/sve/acle/general-c/store_2.c: Likewise.
	* gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c: Adjust
	error messages to account for new SME2 variants.
	* gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c: Likewise.
---
 gcc/config/aarch64/aarch64-c.cc               |    1 +
 gcc/config/aarch64/aarch64-sme.md             | 1092 ++++++++++++++++-
 .../aarch64/aarch64-sve-builtins-base.cc      |  160 ++-
 .../aarch64/aarch64-sve-builtins-base.def     |   37 +-
 .../aarch64/aarch64-sve-builtins-functions.h  |  170 ++-
 .../aarch64/aarch64-sve-builtins-shapes.cc    |  877 ++++++++++++-
 .../aarch64/aarch64-sve-builtins-shapes.h     |   37 +-
 .../aarch64/aarch64-sve-builtins-sme.cc       |  175 ++-
 .../aarch64/aarch64-sve-builtins-sme.def      |  122 ++
 gcc/config/aarch64/aarch64-sve-builtins-sme.h |   28 +-
 .../aarch64/aarch64-sve-builtins-sve2.cc      |  107 +-
 .../aarch64/aarch64-sve-builtins-sve2.def     |   74 +-
 .../aarch64/aarch64-sve-builtins-sve2.h       |   21 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    |  622 +++++++++-
 gcc/config/aarch64/aarch64-sve-builtins.def   |   15 +
 gcc/config/aarch64/aarch64-sve-builtins.h     |   48 +-
 gcc/config/aarch64/aarch64-sve.md             |   98 +-
 gcc/config/aarch64/aarch64-sve2.md            |  703 +++++++++++
 gcc/config/aarch64/aarch64.cc                 |   27 +-
 gcc/config/aarch64/aarch64.h                  |   19 +-
 gcc/config/aarch64/aarch64.md                 |    4 +
 gcc/config/aarch64/atomics.md                 |    2 +-
 gcc/config/aarch64/constraints.md             |   26 +-
 gcc/config/aarch64/iterators.md               |  369 +++++-
 gcc/config/aarch64/predicates.md              |   27 +-
 .../aarch64/sme2/aarch64-sme2-acle-asm.exp    |   82 ++
 .../aarch64/sme/acle-asm/clamp_s16.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_s32.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_s64.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_s8.c           |   42 +
 .../aarch64/sme/acle-asm/clamp_u16.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_u32.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_u64.c          |   42 +
 .../aarch64/sme/acle-asm/clamp_u8.c           |   42 +
 .../aarch64/sme/acle-asm/revd_bf16.c          |   76 ++
 .../aarch64/sme/acle-asm/revd_f16.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_f32.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_f64.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_s16.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_s32.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_s64.c           |   76 ++
 .../gcc.target/aarch64/sme/acle-asm/revd_s8.c |   76 ++
 .../aarch64/sme/acle-asm/revd_u16.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_u32.c           |   76 ++
 .../aarch64/sme/acle-asm/revd_u64.c           |   76 ++
 .../gcc.target/aarch64/sme/acle-asm/revd_u8.c |   76 ++
 .../gcc.target/aarch64/sme/clamp_1.c          |   38 +
 .../gcc.target/aarch64/sme/clamp_2.c          |   32 +
 .../gcc.target/aarch64/sme/clamp_3.c          |   26 +
 .../gcc.target/aarch64/sme/clamp_4.c          |   20 +
 .../aarch64/sme2/aarch64-sme2-acle-asm.exp    |   81 ++
 .../aarch64/sme2/acle-asm/add_s16_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_s16_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_s32_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_s32_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_s64_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_s64_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_s8_x2.c         |  115 ++
 .../aarch64/sme2/acle-asm/add_s8_x4.c         |  125 ++
 .../aarch64/sme2/acle-asm/add_u16_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_u16_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_u32_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_u32_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_u64_x2.c        |  115 ++
 .../aarch64/sme2/acle-asm/add_u64_x4.c        |  125 ++
 .../aarch64/sme2/acle-asm/add_u8_x2.c         |  115 ++
 .../aarch64/sme2/acle-asm/add_u8_x4.c         |  125 ++
 .../sme2/acle-asm/add_write_za32_s32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/add_write_za32_s32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/add_write_za32_u32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/add_write_za32_u32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/add_write_za64_s64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/add_write_za64_s64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/add_write_za64_u64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/add_write_za64_u64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/add_za32_f32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/add_za32_f32_vg1x4.c        |  137 +++
 .../sme2/acle-asm/add_za32_s32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/add_za32_s32_vg1x4.c        |  137 +++
 .../sme2/acle-asm/add_za32_u32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/add_za32_u32_vg1x4.c        |  137 +++
 .../sme2/acle-asm/add_za64_f64_vg1x2.c        |  126 ++
 .../sme2/acle-asm/add_za64_f64_vg1x4.c        |  141 +++
 .../sme2/acle-asm/add_za64_s64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/add_za64_s64_vg1x4.c        |  139 +++
 .../sme2/acle-asm/add_za64_u64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/add_za64_u64_vg1x4.c        |  139 +++
 .../aarch64/sme2/acle-asm/bfmlslb_f32.c       |   65 +
 .../aarch64/sme2/acle-asm/bfmlslb_lane_f32.c  |   84 ++
 .../aarch64/sme2/acle-asm/bfmlslt_f32.c       |   65 +
 .../aarch64/sme2/acle-asm/bfmlslt_lane_f32.c  |   84 ++
 .../aarch64/sme2/acle-asm/bmopa_za32.c        |   30 +
 .../aarch64/sme2/acle-asm/bmops_za32.c        |   30 +
 .../aarch64/sme2/acle-asm/clamp_f16.c         |   42 +
 .../aarch64/sme2/acle-asm/clamp_f16_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_f16_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_f32.c         |   42 +
 .../aarch64/sme2/acle-asm/clamp_f32_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_f32_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_f64.c         |   42 +
 .../aarch64/sme2/acle-asm/clamp_f64_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_f64_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s16_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s16_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s32_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s32_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s64_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s64_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_s8_x2.c       |   94 ++
 .../aarch64/sme2/acle-asm/clamp_s8_x4.c       |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u16_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u16_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u32_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u32_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u64_x2.c      |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u64_x4.c      |  104 ++
 .../aarch64/sme2/acle-asm/clamp_u8_x2.c       |   94 ++
 .../aarch64/sme2/acle-asm/clamp_u8_x4.c       |  104 ++
 .../aarch64/sme2/acle-asm/cntp_c16.c          |   39 +
 .../aarch64/sme2/acle-asm/cntp_c32.c          |   39 +
 .../aarch64/sme2/acle-asm/cntp_c64.c          |   39 +
 .../aarch64/sme2/acle-asm/cntp_c8.c           |   39 +
 .../aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/cvt_f16_f32_x2.c    |   50 +
 .../aarch64/sme2/acle-asm/cvt_f32_s32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_f32_s32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvt_f32_u32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_f32_u32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvt_s32_f32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_s32_f32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvt_u32_f32_x2.c    |   43 +
 .../aarch64/sme2/acle-asm/cvt_u32_f32_x4.c    |   77 ++
 .../aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/dot_f32.c           |   44 +
 .../aarch64/sme2/acle-asm/dot_lane_f32.c      |   93 ++
 .../aarch64/sme2/acle-asm/dot_lane_s32.c      |   93 ++
 .../aarch64/sme2/acle-asm/dot_lane_u32.c      |   93 ++
 .../sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c  |  108 ++
 .../sme2/acle-asm/dot_lane_za32_f16_vg1x2.c   |  102 ++
 .../sme2/acle-asm/dot_lane_za32_f16_vg1x4.c   |  108 ++
 .../sme2/acle-asm/dot_lane_za32_s16_vg1x2.c   |  102 ++
 .../sme2/acle-asm/dot_lane_za32_s16_vg1x4.c   |  108 ++
 .../sme2/acle-asm/dot_lane_za32_s8_vg1x2.c    |  102 ++
 .../sme2/acle-asm/dot_lane_za32_s8_vg1x4.c    |  108 ++
 .../sme2/acle-asm/dot_lane_za32_u16_vg1x2.c   |  102 ++
 .../sme2/acle-asm/dot_lane_za32_u16_vg1x4.c   |  108 ++
 .../sme2/acle-asm/dot_lane_za32_u8_vg1x2.c    |  102 ++
 .../sme2/acle-asm/dot_lane_za32_u8_vg1x4.c    |  108 ++
 .../sme2/acle-asm/dot_lane_za64_s16_vg1x2.c   |  104 ++
 .../sme2/acle-asm/dot_lane_za64_s16_vg1x4.c   |  110 ++
 .../sme2/acle-asm/dot_lane_za64_u16_vg1x2.c   |  104 ++
 .../sme2/acle-asm/dot_lane_za64_u16_vg1x4.c   |  110 ++
 .../aarch64/sme2/acle-asm/dot_s32.c           |   44 +
 .../aarch64/sme2/acle-asm/dot_u32.c           |   44 +
 .../sme2/acle-asm/dot_za32_bf16_vg1x2.c       |  243 ++++
 .../sme2/acle-asm/dot_za32_bf16_vg1x4.c       |  254 ++++
 .../sme2/acle-asm/dot_za32_f16_vg1x2.c        |  243 ++++
 .../sme2/acle-asm/dot_za32_f16_vg1x4.c        |  254 ++++
 .../sme2/acle-asm/dot_za32_s16_vg1x2.c        |  243 ++++
 .../sme2/acle-asm/dot_za32_s16_vg1x4.c        |  254 ++++
 .../aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c |  243 ++++
 .../aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c |  254 ++++
 .../sme2/acle-asm/dot_za32_u16_vg1x2.c        |  243 ++++
 .../sme2/acle-asm/dot_za32_u16_vg1x4.c        |  254 ++++
 .../aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c |  243 ++++
 .../aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c |  254 ++++
 .../sme2/acle-asm/dot_za64_s16_vg1x2.c        |  245 ++++
 .../sme2/acle-asm/dot_za64_s16_vg1x4.c        |  256 ++++
 .../sme2/acle-asm/dot_za64_u16_vg1x2.c        |  245 ++++
 .../sme2/acle-asm/dot_za64_u16_vg1x4.c        |  256 ++++
 .../aarch64/sme2/acle-asm/ld1_bf16_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_bf16_x4.c       |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_f16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_f16_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_f32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_f32_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_f64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_f64_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_s16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s16_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_s32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s32_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_s64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s64_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_s8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_s8_x4.c         |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_u16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u16_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_u32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u32_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_u64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u64_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/ld1_u8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/ld1_u8_x4.c         |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_bf16_x2.c     |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_bf16_x4.c     |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_f16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_f16_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_f32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_f32_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_f64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_f64_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_s16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s16_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_s32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s32_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_s64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s64_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_s8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_s8_x4.c       |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_u16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u16_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_u32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u32_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_u64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u64_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/ldnt1_u8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/ldnt1_u8_x4.c       |  354 ++++++
 .../gcc.target/aarch64/sme2/acle-asm/ldr_zt.c |   36 +
 .../aarch64/sme2/acle-asm/luti2_bf16.c        |   48 +
 .../aarch64/sme2/acle-asm/luti2_bf16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/luti2_bf16_x4.c     |   56 +
 .../aarch64/sme2/acle-asm/luti2_f16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_f16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_f16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_f32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_f32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_f32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_s16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_s16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_s16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_s32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_s32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_s32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_s8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti2_s8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/luti2_s8_x4.c       |   56 +
 .../aarch64/sme2/acle-asm/luti2_u16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_u16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_u16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_u32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti2_u32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti2_u32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti2_u8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti2_u8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/luti2_u8_x4.c       |   56 +
 .../aarch64/sme2/acle-asm/luti4_bf16.c        |   48 +
 .../aarch64/sme2/acle-asm/luti4_bf16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/luti4_bf16_x4.c     |   56 +
 .../aarch64/sme2/acle-asm/luti4_f16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_f16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_f16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_f32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_f32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_f32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_s16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_s16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_s16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_s32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_s32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_s32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_s8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti4_s8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/luti4_u16.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_u16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_u16_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_u32.c         |   48 +
 .../aarch64/sme2/acle-asm/luti4_u32_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/luti4_u32_x4.c      |   56 +
 .../aarch64/sme2/acle-asm/luti4_u8.c          |   48 +
 .../aarch64/sme2/acle-asm/luti4_u8_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/max_f16_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_f16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_f32_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_f32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_f64_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_f64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s16_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_s16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s32_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_s32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s64_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_s64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_s8_x2.c         |  207 ++++
 .../aarch64/sme2/acle-asm/max_s8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/max_u16_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_u16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_u32_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_u32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_u64_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/max_u64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/max_u8_x2.c         |  207 ++++
 .../aarch64/sme2/acle-asm/max_u8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/maxnm_f16_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/maxnm_f16_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/maxnm_f32_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/maxnm_f32_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/maxnm_f64_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/maxnm_f64_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/min_f16_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_f16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_f32_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_f32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_f64_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_f64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s16_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_s16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s32_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_s32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s64_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_s64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_s8_x2.c         |  207 ++++
 .../aarch64/sme2/acle-asm/min_s8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/min_u16_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_u16_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_u32_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_u32_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_u64_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/min_u64_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/min_u8_x2.c         |  207 ++++
 .../aarch64/sme2/acle-asm/min_u8_x4.c         |  249 ++++
 .../aarch64/sme2/acle-asm/minnm_f16_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/minnm_f16_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/minnm_f32_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/minnm_f32_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/minnm_f64_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/minnm_f64_x4.c      |  249 ++++
 .../sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c  |  148 +++
 .../sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c  |  112 ++
 .../sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c  |  118 ++
 .../sme2/acle-asm/mla_lane_za32_f16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mla_lane_za32_f16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mla_lane_za32_f16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mla_lane_za32_f32_vg1x2.c   |  102 ++
 .../sme2/acle-asm/mla_lane_za32_f32_vg1x4.c   |  108 ++
 .../sme2/acle-asm/mla_lane_za32_s16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mla_lane_za32_s16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mla_lane_za32_s16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mla_lane_za32_s8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mla_lane_za32_s8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mla_lane_za32_s8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mla_lane_za32_u16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mla_lane_za32_u16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mla_lane_za32_u16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mla_lane_za32_u8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mla_lane_za32_u8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mla_lane_za32_u8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mla_lane_za64_f64_vg1x2.c   |  104 ++
 .../sme2/acle-asm/mla_lane_za64_f64_vg1x4.c   |  110 ++
 .../sme2/acle-asm/mla_lane_za64_s16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mla_lane_za64_s16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mla_lane_za64_s16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mla_lane_za64_u16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mla_lane_za64_u16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mla_lane_za64_u16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mla_za32_bf16_vg2x1.c       |  148 +++
 .../sme2/acle-asm/mla_za32_bf16_vg2x2.c       |  247 ++++
 .../sme2/acle-asm/mla_za32_bf16_vg2x4.c       |  258 ++++
 .../sme2/acle-asm/mla_za32_f16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mla_za32_f16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mla_za32_f16_vg2x4.c        |  258 ++++
 .../sme2/acle-asm/mla_za32_f32_vg1x2.c        |  180 +++
 .../sme2/acle-asm/mla_za32_f32_vg1x4.c        |  172 +++
 .../sme2/acle-asm/mla_za32_s16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mla_za32_s16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mla_za32_s16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mla_za32_u16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mla_za32_u16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mla_za32_u16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mla_za64_f64_vg1x2.c        |  182 +++
 .../sme2/acle-asm/mla_za64_f64_vg1x4.c        |  174 +++
 .../sme2/acle-asm/mla_za64_s16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mla_za64_s16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mla_za64_s16_vg4x4.c        |  262 ++++
 .../sme2/acle-asm/mla_za64_u16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mla_za64_u16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mla_za64_u16_vg4x4.c        |  262 ++++
 .../sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c  |  148 +++
 .../sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c  |  112 ++
 .../sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c  |  118 ++
 .../sme2/acle-asm/mls_lane_za32_f16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mls_lane_za32_f16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mls_lane_za32_f16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mls_lane_za32_f32_vg1x2.c   |  102 ++
 .../sme2/acle-asm/mls_lane_za32_f32_vg1x4.c   |  108 ++
 .../sme2/acle-asm/mls_lane_za32_s16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mls_lane_za32_s16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mls_lane_za32_s16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mls_lane_za32_s8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mls_lane_za32_s8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mls_lane_za32_s8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mls_lane_za32_u16_vg2x1.c   |  148 +++
 .../sme2/acle-asm/mls_lane_za32_u16_vg2x2.c   |  112 ++
 .../sme2/acle-asm/mls_lane_za32_u16_vg2x4.c   |  118 ++
 .../sme2/acle-asm/mls_lane_za32_u8_vg4x1.c    |  150 +++
 .../sme2/acle-asm/mls_lane_za32_u8_vg4x2.c    |  122 ++
 .../sme2/acle-asm/mls_lane_za32_u8_vg4x4.c    |  128 ++
 .../sme2/acle-asm/mls_lane_za64_f64_vg1x2.c   |  104 ++
 .../sme2/acle-asm/mls_lane_za64_f64_vg1x4.c   |  110 ++
 .../sme2/acle-asm/mls_lane_za64_s16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mls_lane_za64_s16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mls_lane_za64_s16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mls_lane_za64_u16_vg4x1.c   |  152 +++
 .../sme2/acle-asm/mls_lane_za64_u16_vg4x2.c   |  124 ++
 .../sme2/acle-asm/mls_lane_za64_u16_vg4x4.c   |  130 ++
 .../sme2/acle-asm/mls_za32_bf16_vg2x1.c       |  148 +++
 .../sme2/acle-asm/mls_za32_bf16_vg2x2.c       |  247 ++++
 .../sme2/acle-asm/mls_za32_bf16_vg2x4.c       |  258 ++++
 .../sme2/acle-asm/mls_za32_f16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mls_za32_f16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mls_za32_f16_vg2x4.c        |  258 ++++
 .../sme2/acle-asm/mls_za32_f32_vg1x2.c        |  180 +++
 .../sme2/acle-asm/mls_za32_f32_vg1x4.c        |  172 +++
 .../sme2/acle-asm/mls_za32_s16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mls_za32_s16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mls_za32_s16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mls_za32_u16_vg2x1.c        |  148 +++
 .../sme2/acle-asm/mls_za32_u16_vg2x2.c        |  247 ++++
 .../sme2/acle-asm/mls_za32_u16_vg2x4.c        |  258 ++++
 .../aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c |  149 +++
 .../aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c |  249 ++++
 .../aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c |  260 ++++
 .../sme2/acle-asm/mls_za64_f64_vg1x2.c        |  182 +++
 .../sme2/acle-asm/mls_za64_f64_vg1x4.c        |  174 +++
 .../sme2/acle-asm/mls_za64_s16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mls_za64_s16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mls_za64_s16_vg4x4.c        |  262 ++++
 .../sme2/acle-asm/mls_za64_u16_vg4x1.c        |  151 +++
 .../sme2/acle-asm/mls_za64_u16_vg4x2.c        |  251 ++++
 .../sme2/acle-asm/mls_za64_u16_vg4x4.c        |  262 ++++
 .../aarch64/sme2/acle-asm/mopa_za32.c         |   48 +
 .../aarch64/sme2/acle-asm/mops_za32.c         |   48 +
 .../aarch64/sme2/acle-asm/pext_c16.c          |   50 +
 .../aarch64/sme2/acle-asm/pext_c16_x2.c       |   54 +
 .../aarch64/sme2/acle-asm/pext_c32.c          |   50 +
 .../aarch64/sme2/acle-asm/pext_c32_x2.c       |   54 +
 .../aarch64/sme2/acle-asm/pext_c64.c          |   50 +
 .../aarch64/sme2/acle-asm/pext_c64_x2.c       |   54 +
 .../aarch64/sme2/acle-asm/pext_c8.c           |   50 +
 .../aarch64/sme2/acle-asm/pext_c8_x2.c        |   54 +
 .../aarch64/sme2/acle-asm/pfalse_c.c          |   39 +
 .../aarch64/sme2/acle-asm/psel_b16.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_b32.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_b64.c          |   80 ++
 .../aarch64/sme2/acle-asm/psel_b8.c           |   89 ++
 .../aarch64/sme2/acle-asm/psel_c16.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_c32.c          |   89 ++
 .../aarch64/sme2/acle-asm/psel_c64.c          |   80 ++
 .../aarch64/sme2/acle-asm/psel_c8.c           |   89 ++
 .../aarch64/sme2/acle-asm/ptrue_c16.c         |   41 +
 .../aarch64/sme2/acle-asm/ptrue_c32.c         |   41 +
 .../aarch64/sme2/acle-asm/ptrue_c64.c         |   41 +
 .../aarch64/sme2/acle-asm/ptrue_c8.c          |   41 +
 .../aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c   |   50 +
 .../aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c  |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c  |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c  |   50 +
 .../aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c  |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c   |   65 +
 .../aarch64/sme2/acle-asm/qdmulh_s16_x2.c     |  207 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s16_x4.c     |  249 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s32_x2.c     |  207 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s32_x4.c     |  249 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s64_x2.c     |  207 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s64_x4.c     |  249 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s8_x2.c      |  207 ++++
 .../aarch64/sme2/acle-asm/qdmulh_s8_x4.c      |  249 ++++
 .../aarch64/sme2/acle-asm/qrshr_s16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/qrshr_s16_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshr_s8_x4.c       |   65 +
 .../aarch64/sme2/acle-asm/qrshr_u16_x2.c      |   50 +
 .../aarch64/sme2/acle-asm/qrshr_u16_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshr_u8_x4.c       |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_s16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/qrshrn_s16_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_s8_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_u16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/qrshrn_u16_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/qrshrn_u8_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshru_u16_x2.c     |   50 +
 .../aarch64/sme2/acle-asm/qrshru_u16_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/qrshru_u8_x4.c      |   65 +
 .../aarch64/sme2/acle-asm/qrshrun_u16_x2.c    |   50 +
 .../aarch64/sme2/acle-asm/qrshrun_u16_x4.c    |   65 +
 .../aarch64/sme2/acle-asm/qrshrun_u8_x4.c     |   65 +
 .../aarch64/sme2/acle-asm/read_hor_za16_vg2.c |  140 +++
 .../aarch64/sme2/acle-asm/read_hor_za16_vg4.c |  138 +++
 .../aarch64/sme2/acle-asm/read_hor_za32_vg2.c |  112 ++
 .../aarch64/sme2/acle-asm/read_hor_za32_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_hor_za64_vg2.c |  113 ++
 .../aarch64/sme2/acle-asm/read_hor_za64_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_hor_za8_vg2.c  |  140 +++
 .../aarch64/sme2/acle-asm/read_hor_za8_vg4.c  |  156 +++
 .../aarch64/sme2/acle-asm/read_ver_za16_vg2.c |  140 +++
 .../aarch64/sme2/acle-asm/read_ver_za16_vg4.c |  138 +++
 .../aarch64/sme2/acle-asm/read_ver_za32_vg2.c |  112 ++
 .../aarch64/sme2/acle-asm/read_ver_za32_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_ver_za64_vg2.c |  113 ++
 .../aarch64/sme2/acle-asm/read_ver_za64_vg4.c |  129 ++
 .../aarch64/sme2/acle-asm/read_ver_za8_vg2.c  |  140 +++
 .../aarch64/sme2/acle-asm/read_ver_za8_vg4.c  |  156 +++
 .../aarch64/sme2/acle-asm/read_za16_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/read_za16_vg1x4.c   |  137 +++
 .../aarch64/sme2/acle-asm/read_za32_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/read_za32_vg1x4.c   |  137 +++
 .../aarch64/sme2/acle-asm/read_za64_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/read_za64_vg1x4.c   |  137 +++
 .../aarch64/sme2/acle-asm/read_za8_vg1x2.c    |  122 ++
 .../aarch64/sme2/acle-asm/read_za8_vg1x4.c    |  137 +++
 .../aarch64/sme2/acle-asm/rinta_s32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rinta_s32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rintm_u32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rintm_u32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rintn_u32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rintn_u32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rintp_u32_x2.c      |   61 +
 .../aarch64/sme2/acle-asm/rintp_u32_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/rshl_s16_x2.c       |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_s16_x4.c       |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_s32_x2.c       |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_s32_x4.c       |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_s64_x2.c       |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_s64_x4.c       |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_s8_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_s8_x4.c        |  249 ++++
 .../aarch64/sme2/acle-asm/rshl_u16_x2.c       |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_u16_x4.c       |  228 ++++
 .../aarch64/sme2/acle-asm/rshl_u32_x2.c       |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_u32_x4.c       |  228 ++++
 .../aarch64/sme2/acle-asm/rshl_u64_x2.c       |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_u64_x4.c       |  228 ++++
 .../aarch64/sme2/acle-asm/rshl_u8_x2.c        |  207 ++++
 .../aarch64/sme2/acle-asm/rshl_u8_x4.c        |  228 ++++
 .../aarch64/sme2/acle-asm/sel_bf16_x2.c       |   92 ++
 .../aarch64/sme2/acle-asm/sel_bf16_x4.c       |   92 ++
 .../aarch64/sme2/acle-asm/sel_f16_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f16_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f32_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f32_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f64_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_f64_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s16_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s16_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s32_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s32_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s64_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s64_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_s8_x2.c         |   92 ++
 .../aarch64/sme2/acle-asm/sel_s8_x4.c         |   92 ++
 .../aarch64/sme2/acle-asm/sel_u16_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u16_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u32_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u32_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u64_x2.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u64_x4.c        |   92 ++
 .../aarch64/sme2/acle-asm/sel_u8_x2.c         |   92 ++
 .../aarch64/sme2/acle-asm/sel_u8_x4.c         |   92 ++
 .../aarch64/sme2/acle-asm/st1_bf16_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/st1_bf16_x4.c       |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_f16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_f16_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_f32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_f32_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_f64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_f64_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_s16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s16_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_s32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s32_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_s64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s64_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_s8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/st1_s8_x4.c         |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_u16_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u16_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_u32_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u32_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_u64_x2.c        |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u64_x4.c        |  354 ++++++
 .../aarch64/sme2/acle-asm/st1_u8_x2.c         |  262 ++++
 .../aarch64/sme2/acle-asm/st1_u8_x4.c         |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_bf16_x2.c     |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_bf16_x4.c     |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_f16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_f16_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_f32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_f32_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_f64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_f64_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_s16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s16_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_s32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s32_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_s64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s64_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_s8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_s8_x4.c       |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_u16_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u16_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_u32_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u32_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_u64_x2.c      |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u64_x4.c      |  354 ++++++
 .../aarch64/sme2/acle-asm/stnt1_u8_x2.c       |  262 ++++
 .../aarch64/sme2/acle-asm/stnt1_u8_x4.c       |  354 ++++++
 .../gcc.target/aarch64/sme2/acle-asm/str_zt.c |   36 +
 .../sme2/acle-asm/sub_write_za32_s32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/sub_write_za32_s32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/sub_write_za32_u32_vg1x2.c  |  180 +++
 .../sme2/acle-asm/sub_write_za32_u32_vg1x4.c  |  172 +++
 .../sme2/acle-asm/sub_write_za64_s64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/sub_write_za64_s64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/sub_write_za64_u64_vg1x2.c  |  182 +++
 .../sme2/acle-asm/sub_write_za64_u64_vg1x4.c  |  174 +++
 .../sme2/acle-asm/sub_za32_f32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/sub_za32_f32_vg1x4.c        |  137 +++
 .../sme2/acle-asm/sub_za32_s32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/sub_za32_s32_vg1x4.c        |  137 +++
 .../sme2/acle-asm/sub_za32_u32_vg1x2.c        |  122 ++
 .../sme2/acle-asm/sub_za32_u32_vg1x4.c        |  137 +++
 .../sme2/acle-asm/sub_za64_f64_vg1x2.c        |  126 ++
 .../sme2/acle-asm/sub_za64_f64_vg1x4.c        |  141 +++
 .../sme2/acle-asm/sub_za64_s64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/sub_za64_s64_vg1x4.c        |  139 +++
 .../sme2/acle-asm/sub_za64_u64_vg1x2.c        |  124 ++
 .../sme2/acle-asm/sub_za64_u64_vg1x4.c        |  139 +++
 .../sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c  |  102 ++
 .../sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c  |  108 ++
 .../sme2/acle-asm/sudot_za32_s8_vg1x2.c       |  243 ++++
 .../sme2/acle-asm/sudot_za32_s8_vg1x4.c       |  254 ++++
 .../sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c |  108 ++
 .../aarch64/sme2/acle-asm/test_sme2_acle.h    |  124 ++
 .../aarch64/sme2/acle-asm/unpk_s16_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_s16_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_s32_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_s32_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_s8_x2.c        |   50 +
 .../aarch64/sme2/acle-asm/unpk_s8_x4.c        |   76 ++
 .../aarch64/sme2/acle-asm/unpk_u16_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_u16_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_u32_x2.c       |   50 +
 .../aarch64/sme2/acle-asm/unpk_u32_x4.c       |   76 ++
 .../aarch64/sme2/acle-asm/unpk_u8_x2.c        |   50 +
 .../aarch64/sme2/acle-asm/unpk_u8_x4.c        |   76 ++
 .../sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c  |  102 ++
 .../sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c  |  108 ++
 .../sme2/acle-asm/usdot_za32_u8_vg1x2.c       |  243 ++++
 .../sme2/acle-asm/usdot_za32_u8_vg1x4.c       |  254 ++++
 .../sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c |  108 ++
 .../aarch64/sme2/acle-asm/uzp_bf16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzp_bf16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzp_f16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_f16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_f32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_f32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_f64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_f64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_s8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/uzp_s8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzp_u8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/uzp_u8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_bf16_x2.c      |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_bf16_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_f16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_f16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_f32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_f32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_f64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_f64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_s8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_s8_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/uzpq_u8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/uzpq_u8_x4.c        |   73 ++
 .../sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c   |  108 ++
 .../sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c  |  102 ++
 .../sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c   |  108 ++
 .../sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c  |  110 ++
 .../sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c  |  110 ++
 .../aarch64/sme2/acle-asm/whilege_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilege_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilege_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilege_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilege_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilege_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilege_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilege_c8.c        |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilegt_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilegt_c8.c        |  117 ++
 .../aarch64/sme2/acle-asm/whilele_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilele_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilele_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilele_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilele_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilele_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilele_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilele_c8.c        |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_b16.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_b32.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_b64.c       |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_b8.c        |  119 ++
 .../aarch64/sme2/acle-asm/whilelt_c16.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_c32.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_c64.c       |  117 ++
 .../aarch64/sme2/acle-asm/whilelt_c8.c        |  117 ++
 .../sme2/acle-asm/write_hor_za16_vg2.c        |  140 +++
 .../sme2/acle-asm/write_hor_za16_vg4.c        |  138 +++
 .../sme2/acle-asm/write_hor_za32_vg2.c        |  112 ++
 .../sme2/acle-asm/write_hor_za32_vg4.c        |  129 ++
 .../sme2/acle-asm/write_hor_za64_vg2.c        |  113 ++
 .../sme2/acle-asm/write_hor_za64_vg4.c        |  129 ++
 .../aarch64/sme2/acle-asm/write_hor_za8_vg2.c |  140 +++
 .../aarch64/sme2/acle-asm/write_hor_za8_vg4.c |  156 +++
 .../sme2/acle-asm/write_ver_za16_vg2.c        |  140 +++
 .../sme2/acle-asm/write_ver_za16_vg4.c        |  138 +++
 .../sme2/acle-asm/write_ver_za32_vg2.c        |  112 ++
 .../sme2/acle-asm/write_ver_za32_vg4.c        |  129 ++
 .../sme2/acle-asm/write_ver_za64_vg2.c        |  113 ++
 .../sme2/acle-asm/write_ver_za64_vg4.c        |  129 ++
 .../aarch64/sme2/acle-asm/write_ver_za8_vg2.c |  140 +++
 .../aarch64/sme2/acle-asm/write_ver_za8_vg4.c |  156 +++
 .../aarch64/sme2/acle-asm/write_za16_vg1x2.c  |  122 ++
 .../aarch64/sme2/acle-asm/write_za16_vg1x4.c  |  137 +++
 .../aarch64/sme2/acle-asm/write_za32_vg1x2.c  |  122 ++
 .../aarch64/sme2/acle-asm/write_za32_vg1x4.c  |  137 +++
 .../aarch64/sme2/acle-asm/write_za64_vg1x2.c  |  122 ++
 .../aarch64/sme2/acle-asm/write_za64_vg1x4.c  |  137 +++
 .../aarch64/sme2/acle-asm/write_za8_vg1x2.c   |  122 ++
 .../aarch64/sme2/acle-asm/write_za8_vg1x4.c   |  137 +++
 .../aarch64/sme2/acle-asm/zero_zt.c           |   12 +
 .../aarch64/sme2/acle-asm/zip_bf16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zip_bf16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zip_f16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_f16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_f32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_f32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_f64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_f64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_s16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_s32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_s64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_s8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/zip_s8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/zip_u16_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_u16_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_u32_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_u32_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_u64_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zip_u64_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zip_u8_x2.c         |   77 ++
 .../aarch64/sme2/acle-asm/zip_u8_x4.c         |   73 ++
 .../aarch64/sme2/acle-asm/zipq_bf16_x2.c      |   77 ++
 .../aarch64/sme2/acle-asm/zipq_bf16_x4.c      |   73 ++
 .../aarch64/sme2/acle-asm/zipq_f16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_f16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_f32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_f32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_f64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_f64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_s8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zipq_s8_x4.c        |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u16_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u16_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u32_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u32_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u64_x2.c       |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u64_x4.c       |   73 ++
 .../aarch64/sme2/acle-asm/zipq_u8_x2.c        |   77 ++
 .../aarch64/sme2/acle-asm/zipq_u8_x4.c        |   73 ++
 .../aarch64/sve/acle/asm/create2_1.c          |   18 +
 .../gcc.target/aarch64/sve/acle/asm/get2_b.c  |   55 +
 .../gcc.target/aarch64/sve/acle/asm/set2_b.c  |   41 +
 .../aarch64/sve/acle/asm/test_sve_acle.h      |  269 +++-
 .../general-c/binary_int_opt_single_n_1.c     |   35 +
 .../general-c/binary_int_opt_single_n_2.c     |   36 +
 .../acle/general-c/binary_opt_single_n_1.c    |   26 +
 .../acle/general-c/binary_opt_single_n_2.c    |   38 +
 .../sve/acle/general-c/binary_single_1.c      |   34 +
 .../sve/acle/general-c/binary_za_m_1.c        |    1 -
 .../binary_za_slice_int_opt_single_1.c        |   61 +
 .../acle/general-c/binary_za_slice_lane_1.c   |   73 ++
 .../acle/general-c/binary_za_slice_lane_2.c   |   78 ++
 .../acle/general-c/binary_za_slice_lane_3.c   |   78 ++
 .../acle/general-c/binary_za_slice_lane_4.c   |   26 +
 .../general-c/binary_za_slice_opt_single_1.c  |   76 ++
 .../general-c/binary_za_slice_opt_single_2.c  |   29 +
 .../general-c/binary_za_slice_opt_single_3.c  |   16 +
 .../binary_za_slice_uint_opt_single_1.c       |   61 +
 .../aarch64/sve/acle/general-c/binaryxn_1.c   |   23 +
 .../aarch64/sve/acle/general-c/binaryxn_2.c   |   33 +
 .../aarch64/sve/acle/general-c/clamp_1.c      |   30 +
 .../acle/general-c/compare_scalar_count_1.c   |   55 +
 .../aarch64/sve/acle/general-c/create_1.c     |    2 +-
 .../acle/general-c/dot_za_slice_int_lane_1.c  |   59 +
 .../sve/acle/general-c/dot_za_slice_lane_1.c  |   83 ++
 .../sve/acle/general-c/dot_za_slice_lane_2.c  |   83 ++
 .../acle/general-c/dot_za_slice_uint_lane_1.c |   59 +
 .../general-c/shift_right_imm_narrowxn_1.c    |   89 ++
 .../aarch64/sve/acle/general-c/store_1.c      |    2 +-
 .../aarch64/sve/acle/general-c/store_2.c      |    2 +-
 .../aarch64/sve/acle/general-c/storexn_1.c    |   33 +
 .../sve/acle/general-c/ternary_qq_lane_1.c    |   30 +-
 .../sve/acle/general-c/ternary_qq_opt_n_2.c   |   12 +-
 .../acle/general-c/ternary_qq_or_011_lane_1.c |   33 +
 .../sve/acle/general-c/unary_convertxn_1.c    |   28 +
 .../sve/acle/general-c/unary_za_slice_1.c     |   54 +
 .../sve/acle/general-c/unary_za_slice_2.c     |   27 +
 .../sve/acle/general-c/unary_za_slice_3.c     |   16 +
 .../aarch64/sve/acle/general-c/unaryxn_1.c    |   15 +
 .../aarch64/sve/acle/general-c/write_za_1.c   |   50 +
 .../sve/acle/general-c/write_za_slice_1.c     |   38 +
 875 files changed, 121976 insertions(+), 227 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_lane_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_lane_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmopa_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmops_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pfalse_c.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_zt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 017380b7563..dd4b32546c2 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -258,6 +258,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
   aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
   aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
+  aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
      target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index 505805e2ecf..6cba6ab5f74 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -31,14 +31,25 @@
 ;; ---- Single-vector stores
 ;; ---- Table stores
 ;; ---- Single-vector moves
+;; ---- Multi-vector moves
 ;; ---- Zeroing
 ;;
 ;; == Binary arithmetic
 ;; ---- Binary arithmetic on ZA tile
+;; ---- Binary arithmetic on ZA slice
+;; ---- Binary arithmetic, writing to ZA slice
 ;;
 ;; == Ternary arithmetic
+;; ---- [INT] Dot product
+;; ---- [INT] Ternary widening arithmetic on ZA slice
 ;; ---- [INT] Sum of outer products
+;; ---- [FP] Dot product
+;; ---- [FP] Ternary arithmetic on ZA slice
+;; ---- [FP] Ternary widening arithmetic on ZA slice
 ;; ---- [FP] Sum of outer products
+;;
+;; == Table lookup
+;; ---- Table lookup
 
 ;; =========================================================================
 ;; == State management
@@ -772,6 +783,131 @@ (define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
   "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Multi-vector moves
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOVA
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(reg:SVE_FULLx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:DI 1 "const_int_operand")
+	   (match_operand:SI 2 "register_operand" "Ucj")]
+	  SME_READ))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[3] = GEN_INT (<vector_count> - 1);
+    return "mova\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]";
+  }
+)
+
+(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
+  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(reg:SVE_FULLx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:DI 1 "const_int_operand")
+	   (plus:SI
+	     (match_operand:SI 2 "register_operand" "Ucj")
+	     (match_operand:SI 3 "const_int_operand"))]
+	  SME_READ))]
+  "TARGET_STREAMING_SME2
+   && UINTVAL (operands[3]) % <vector_count> == 0
+   && UINTVAL (operands[3]) < 128 / <elem_bits>"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1);
+    return "mova\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]";
+  }
+)
+
+(define_insn "@aarch64_sme_read<mode>"
+  [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_DIx24
+	  [(reg:SVE_DIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 1 "register_operand" "Uci")]
+	  UNSPEC_SME_READ))]
+  "TARGET_STREAMING_SME2"
+  "mova\t%0, za.d[%w1, 0, vgx<vector_count>]"
+)
+
+(define_insn "*aarch64_sme_read<mode>_plus"
+  [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_DIx24
+	  [(reg:SVE_DIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 1 "register_operand" "Uci")
+		    (match_operand:SI 2 "const_0_to_7_operand"))]
+	  UNSPEC_SME_READ))]
+  "TARGET_STREAMING_SME2"
+  "mova\t%0, za.d[%w1, %2, vgx<vector_count>]"
+)
+
+(define_insn "@aarch64_sme_<optab><mode><mode>"
+  [(set (reg:SVE_FULLx24 ZA_REGNUM)
+	(unspec:SVE_FULLx24
+	  [(reg:SVE_FULLx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:DI 0 "const_int_operand")
+	   (match_operand:SI 1 "register_operand" "Ucj")
+	   (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_WRITE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[3] = GEN_INT (<vector_count> - 1);
+    return "mova\tza%0<hv>.<Vetype>[%w1, 0:%3], %2";
+  }
+)
+
+(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
+  [(set (reg:SVE_FULLx24 ZA_REGNUM)
+	(unspec:SVE_FULLx24
+	  [(reg:SVE_FULLx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:DI 0 "const_int_operand")
+	   (plus:SI
+	     (match_operand:SI 1 "register_operand" "Ucj")
+	     (match_operand:SI 2 "const_int_operand"))
+	   (match_operand:SVE_FULLx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_WRITE))]
+  "TARGET_STREAMING_SME2
+   && UINTVAL (operands[2]) % <vector_count> == 0
+   && UINTVAL (operands[2]) < 128 / <elem_bits>"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[2]) + <vector_count> - 1);
+    return "mova\tza%0<hv>.<Vetype>[%w1, %2:%4], %3";
+  }
+)
+
+(define_insn "@aarch64_sme_write<mode>"
+  [(set (reg:SVE_DIx24 ZA_REGNUM)
+	(unspec:SVE_DIx24
+	  [(reg:SVE_DIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SVE_DIx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  UNSPEC_SME_WRITE))]
+  "TARGET_STREAMING_SME2"
+  "mova\tza.d[%w0, 0, vgx<vector_count>], %1"
+)
+
+(define_insn "*aarch64_sme_write<mode>_plus"
+  [(set (reg:SVE_DIx24 ZA_REGNUM)
+	(unspec:SVE_DIx24
+	  [(reg:SVE_DIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SVE_DIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  UNSPEC_SME_WRITE))]
+  "TARGET_STREAMING_SME2"
+  "mova\tza.d[%w0, %1, vgx<vector_count>], %2"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Zeroing
 ;; -------------------------------------------------------------------------
@@ -793,6 +929,14 @@ (define_insn "aarch64_sme_zero_za"
   }
 )
 
+(define_insn "aarch64_sme_zero_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(const_int 0))
+   (use (reg:DI SME_STATE_REGNUM))]
+  "TARGET_SME2"
+  "zero\t{ zt0 }"
+)
+
 ;; =========================================================================
 ;; == Binary arithmetic
 ;; =========================================================================
@@ -819,14 +963,543 @@ (define_insn "@aarch64_sme_<optab><mode>"
   "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Binary arithmetic on ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode>"
+  [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDIx24
+	  [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_SDI))]
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+  [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDIx24
+	  [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_SDI))]
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
+)
+
+(define_insn "@aarch64_sme_<optab><mode>"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDFx24
+	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_SDF))]
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDFx24
+	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_SDF))]
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Binary arithmetic, writing to ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; - SUB
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode>"
+  [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDIx24
+	  [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_WRITE_SLICE_SDI))]
+  "TARGET_STREAMING_SME2"
+  "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+  [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDIx24
+	  [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_SDIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_WRITE_SLICE_SDI))]
+  "TARGET_STREAMING_SME2"
+  "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+(define_insn "@aarch64_sme_single_<optab><mode>"
+  [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDIx24
+	  [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDIx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_SDIx24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	  SME_BINARY_WRITE_SLICE_SDI))]
+  "TARGET_STREAMING_SME2"
+  "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
+)
+
+(define_insn "*aarch64_sme_single_<optab><mode>_plus"
+  [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
+	(unspec:SME_ZA_SDIx24
+	  [(reg:SME_ZA_SDIx24 ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDIx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_SDIx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+	  SME_BINARY_WRITE_SLICE_SDI))]
+  "TARGET_STREAMING_SME2"
+  "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
+)
+
 ;; =========================================================================
 ;; == Ternary arithmetic
 ;; =========================================================================
 
 ;; -------------------------------------------------------------------------
-;; ---- [INT] Sum of outer products
+;; ---- [INT] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDOT
+;; - SUDOT
+;; - UDOT
+;; - USDOT
+;; -------------------------------------------------------------------------
+
+;; Multi-vector integer dot product accumulating into ZA.  Operand 0 (%w0)
+;; is the 32-bit slice-index register; the slice offset is implicitly 0.
+;; ZA_REGNUM and SME_STATE_REGNUM appear as inputs because the new ZA
+;; contents depend on the old contents and (presumably) on the current
+;; streaming/ZA state -- see the SME_STATE_REGNUM definition.
+;; The insn condition pairs .d accumulators only with 16-bit inputs;
+;; <has_16bit_form> presumably filters out unspecs that have no 16-bit
+;; input form -- TODO confirm against the iterator definitions.
+(define_insn "@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
+  [(set (reg:SME_ZA_SDI ZA_REGNUM)
+	(unspec:SME_ZA_SDI
+	  [(reg:SME_ZA_SDI ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_INT_DOTPROD))]
+  "TARGET_STREAMING_SME2
+   && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+   && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+  "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+;; As above, but with the slice index expressed as "base + constant",
+;; where the constant is in [0, 7] and is printed directly in the
+;; za[...] addressing form.
+(define_insn "*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
+  [(set (reg:SME_ZA_SDI ZA_REGNUM)
+	(unspec:SME_ZA_SDI
+	  [(reg:SME_ZA_SDI ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_INT_DOTPROD))]
+  "TARGET_STREAMING_SME2
+   && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+   && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+  "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+;; "Single" form: the second multiplicand is one vector (vec_duplicate)
+;; that is applied to every vector of the multi-vector first multiplicand.
+(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
+  [(set (reg:SME_ZA_SDI ZA_REGNUM)
+	(unspec:SME_ZA_SDI
+	  [(reg:SME_ZA_SDI ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_BHIx24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	  SME_INT_DOTPROD))]
+  "TARGET_STREAMING_SME2
+   && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+   && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+  "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
+)
+
+;; Single form combined with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
+  [(set (reg:SME_ZA_SDI ZA_REGNUM)
+	(unspec:SME_ZA_SDI
+	  [(reg:SME_ZA_SDI ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_BHIx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+	  SME_INT_DOTPROD))]
+  "TARGET_STREAMING_SME2
+   && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+   && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+  "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>"
+)
+
+;; SUDOT is USDOT with the operands swapped.
+;; Note that the unspec operands below are therefore in the opposite
+;; order from the printed operands: the duplicated (single) vector is
+;; listed first in the unspec but printed last.
+(define_insn "@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (vec_duplicate:SME_ZA_BIx24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))
+	   (match_operand:SME_ZA_BIx24 1 "register_operand" "w")]
+	  UNSPEC_SME_USDOT))]
+  "TARGET_STREAMING_SME2"
+  "sudot\tza.s[%w0, 0, vgx<vector_count>], %1, %2.b"
+)
+
+;; SUDOT-as-swapped-USDOT with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (vec_duplicate:SME_ZA_BIx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "x"))
+	   (match_operand:SME_ZA_BIx24 2 "register_operand" "w")]
+	  UNSPEC_SME_USDOT))]
+  "TARGET_STREAMING_SME2"
+  "sudot\tza.s[%w0, %1, vgx<vector_count>], %2, %3.b"
+)
+
+;; "Lane" form: the second multiplicand is lane %3 of a single vector,
+;; selected via UNSPEC_SVE_LANE_SELECT.
+(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
+  [(set (reg:SME_ZA_SDI ZA_REGNUM)
+	(unspec:SME_ZA_SDI
+	  [(reg:SME_ZA_SDI ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_BHIx24
+	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	      (match_operand:SI 3 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_INT_DOTPROD_LANE))]
+  "TARGET_STREAMING_SME2
+   && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+   && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+  "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>[%3]"
+)
+
+;; Lane form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
+  [(set (reg:SME_ZA_SDI ZA_REGNUM)
+	(unspec:SME_ZA_SDI
+	  [(reg:SME_ZA_SDI ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_BHIx24
+	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	      (match_operand:SI 4 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_INT_DOTPROD_LANE))]
+  "TARGET_STREAMING_SME2
+   && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
+   && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
+  "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Ternary widening arithmetic on ZA slice
 ;; -------------------------------------------------------------------------
 ;; Includes:
+;; - SMLA
+;; - SMLS
+;; - UMLA
+;; - UMLS
+;; -------------------------------------------------------------------------
+
+;; Widening multiply-accumulate into 32-bit ZA slices.  <za32_long> and
+;; <za32_last_offset> are mode attributes: the mnemonic suffix and the
+;; last slice offset depend on how far the source elements are widened
+;; (e.g. the "0:<za32_last_offset>" range below covers all written slices).
+;; This pattern takes two single full vectors.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SVE_FULL_BHI 1 "register_operand" "w")
+	   (match_operand:SVE_FULL_BHI 2 "register_operand" "x")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>], %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>"
+)
+
+;; As above with a "base + constant" slice index; operands[4] is computed
+;; in the output statement as the last offset of the written slice range.
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+	   (match_operand:SVE_FULL_BHI 3 "register_operand" "x")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+    return "<optab><za32_long>\tza.s[%w0, %1:%4], %2.<SVE_FULL_BHI:Vetype>, %3.<SVE_FULL_BHI:Vetype>";
+  }
+)
+
+;; Multi-vector (x2/x4) form, both multiplicands in aligned tuples.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2"
+)
+
+;; Multi-vector form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+    return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
+  }
+)
+
+;; "Single" form: one broadcast vector multiplied with each vector of a
+;; multi-vector operand.
+(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_BHIx24
+	     (match_operand:<SME_ZA_BHIx24:VSINGLE> 2 "register_operand" "x"))]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
+)
+
+;; Single form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_BHIx24
+	     (match_operand:<SME_ZA_BHIx24:VSINGLE> 3 "register_operand" "x"))]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+    return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>";
+  }
+)
+
+;; "Lane" form.  SME_ZA_BHIx124 also covers the single-vector (x1) case,
+;; so the predicate/constraint of operand 1 and the vg modifier and
+;; z-suffix in the output come from mode attributes.
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_BHIx124 1 "<aligned_operand>" "<aligned_fpr>")
+	   (unspec:SME_ZA_BHIx124
+	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	      (match_operand:SI 3 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset><vg_modifier>], %1<z_suffix>, %2.<SME_ZA_BHIx124:Vetype>[%3]"
+)
+
+;; Lane form with a "base + constant" slice index.
+;; NOTE(review): unlike the other "+ offset" variants in this section,
+;; this pattern's name has no "_plus" suffix.  That is harmless for a
+;; "*" (match-only) pattern but inconsistent with its siblings.
+(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SME_ZA_BHIx124 2 "<aligned_operand>" "<aligned_fpr>")
+	   (unspec:SME_ZA_BHIx124
+	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	      (match_operand:SI 4 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[5] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
+    return "<optab><za32_long>\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.<SME_ZA_BHIx124:Vetype>[%4]";
+  }
+)
+
+;; 64-bit variants: widening multiply-accumulate from .h inputs into
+;; four consecutive .d ZA slices (hence the fixed "0:3" range and "ll"
+;; suffix).  All of these additionally require the sme-i16i64 extension.
+(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:VNx8HI_ONLY 1 "register_operand" "w")
+	   (match_operand:VNx8HI_ONLY 2 "register_operand" "x")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  "<optab>ll\tza.d[%w0, 0:3], %1.h, %2.h"
+)
+
+;; As above with a "base + constant" slice index; operands[4] is the last
+;; slice offset (base + 3), computed in the output statement.
+(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+	   (match_operand:VNx8HI_ONLY 2 "register_operand" "w")
+	   (match_operand:VNx8HI_ONLY 3 "register_operand" "x")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
+    return "<optab>ll\tza.d[%w0, %1:%4], %2.h, %3.h";
+  }
+)
+
+;; Multi-vector (x2/x4) form.
+(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2"
+)
+
+;; Multi-vector form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+	   (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
+    return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3";
+  }
+)
+
+;; "Single" form: one broadcast .h vector against a multi-vector operand.
+(define_insn "@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HIx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HIx24
+	     (match_operand:<SME_ZA_HIx24:VSINGLE> 2 "register_operand" "x"))]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2.h"
+)
+
+;; Single form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+	   (match_operand:SME_ZA_HIx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HIx24
+	     (match_operand:<SME_ZA_HIx24:VSINGLE> 3 "register_operand" "x"))]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
+    return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
+  }
+)
+
+;; "Lane" form; SME_ZA_HIx124 also covers the single-vector case, with
+;; predicates, constraints and asm decorations taken from mode attributes.
+(define_insn "@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HIx124 1 "<aligned_operand>" "<aligned_fpr>")
+	   (unspec:SME_ZA_HIx124
+	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	      (match_operand:SI 3 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  "<optab>ll\tza.d[%w0, 0:3<vg_modifier>], %1<z_suffix>, %2.h[%3]"
+)
+
+;; Lane form with a "base + constant" slice index.
+;; NOTE(review): this "*" pattern's name lacks the "_plus" suffix used by
+;; the equivalent variants above; harmless but inconsistent.
+(define_insn "*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>"
+  [(set (reg:VNx2DI_ONLY ZA_REGNUM)
+	(unspec:VNx2DI_ONLY
+	  [(reg:VNx2DI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za64_offset_range>_operand"))
+	   (match_operand:SME_ZA_HIx124 2 "<aligned_operand>" "<aligned_fpr>")
+	   (unspec:SME_ZA_HIx124
+	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	      (match_operand:SI 4 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_INT_TERNARY_SLICE))]
+  "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
+  {
+    operands[5] = GEN_INT (INTVAL (operands[1]) + 3);
+    return "<optab>ll\tza.d[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
+  }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of outer products
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BMOPA
+;; - BMOPS
 ;; - SMOPA
 ;; - SMOPS
 ;; - SUMOPA
@@ -867,6 +1540,380 @@ (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
   "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h"
 )
 
+;; SME2 sum-of-outer-products from .h inputs into a .s ZA tile.  Unlike
+;; the slice-based patterns, operand 0 is a constant tile number (printed
+;; as za%0) and operands 1-2 are governing predicates.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:DI 0 "const_int_operand")
+	   (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
+	   (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
+	   (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
+	   (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
+	  SME2_INT_MOP))]
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza%0.s, %1/m, %2/m, %3.h, %4.h"
+)
+
+;; BMOPA/BMOPS: outer product with .s inputs and a .s accumulator tile.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:DI 0 "const_int_operand")
+	   (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
+	   (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
+	   (match_operand:VNx4SI_ONLY 3 "register_operand" "w")
+	   (match_operand:VNx4SI_ONLY 4 "register_operand" "w")]
+	  SME2_BMOP))]
+  "TARGET_STREAMING_SME2"
+  "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFDOT
+;; - FDOT
+;; -------------------------------------------------------------------------
+
+;; Floating-point dot product from 16-bit inputs into .s ZA slices.
+;; The <b> attribute expands to the "bf" prefix for bfloat16 modes
+;; (BFDOT vs FDOT).  Multi-vector form with implicit slice offset 0.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_FP_DOTPROD))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+;; As above with a "base + constant" slice index in [0, 7].
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_FP_DOTPROD))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+;; "Single" form: one broadcast .h vector against a multi-vector operand.
+(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HFx24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	  SME_FP_DOTPROD))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h"
+)
+
+;; Single form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HFx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+	  SME_FP_DOTPROD))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h"
+)
+
+;; "Lane" form: the second multiplicand is lane %3 of a single vector.
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_HFx24
+	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	      (match_operand:SI 3 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_FP_DOTPROD_LANE))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h[%3]"
+)
+
+;; Lane form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_HFx24
+	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	      (match_operand:SI 4 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_FP_DOTPROD_LANE))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Ternary arithmetic on ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLA
+;; - FMLS
+;; -------------------------------------------------------------------------
+
+;; Non-widening FMLA/FMLS on ZA slices.  The elem_bits equality in the
+;; condition keeps the accumulator element size equal to the vector
+;; element size (f32 with .s ZA, f64 with .d ZA); any further gating of
+;; the f64 forms presumably lives in the iterator definitions -- TODO
+;; confirm.  Multi-vector form, implicit slice offset 0.
+(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+	(unspec:SME_ZA_SDF_I
+	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_SME2
+   && TARGET_STREAMING_SME
+   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+)
+
+;; As above with a "base + constant" slice index in [0, 7].
+(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
+  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+	(unspec:SME_ZA_SDF_I
+	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_SME2
+   && TARGET_STREAMING_SME
+   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+)
+
+;; "Single" form: one broadcast vector against a multi-vector operand.
+(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+	(unspec:SME_ZA_SDF_I
+	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_SDFx24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_SME2
+   && TARGET_STREAMING_SME
+   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>"
+)
+
+;; Single form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
+  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+	(unspec:SME_ZA_SDF_I
+	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDFx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_SDFx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_SME2
+   && TARGET_STREAMING_SME
+   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>"
+)
+
+;; "Lane" form: the second multiplicand is lane %3 of a single vector.
+(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+	(unspec:SME_ZA_SDF_I
+	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_SDFx24
+	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	      (match_operand:SI 3 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_SME2
+   && TARGET_STREAMING_SME
+   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]"
+)
+
+;; Lane form with a "base + constant" slice index.
+;; NOTE(review): this "*" pattern's name lacks the "_plus" suffix used by
+;; the other "+ offset" variants in this section; harmless but inconsistent.
+(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
+  [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
+	(unspec:SME_ZA_SDF_I
+	  [(reg:SME_ZA_SDF_I ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_0_to_7_operand"))
+	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_SDFx24
+	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	      (match_operand:SI 4 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_SME2
+   && TARGET_STREAMING_SME
+   && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
+  "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Ternary widening arithmetic on ZA slice
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFMLAL
+;; - BFMLSL
+;; - FMLAL
+;; - FMLSL
+;; -------------------------------------------------------------------------
+
+;; Widening (B)FMLAL/(B)FMLSL from 16-bit inputs into pairs of .s ZA
+;; slices (hence the fixed "0:1" range).  <b> expands to the "bf" prefix
+;; for bfloat16 modes.  Single full-vector form.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SVE_FULL_HF 1 "register_operand" "w")
+	   (match_operand:SVE_FULL_HF 2 "register_operand" "x")]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>l\tza.s[%w0, 0:1], %1.h, %2.h"
+)
+
+;; As above with a "base + constant" slice index; operands[4] is the last
+;; slice offset (base + 1), computed in the output statement.
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SVE_FULL_HF 2 "register_operand" "w")
+	   (match_operand:SVE_FULL_HF 3 "register_operand" "x")]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
+    return "<b><optab>l\tza.s[%w0, %1:%4], %2.h, %3.h";
+  }
+)
+
+;; Multi-vector (x2/x4) form.
+(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2"
+)
+
+;; Multi-vector form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
+    return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
+  }
+)
+
+;; "Single" form: one broadcast .h vector against a multi-vector operand.
+(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HFx24
+	     (match_operand:<SME_ZA_HFx24:VSINGLE> 2 "register_operand" "x"))]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2.h"
+)
+
+;; Single form with a "base + constant" slice index.
+(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HFx24
+	     (match_operand:<SME_ZA_HFx24:VSINGLE> 3 "register_operand" "x"))]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
+    return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
+  }
+)
+
+;; "Lane" form; SME_ZA_HFx124 also covers the single-vector case, with
+;; predicates, constraints and asm decorations taken from mode attributes.
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:SI 0 "register_operand" "Uci")
+	   (match_operand:SME_ZA_HFx124 1 "<aligned_operand>" "<aligned_fpr>")
+	   (unspec:SME_ZA_HFx124
+	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	      (match_operand:SI 3 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  "<b><optab>l\tza.s[%w0, 0:1<vg_modifier>], %1<z_suffix>, %2.h[%3]"
+)
+
+;; Lane form with a "base + constant" slice index.
+;; NOTE(review): this "*" pattern's name lacks the "_plus" suffix used by
+;; the equivalent variants above; harmless but inconsistent.
+(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+	(unspec:VNx4SI_ONLY
+	  [(reg:VNx4SI_ONLY ZA_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+		    (match_operand:SI 1 "const_<za32_offset_range>_operand"))
+	   (match_operand:SME_ZA_HFx124 2 "<aligned_operand>" "<aligned_fpr>")
+	   (unspec:SME_ZA_HFx124
+	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	      (match_operand:SI 4 "const_int_operand")]
+	     UNSPEC_SVE_LANE_SELECT)]
+	  SME_FP_TERNARY_SLICE))]
+  "TARGET_STREAMING_SME2"
+  {
+    operands[5] = GEN_INT (INTVAL (operands[1]) + 1);
+    return "<b><optab>l\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Sum of outer products
 ;; -------------------------------------------------------------------------
@@ -892,3 +1939,46 @@ (define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>"
    && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
   "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
 )
+
+;; =========================================================================
+;; == Table lookup
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Table lookup
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LUTI2
+;; - LUTI4
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+  UNSPEC_SME_LUTI
+])
+
+(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
+  [(set (match_operand:SVE_FULL_BHS 0 "register_operand" "=w")
+	(unspec:SVE_FULL_BHS
+	  [(reg:V8DI ZT0_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:VNx16QI 1 "register_operand" "w")
+	   (match_operand:DI 2 "const_int_operand")
+	   (const_int LUTI_BITS)]
+	  UNSPEC_SME_LUTI))]
+  "TARGET_STREAMING_SME2"
+  "luti<LUTI_BITS>\t%0.<Vetype>, zt0, %1[%2]"
+)
+
+(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
+  [(set (match_operand:SVE_BHSx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_BHSx24
+	  [(reg:V8DI ZT0_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand:VNx16QI 1 "register_operand" "w")
+	   (match_operand:DI 2 "const_int_operand")
+	   (const_int LUTI_BITS)]
+	  UNSPEC_SME_LUTI))]
+  "TARGET_STREAMING_SME2
+   && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)"
+  "luti<LUTI_BITS>\t%0, zt0, %1[%2]"
+)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 89035135a38..6492da0b383 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -240,7 +240,7 @@ public:
       {
 	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
 	e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
-	return e.map_to_rtx_codes (AND, AND, -1);
+	return e.map_to_rtx_codes (AND, AND, -1, -1);
       }
 
     if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
@@ -573,6 +573,12 @@ public:
   rtx
   expand (function_expander &e) const override
   {
+    if (e.type_suffix (0).tclass == TYPE_count)
+      {
+	unsigned int bits = e.type_suffix (0).element_bits;
+	return e.use_exact_insn (code_for_aarch64_sve_cntp_c (bits));
+      }
+
     machine_mode mode = e.vector_mode (0);
     e.add_ptrue_hint (0, mode);
     return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
@@ -640,9 +646,24 @@ public:
   rtx
   expand (function_expander &e) const override
   {
+    insn_code icode;
+    if (e.pred == PRED_none)
+      {
+	machine_mode mode0 = e.result_mode ();
+	machine_mode mode1 = GET_MODE (e.args[0]);
+	convert_optab optab;
+	if (e.type_suffix (0).integer_p)
+	  optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
+	else if (e.type_suffix (1).integer_p)
+	  optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab;
+	else
+	  optab = trunc_optab;
+	icode = convert_optab_handler (optab, mode0, mode1);
+	gcc_assert (icode != CODE_FOR_nothing);
+	return e.use_exact_insn (icode);
+      }
     machine_mode mode0 = e.vector_mode (0);
     machine_mode mode1 = e.vector_mode (1);
-    insn_code icode;
     /* All this complication comes from the need to select four things
        simultaneously:
 
@@ -706,9 +727,17 @@ public:
     /* In the optab, the multiplication operands come before the accumulator
        operand.  The optab is keyed off the multiplication mode.  */
     e.rotate_inputs_left (0, 3);
-    insn_code icode
-      = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
-					 0, GET_MODE (e.args[0]));
+    insn_code icode;
+    if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
+      icode = e.direct_optab_handler_for_sign (sdot_prod_optab,
+					       udot_prod_optab,
+					       0, GET_MODE (e.args[0]));
+    else
+      icode = (e.type_suffix (0).float_p
+	       ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
+	       : e.type_suffix (0).unsigned_p
+	       ? CODE_FOR_aarch64_sve_udotvnx4sivnx8hi
+	       : CODE_FOR_aarch64_sve_sdotvnx4sivnx8hi);
     return e.use_unpred_insn (icode);
   }
 };
@@ -721,12 +750,18 @@ public:
   rtx
   expand (function_expander &e) const override
   {
+    machine_mode mode0 = GET_MODE (e.args[0]);
+    machine_mode mode1 = GET_MODE (e.args[1]);
     /* Use the same ordering as the dot_prod_optab, with the
        accumulator last.  */
     e.rotate_inputs_left (0, 4);
     int unspec = unspec_for (e);
-    machine_mode mode = e.vector_mode (0);
-    return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
+    insn_code icode;
+    if (unspec == UNSPEC_FDOT)
+      icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf;
+    else
+      icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1);
+    return e.use_exact_insn (icode);
   }
 };
 
@@ -1013,7 +1048,7 @@ public:
 	     with an extra argument on the end.  Take the inactive elements
 	     from this extra argument.  */
 	  e.rotate_inputs_left (0, 4);
-	return e.map_to_rtx_codes (AND, AND, -1, 3);
+	return e.map_to_rtx_codes (AND, AND, -1, -1, 3);
       }
 
     machine_mode wide_mode = e.vector_mode (0);
@@ -1244,6 +1279,9 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
+    if (f.vectors_per_tuple () != 1)
+      return nullptr;
+
     tree vectype = f.vector_type (0);
 
     /* Get the predicate and base pointer.  */
@@ -1262,8 +1300,12 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-    insn_code icode = convert_optab_handler (maskload_optab,
-					     e.vector_mode (0), e.gp_mode (0));
+    insn_code icode;
+    if (e.vectors_per_tuple () == 1)
+      icode = convert_optab_handler (maskload_optab,
+				     e.vector_mode (0), e.gp_mode (0));
+    else
+      icode = code_for_aarch64_ld1 (e.tuple_mode (0));
     return e.use_contiguous_load_insn (icode);
   }
 };
@@ -1563,7 +1605,7 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-    insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
+    insn_code icode = code_for_aarch64_ldnt1 (e.tuple_mode (0));
     return e.use_contiguous_load_insn (icode);
   }
 };
@@ -1823,7 +1865,10 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    return f.fold_to_pfalse ();
+    if (f.type_suffix (0).tclass == TYPE_bool)
+      return f.fold_to_pfalse ();
+
+    return nullptr;
   }
 
   rtx
@@ -1968,13 +2013,20 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    return f.fold_to_ptrue ();
+    if (f.type_suffix (0).tclass == TYPE_bool)
+      return f.fold_to_ptrue ();
+
+    return nullptr;
   }
 
   rtx
   expand (function_expander &e) const override
   {
-    return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
+    if (e.type_suffix (0).tclass == TYPE_bool)
+      return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
+
+    auto bits = e.type_suffix (0).element_bits;
+    return e.use_exact_insn (code_for_aarch64_sve_ptrue_c (bits));
   }
 };
 
@@ -2202,12 +2254,37 @@ public:
   }
 };
 
+class svrint_impl : public function_base
+{
+public:
+  CONSTEXPR svrint_impl (optab_tag optab, int cond_unspec)
+    : m_optab (optab), m_cond_unspec (cond_unspec)
+  {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    if (e.pred == PRED_none)
+      {
+	auto icode = direct_optab_handler (m_optab, e.tuple_mode (0));
+	return e.use_exact_insn (icode);
+      }
+    return e.map_to_unspecs (-1, -1, m_cond_unspec);
+  }
+
+  optab_tag m_optab;
+  int m_cond_unspec;
+};
+
 class svsel_impl : public quiet<function_base>
 {
 public:
   gimple *
   fold (gimple_folder &f) const override
   {
+    if (f.vectors_per_tuple () > 1)
+      return nullptr;
+
     /* svsel corresponds exactly to VEC_COND_EXPR.  */
     gimple_seq stmts = NULL;
     tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
@@ -2222,9 +2299,11 @@ public:
   {
     /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
     e.rotate_inputs_left (0, 3);
-    insn_code icode = convert_optab_handler (vcond_mask_optab,
-					     e.vector_mode (0),
-					     e.gp_mode (0));
+    insn_code icode = (e.vectors_per_tuple () > 1
+		       ? code_for_aarch64_sve_sel (e.tuple_mode (0))
+		       : convert_optab_handler (vcond_mask_optab,
+						e.vector_mode (0),
+						e.gp_mode (0)));
     return e.use_exact_insn (icode);
   }
 };
@@ -2311,6 +2390,9 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
+    if (f.vectors_per_tuple () != 1)
+      return nullptr;
+
     tree vectype = f.vector_type (0);
 
     /* Get the predicate and base pointer.  */
@@ -2328,8 +2410,12 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-    insn_code icode = convert_optab_handler (maskstore_optab,
-					     e.vector_mode (0), e.gp_mode (0));
+    insn_code icode;
+    if (e.vectors_per_tuple () == 1)
+      icode = convert_optab_handler (maskstore_optab,
+				     e.vector_mode (0), e.gp_mode (0));
+    else
+      icode = code_for_aarch64_st1 (e.tuple_mode (0));
     return e.use_contiguous_store_insn (icode);
   }
 };
@@ -2447,7 +2533,7 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-    insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
+    insn_code icode = code_for_aarch64_stnt1 (e.tuple_mode (0));
     return e.use_contiguous_store_insn (icode);
   }
 };
@@ -2464,7 +2550,7 @@ public:
     /* Canonicalize subtractions of constants to additions.  */
     machine_mode mode = e.vector_mode (0);
     if (e.try_negating_argument (2, mode))
-      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);
+      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD, -1);
 
     return rtx_code_function::expand (e);
   }
@@ -2675,6 +2761,9 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
+    if (f.vectors_per_tuple () > 1)
+      return nullptr;
+
     if (f.type_suffix (1).unsigned_p)
       return fold_type<poly_uint64> (f);
     else
@@ -2812,7 +2901,8 @@ FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
 FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdot, svdot_impl,)
-FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
+FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
+					    UNSPEC_FDOT))
 FUNCTION (svdup, svdup_impl,)
 FUNCTION (svdup_lane, svdup_lane_impl,)
 FUNCTION (svdupq, svdupq_impl,)
@@ -2878,12 +2968,16 @@ FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
 FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
 FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
 FUNCTION (svmad, svmad_impl,)
-FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
-FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
+FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX,
+				     UNSPEC_FMAX))
+FUNCTION (svmaxnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMAXNM,
+						    UNSPEC_FMAXNM))
 FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
 FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
-FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
-FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
+FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN,
+				     UNSPEC_FMIN))
+FUNCTION (svminnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMINNM,
+						    UNSPEC_FMINNM))
 FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
 FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
 FUNCTION (svmla, svmla_impl,)
@@ -2955,13 +3049,13 @@ FUNCTION (svrev, svrev_impl,)
 FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
 FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
 FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
-FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
-FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
-FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
-FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
-FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
-FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
-FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
+FUNCTION (svrinta, svrint_impl, (round_optab, UNSPEC_COND_FRINTA))
+FUNCTION (svrinti, svrint_impl, (nearbyint_optab, UNSPEC_COND_FRINTI))
+FUNCTION (svrintm, svrint_impl, (floor_optab, UNSPEC_COND_FRINTM))
+FUNCTION (svrintn, svrint_impl, (roundeven_optab, UNSPEC_COND_FRINTN))
+FUNCTION (svrintp, svrint_impl, (ceil_optab, UNSPEC_COND_FRINTP))
+FUNCTION (svrintx, svrint_impl, (rint_optab, UNSPEC_COND_FRINTX))
+FUNCTION (svrintz, svrint_impl, (btrunc_optab, UNSPEC_COND_FRINTZ))
 FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
 FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
 FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index a742c7bbc56..ddeeaea2028 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -71,13 +71,14 @@ DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit)
 DEF_SVE_FUNCTION (svcntw, count_inherent, none, none)
 DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none)
 DEF_SVE_FUNCTION (svcreate2, create, all_data, none)
+DEF_SVE_FUNCTION (svcreate2, create, b, none)
 DEF_SVE_FUNCTION (svcreate3, create, all_data, none)
 DEF_SVE_FUNCTION (svcreate4, create, all_data, none)
-DEF_SVE_FUNCTION (svcvt, unary_convert, cvt, mxz)
+DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt, mxz)
 DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz)
 DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz)
-DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n, sd_integer, none)
-DEF_SVE_FUNCTION (svdot_lane, ternary_qq_lane, sd_integer, none)
+DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, sd_integer, none)
+DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, sd_integer, none)
 DEF_SVE_FUNCTION (svdup, unary_n, all_data, mxz_or_none)
 DEF_SVE_FUNCTION (svdup, unary_n, all_pred, none)
 DEF_SVE_FUNCTION (svdup_lane, binary_uint_n, all_data, none)
@@ -92,6 +93,7 @@ DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz)
 DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz)
 DEF_SVE_FUNCTION (svextw, unary, d_integer, mxz)
 DEF_SVE_FUNCTION (svget2, get, all_data, none)
+DEF_SVE_FUNCTION (svget2, get, b, none)
 DEF_SVE_FUNCTION (svget3, get, all_data, none)
 DEF_SVE_FUNCTION (svget4, get, all_data, none)
 DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none)
@@ -116,12 +118,12 @@ DEF_SVE_FUNCTION (svlsl_wide, binary_uint64_opt_n, bhs_integer, mxz)
 DEF_SVE_FUNCTION (svlsr, binary_uint_opt_n, all_unsigned, mxz)
 DEF_SVE_FUNCTION (svlsr_wide, binary_uint64_opt_n, bhs_unsigned, mxz)
 DEF_SVE_FUNCTION (svmad, ternary_opt_n, all_arith, mxz)
-DEF_SVE_FUNCTION (svmax, binary_opt_n, all_arith, mxz)
-DEF_SVE_FUNCTION (svmaxnm, binary_opt_n, all_float, mxz)
+DEF_SVE_FUNCTION (svmax, binary_opt_single_n, all_arith, mxz)
+DEF_SVE_FUNCTION (svmaxnm, binary_opt_single_n, all_float, mxz)
 DEF_SVE_FUNCTION (svmaxnmv, reduction, all_float, implicit)
 DEF_SVE_FUNCTION (svmaxv, reduction, all_arith, implicit)
-DEF_SVE_FUNCTION (svmin, binary_opt_n, all_arith, mxz)
-DEF_SVE_FUNCTION (svminnm, binary_opt_n, all_float, mxz)
+DEF_SVE_FUNCTION (svmin, binary_opt_single_n, all_arith, mxz)
+DEF_SVE_FUNCTION (svminnm, binary_opt_single_n, all_float, mxz)
 DEF_SVE_FUNCTION (svminnmv, reduction, all_float, implicit)
 DEF_SVE_FUNCTION (svminv, reduction, all_arith, implicit)
 DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz)
@@ -148,6 +150,7 @@ DEF_SVE_FUNCTION (svorr, binary_opt_n, all_integer, mxz)
 DEF_SVE_FUNCTION (svorr, binary_opt_n, b, z)
 DEF_SVE_FUNCTION (svorv, reduction, all_integer, implicit)
 DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none)
+DEF_SVE_FUNCTION (svpfalse, inherent, c, none)
 DEF_SVE_FUNCTION (svpfirst, unary, b, implicit)
 DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit)
 DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit)
@@ -204,31 +207,32 @@ DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none)
 DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz)
 DEF_SVE_FUNCTION (svrevh, unary, sd_integer, mxz)
 DEF_SVE_FUNCTION (svrevw, unary, d_integer, mxz)
-DEF_SVE_FUNCTION (svrinta, unary, all_float, mxz)
+DEF_SVE_FUNCTION (svrinta, unaryxn, all_float, mxz)
 DEF_SVE_FUNCTION (svrinti, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svrintm, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svrintn, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svrintp, unary, all_float, mxz)
+DEF_SVE_FUNCTION (svrintm, unaryxn, all_float, mxz)
+DEF_SVE_FUNCTION (svrintn, unaryxn, all_float, mxz)
+DEF_SVE_FUNCTION (svrintp, unaryxn, all_float, mxz)
 DEF_SVE_FUNCTION (svrintx, unary, all_float, mxz)
 DEF_SVE_FUNCTION (svrintz, unary, all_float, mxz)
 DEF_SVE_FUNCTION (svrsqrte, unary, all_float, none)
 DEF_SVE_FUNCTION (svrsqrts, binary, all_float, none)
 DEF_SVE_FUNCTION (svscale, binary_int_opt_n, all_float, mxz)
-DEF_SVE_FUNCTION (svsel, binary, all_data, implicit)
-DEF_SVE_FUNCTION (svsel, binary, b, implicit)
+DEF_SVE_FUNCTION (svsel, binaryxn, all_data, implicit)
+DEF_SVE_FUNCTION (svsel, binaryxn, b, implicit)
 DEF_SVE_FUNCTION (svset2, set, all_data, none)
+DEF_SVE_FUNCTION (svset2, set, b, none)
 DEF_SVE_FUNCTION (svset3, set, all_data, none)
 DEF_SVE_FUNCTION (svset4, set, all_data, none)
 DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit)
 DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz)
-DEF_SVE_FUNCTION (svst1, store, all_data, implicit)
+DEF_SVE_FUNCTION (svst1, storexn, all_data, implicit)
 DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit)
 DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit)
 DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit)
 DEF_SVE_FUNCTION (svst2, store, all_data, implicit)
 DEF_SVE_FUNCTION (svst3, store, all_data, implicit)
 DEF_SVE_FUNCTION (svst4, store, all_data, implicit)
-DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit)
+DEF_SVE_FUNCTION (svstnt1, storexn, all_data, implicit)
 DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz)
 DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz)
 DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none)
@@ -238,6 +242,7 @@ DEF_SVE_FUNCTION (svtrn2, binary, all_data, none)
 DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none)
 DEF_SVE_FUNCTION (svundef, inherent, all_data, none)
 DEF_SVE_FUNCTION (svundef2, inherent, all_data, none)
+DEF_SVE_FUNCTION (svundef2, inherent, b, none)
 DEF_SVE_FUNCTION (svundef3, inherent, all_data, none)
 DEF_SVE_FUNCTION (svundef4, inherent, all_data, none)
 DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none)
@@ -329,7 +334,7 @@ DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
 DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none)
 DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none)
 DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none)
-DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
+DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt_bfloat, mxz)
 DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
 #undef REQUIRED_EXTENSIONS
 
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 5bd200d9c0a..b40640b0763 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -60,6 +60,12 @@ using read_write_za = add_call_properties<T, CP_READ_ZA | CP_WRITE_ZA>;
 template<typename T>
 using write_za = add_call_properties<T, CP_WRITE_ZA>;
 
+template<typename T>
+using read_zt0 = add_call_properties<T, CP_READ_ZT0>;
+
+template<typename T>
+using write_zt0 = add_call_properties<T, CP_WRITE_ZT0>;
+
 /* A function_base that sometimes or always operates on tuples of
    vectors.  */
 class multi_vector_function : public function_base
@@ -102,8 +108,9 @@ public:
   memory_vector_mode (const function_instance &fi) const override
   {
     machine_mode mode = fi.vector_mode (0);
-    if (m_vectors_per_tuple != 1)
-      mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
+    auto vectors_per_tuple = fi.vectors_per_tuple ();
+    if (vectors_per_tuple != 1)
+      mode = targetm.array_mode (mode, vectors_per_tuple).require ();
     return mode;
   }
 };
@@ -196,9 +203,11 @@ class rtx_code_function_base : public function_base
 public:
   CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
 				    rtx_code code_for_uint,
-				    int unspec_for_fp = -1)
+				    int unspec_for_cond_fp = -1,
+				    int unspec_for_uncond_fp = -1)
     : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
-      m_unspec_for_fp (unspec_for_fp) {}
+      m_unspec_for_cond_fp (unspec_for_cond_fp),
+      m_unspec_for_uncond_fp (unspec_for_uncond_fp) {}
 
   /* The rtx code to use for signed and unsigned integers respectively.
      Can be UNKNOWN for functions that don't have integer forms.  */
@@ -207,7 +216,11 @@ public:
 
   /* The UNSPEC_COND_* to use for floating-point operations.  Can be -1
      for functions that only operate on integers.  */
-  int m_unspec_for_fp;
+  int m_unspec_for_cond_fp;
+
+  /* The UNSPEC_* to use for unpredicated floating-point operations.
+     Can be -1 if there is no such operation.  */
+  int m_unspec_for_uncond_fp;
 };
 
 /* A function_base for functions that have an associated rtx code.
@@ -221,7 +234,7 @@ public:
   expand (function_expander &e) const override
   {
     return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
-			       m_unspec_for_fp);
+			       m_unspec_for_cond_fp, m_unspec_for_uncond_fp);
   }
 };
 
@@ -242,7 +255,8 @@ public:
     unsigned int nargs = e.args.length ();
     e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
     return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
-			       m_unspec_for_fp, nargs - 1);
+			       m_unspec_for_cond_fp, m_unspec_for_uncond_fp,
+			       nargs - 1);
   }
 };
 
@@ -334,10 +348,13 @@ public:
   expand (function_expander &e) const override
   {
     return e.use_exact_insn (CODE (unspec_for (e),
-				   e.vector_mode (m_suffix_index)));
+				   e.tuple_mode (m_suffix_index)));
   }
 };
 
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve>
+  unspec_based_uncond_function;
+
 /* A function that performs an unspec and then adds it to another value.  */
 typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
   unspec_based_add_function;
@@ -374,6 +391,34 @@ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
 typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
   unspec_based_sub_lane_function;
 
+/* A function that has conditional and unconditional forms, with both
+   forms being associated with a single unspec each.  */
+class cond_or_uncond_unspec_function : public function_base
+{
+public:
+  CONSTEXPR cond_or_uncond_unspec_function (int cond_unspec, int uncond_unspec)
+    : m_cond_unspec (cond_unspec), m_uncond_unspec (uncond_unspec) {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    if (e.pred == PRED_none)
+      {
+	auto mode = e.tuple_mode (0);
+	auto icode = (e.mode_suffix_id == MODE_single
+		      ? code_for_aarch64_sve_single (m_uncond_unspec, mode)
+		      : code_for_aarch64_sve (m_uncond_unspec, mode));
+	return e.use_exact_insn (icode);
+      }
+    return e.map_to_unspecs (m_cond_unspec, m_cond_unspec, m_cond_unspec);
+  }
+
+  /* The unspecs for the conditional and unconditional instructions,
+     respectively.  */
+  int m_cond_unspec;
+  int m_uncond_unspec;
+};
+
 /* General SME unspec-based functions, parameterized on the vector mode.  */
 class sme_1mode_function : public read_write_za<unspec_based_function_base>
 {
@@ -388,14 +433,19 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-    auto icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1));
+    insn_code icode;
+    if (e.mode_suffix_id == MODE_single)
+      icode = code_for_aarch64_sme_single (unspec_for (e), e.tuple_mode (1));
+    else
+      icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1));
     return e.use_exact_insn (icode);
   }
 };
 
 /* General SME unspec-based functions, parameterized on both the ZA mode
    and the vector mode.  */
-template<insn_code (*CODE) (int, machine_mode, machine_mode)>
+template<insn_code (*CODE) (int, machine_mode, machine_mode),
+	 insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)>
 class sme_2mode_function_t : public read_write_za<unspec_based_function_base>
 {
 public:
@@ -409,13 +459,21 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-    insn_code icode = CODE (unspec_for (e), e.vector_mode (0),
-			    e.tuple_mode (1));
+    insn_code icode;
+    if (e.mode_suffix_id == MODE_single)
+      icode = CODE_SINGLE (unspec_for (e), e.vector_mode (0),
+			   e.tuple_mode (1));
+    else
+      icode = CODE (unspec_for (e), e.vector_mode (0), e.tuple_mode (1));
     return e.use_exact_insn (icode);
   }
 };
 
-using sme_2mode_function = sme_2mode_function_t<code_for_aarch64_sme>;
+using sme_2mode_function
+  = sme_2mode_function_t<code_for_aarch64_sme, code_for_aarch64_sme_single>;
+
+using sme_2mode_lane_function
+  = sme_2mode_function_t<code_for_aarch64_sme_lane, nullptr>;
 
 /* A function that acts like unspec_based_function_exact_insn<INT_CODE>
    when operating on integers, but that expands to an (fma ...)-style
@@ -565,6 +623,77 @@ public:
   int m_unspec;
 };
 
+/* A function that implements a x2 or x4 permute instruction.  Both forms
+   of intrinsic have a single x2 or x4 tuple argument, but the underlying
+   x2 instruction takes two separate input operands.  */
+class multireg_permute : public function_base
+{
+public:
+  CONSTEXPR multireg_permute (int unspec) : m_unspec (unspec) {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code icode = code_for_aarch64_sve (m_unspec, e.tuple_mode (0));
+    if (e.group_suffix ().vectors_per_tuple == 2)
+      {
+	machine_mode elt_mode = e.vector_mode (0);
+	rtx arg = e.args[0];
+	e.args[0] = simplify_gen_subreg (elt_mode, arg, GET_MODE (arg), 0);
+	e.args.safe_push (simplify_gen_subreg (elt_mode, arg, GET_MODE (arg),
+					       GET_MODE_SIZE (elt_mode)));
+      }
+    return e.use_exact_insn (icode);
+  }
+
+  /* The unspec associated with the permutation.  */
+  int m_unspec;
+};
+
+/* A function that has two type integer type suffixes, which might agree
+   or disagree on signedness.  There are separate instructions for each
+   signed/unsigned combination.  */
+class integer_conversion : public function_base
+{
+public:
+  CONSTEXPR integer_conversion (int unspec_for_sint, int unspec_for_sintu,
+				int unspec_for_uint, int unspec_for_uints)
+    : m_unspec_for_sint (unspec_for_sint),
+      m_unspec_for_sintu (unspec_for_sintu),
+      m_unspec_for_uint (unspec_for_uint),
+      m_unspec_for_uints (unspec_for_uints)
+  {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    machine_mode mode0 = e.vector_mode (0);
+    machine_mode mode1 = GET_MODE (e.args[0]);
+    int unspec;
+    if (e.type_suffix (0).unsigned_p == e.type_suffix (1).unsigned_p)
+      unspec = (e.type_suffix (0).unsigned_p
+		? m_unspec_for_uint
+		: m_unspec_for_sint);
+    else
+      unspec = (e.type_suffix (0).unsigned_p
+		? m_unspec_for_sintu
+		: m_unspec_for_uints);
+    return e.use_exact_insn (code_for_aarch64_sve (unspec, mode0, mode1));
+  }
+
+  /* The unspec for signed -> signed.  */
+  int m_unspec_for_sint;
+
+  /* The unspec for signed -> unsigned.  */
+  int m_unspec_for_sintu;
+
+  /* The unspec for unsigned -> signed.  */
+  int m_unspec_for_uint;
+
+  /* The unspec for unsigned -> unsigned.  */
+  int m_unspec_for_uints;
+};
+
 /* A function_base for functions that reduce a vector to a scalar.  */
 class reduction : public function_base
 {
@@ -623,7 +752,7 @@ public:
     if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT))
       {
 	e.args.last () = shift;
-	return e.map_to_rtx_codes (m_code, m_code, -1);
+	return e.map_to_rtx_codes (m_code, m_code, -1, -1);
       }
 
     if (e.pred == PRED_x)
@@ -679,6 +808,19 @@ public:
     int unspec = (e.type_suffix (1).unsigned_p
 		  ? m_unspec_for_uint
 		  : m_unspec_for_sint);
+    if (e.vectors_per_tuple () > 1)
+      {
+	auto bits = e.type_suffix (0).element_bits;
+	auto icode = code_for_aarch64_sve_while_b_x2 (unspec, bits);
+	return e.use_exact_insn (icode);
+      }
+    if (e.type_suffix (0).tclass == TYPE_count)
+      {
+	auto bits = e.type_suffix (0).element_bits;
+	auto icode = code_for_aarch64_sve_while_c (unspec, bits);
+	return e.use_exact_insn (icode);
+      }
+
     machine_mode pred_mode = e.vector_mode (0);
     scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
     return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 36c3c5005c4..9380cc7db20 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -52,6 +52,17 @@ build_const_pointer (tree t)
   return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST));
 }
 
+/* GROUP's first type suffix is a ZA-related one.  Return true if the
+   group exists only for the purpose of defining C overloads.  This is
+   useful if some forms of an instruction require one feature and other
+   forms require another feature, and neither feature implies the other.  */
+static bool
+za_group_is_pure_overload (const function_group_info &group)
+{
+  gcc_checking_assert (type_suffixes[group.types[0][0]].za_p);
+  return group.types[0][1] == NUM_TYPE_SUFFIXES;
+}
+
 /* If INSTANCE has a governing predicate, add it to the list of argument
    types in ARGUMENT_TYPES.  RETURN_TYPE is the type returned by the
    function.  */
@@ -64,7 +75,7 @@ apply_predication (const function_instance &instance, tree return_type,
      in the original format string.  */
   if (instance.pred != PRED_none && instance.pred != PRED_za_m)
     {
-      argument_types.quick_insert (0, get_svbool_t ());
+      argument_types.quick_insert (0, instance.gp_type ());
       /* For unary merge operations, the first argument is a vector with
 	 the same type as the result.  For unary_convert_narrowt it also
 	 provides the "bottom" half of active elements, and is present
@@ -82,6 +93,7 @@ apply_predication (const function_instance &instance, tree return_type,
    f<bits> - a floating-point type with the given number of bits
    f[01]   - a floating-point type with the same width as type suffix 0 or 1
    B       - bfloat16_t
+   c       - a predicate-as-counter
    h<elt>  - a half-sized version of <elt>
    p       - a predicate (represented as TYPE_SUFFIX_b)
    q<elt>  - a quarter-sized version of <elt>
@@ -118,6 +130,9 @@ parse_element_type (const function_instance &instance, const char *&format)
       return suffix;
     }
 
+  if (ch == 'c')
+    return TYPE_SUFFIX_c;
+
   if (ch == 'p')
     return TYPE_SUFFIX_b;
 
@@ -156,6 +171,8 @@ parse_element_type (const function_instance &instance, const char *&format)
    ap      - array pointer for prefetches
    as      - array pointer for stores
    b       - base vector type (from a _<m0>base suffix)
+   c0      - the result of a conversion, based on type and group suffixes
+   c1      - the source of a conversion, based on type and group suffixes
    d       - displacement vector type (from a _<m1>index or _<m1>offset suffix)
    e<name> - an enum with the given name
    s<elt>  - a scalar type with the given element suffix
@@ -189,6 +206,23 @@ parse_type (const function_instance &instance, const char *&format)
   if (ch == 'b')
     return instance.base_vector_type ();
 
+  if (ch == 'c')
+    {
+      int ch = *format++;
+      gcc_assert (ch == '0' || ch == '1');
+      unsigned int id = (ch == '0' ? 0 : 1);
+      auto vector_type = instance.type_suffix (id).vector_type;
+      unsigned int num_vectors = instance.group_suffix ().vectors_per_tuple;
+      if (num_vectors != 1)
+	{
+	  unsigned int bits = instance.type_suffix (id).element_bits;
+	  unsigned int other_bits = instance.type_suffix (1 - id).element_bits;
+	  if (other_bits > bits)
+	    num_vectors /= other_bits / bits;
+	}
+      return acle_vector_types[num_vectors - 1][vector_type];
+    }
+
   if (ch == 'd')
     return instance.displacement_vector_type ();
 
@@ -619,6 +653,63 @@ struct binary_za_m_base : public overloaded_base<1>
   }
 };
 
+/* Base class for shapes like binary_za_slice_lane.  TCLASS is the type
+   class of the final vector argument.  */
+template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS>
+struct binary_za_slice_lane_base : public overloaded_base<1>
+{
+  constexpr binary_za_slice_lane_base (unsigned int lane_type_suffix)
+    : m_lane_type_suffix (lane_type_suffix) {}
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1,v1,su64", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (4)
+	|| !r.require_scalar_type (0, "uint32_t")
+	|| !(type = r.infer_tuple_type (1))
+	|| !r.require_derived_vector_type (2, 1, type, TCLASS)
+	|| !r.require_integer_immediate (3))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int bytes = c.type_suffix (m_lane_type_suffix).element_bytes;
+    return c.require_immediate_range (3, 0, 16 / bytes - 1);
+  }
+
+  unsigned int m_lane_type_suffix;
+};
+
+/* Base class for shapes like binary_za_slice_opt_single.  TCLASS is the
+   type class of the final argument.  */
+template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS>
+struct binary_za_slice_opt_single_base : public overloaded_base<1>
+{
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (3)
+	|| !r.require_scalar_type (0, "uint32_t")
+	|| !(type = r.infer_tuple_type (1)))
+      return error_mark_node;
+
+    return r.finish_opt_single_resolution (2, 1, type, TCLASS);
+  }
+};
+
 /* Base class for inc_dec and inc_dec_pat.  */
 struct inc_dec_base : public overloaded_base<0>
 {
@@ -684,7 +775,8 @@ struct load_contiguous_base : public overloaded_base<0>
 	|| (vnum_p && !r.require_scalar_type (i + 1, "int64_t")))
       return error_mark_node;
 
-    return r.resolve_to (r.mode_suffix_id, type);
+    return r.resolve_to (r.mode_suffix_id, type, NUM_TYPE_SUFFIXES,
+			 r.group_suffix_id);
   }
 };
 
@@ -739,6 +831,29 @@ struct load_ext_gather_base : public overloaded_base<1>
   }
 };
 
+/* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t)
+
+   where the first argument is the ZT register number (currently always 0)
+   and the final argument is a constant index.  The instruction divides
+   the vector argument in BITS-bit quantities.  */
+template<unsigned int BITS>
+struct luti_lane_zt_base : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "t0,su64,vu8,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    auto nvectors = c.vectors_per_tuple ();
+    return (c.require_immediate_range (0, 0, 0)
+	    && c.require_immediate_range (2, 0, 32 / BITS / nvectors - 1));
+  }
+};
+
 /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t,
 		       sv<t0:quarter>_t)  (for integer t0)
    sv<t0>_t svmmla[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t)  (for floating-point t0)
@@ -1136,6 +1251,41 @@ struct binary_int_opt_n_def : public overloaded_base<0>
 };
 SHAPE (binary_int_opt_n)
 
+/* Like binary_int_opt_n for single vectors.  For tuples:
+
+   sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0:int>x<g>_t)
+   sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0:int>_t).  */
+struct binary_int_opt_single_n_def : public overloaded_base<0>
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "t0,t0,ts0", group, MODE_none);
+    if (group.groups[0] == GROUP_none)
+      build_all (b, "v0,v0,ss0", group, MODE_n);
+    else
+      build_all (b, "t0,t0,vs0", group, MODE_single);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    sve_type type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| !(type = r.infer_sve_type (i)))
+      return error_mark_node;
+
+    return (type.num_vectors == 1 && r.scalar_argument_p (i + 1)
+	    ? r.finish_opt_n_resolution (i + 1, i, type.type, TYPE_signed)
+	    : r.finish_opt_single_resolution (i + 1, i, type, TYPE_signed));
+  }
+};
+SHAPE (binary_int_opt_single_n)
+
 /* sv<t0>_t svfoo_<t0>(sv<t0>_t, sv<t0>_t, uint64_t)
 
    where the final argument is an integer constant expression in the
@@ -1340,6 +1490,41 @@ struct binary_opt_n_def : public overloaded_base<0>
 };
 SHAPE (binary_opt_n)
 
+/* Like binary_opt_n for single vectors.  For tuples:
+
+   sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0>x<g>_t)
+   sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t).  */
+struct binary_opt_single_n_def : public overloaded_base<0>
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "t0,t0,t0", group, MODE_none);
+    if (group.groups[0] == GROUP_none)
+      build_all (b, "v0,v0,s0", group, MODE_n);
+    else
+      build_all (b, "t0,t0,v0", group, MODE_single);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    sve_type type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| !(type = r.infer_sve_type (i)))
+      return error_mark_node;
+
+    return (type.num_vectors == 1 && r.scalar_argument_p (i + 1)
+	    ? r.finish_opt_n_resolution (i + 1, i, type.type)
+	    : r.finish_opt_single_resolution (i + 1, i, type));
+  }
+};
+SHAPE (binary_opt_single_n)
+
 /* svbool_t svfoo(svbool_t, svbool_t).  */
 struct binary_pred_def : public nonoverloaded_base
 {
@@ -1391,6 +1576,33 @@ struct binary_scalar_def : public nonoverloaded_base
 };
 SHAPE (binary_scalar)
 
+/* sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t).  */
+struct binary_single_def : public overloaded_base<0>
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "t0,t0,v0", group, MODE_single);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (2)
+	|| !(type = r.infer_sve_type (0))
+	|| !r.require_derived_vector_type (1, 0, type, r.SAME_TYPE_CLASS,
+					   r.SAME_SIZE, 1))
+      return error_mark_node;
+
+    return r.resolve_to (MODE_single, type);
+  }
+};
+SHAPE (binary_single)
+
 /* sv<t0:uint>_t svfoo[_t0](sv<t0>_t, sv<t0>_t).
 
    i.e. a version of "binary" that returns unsigned integers.  */
@@ -1642,6 +1854,67 @@ struct binary_za_m_def : public binary_za_m_base<>
 };
 SHAPE (binary_za_m)
 
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t, uint64_t)
+
+   where the first argument is a variable ZA slice and the final argument
+   indexes a single element in the preceding vector argument.  */
+struct binary_za_slice_lane_def : public binary_za_slice_lane_base<>
+{
+  constexpr binary_za_slice_lane_def () : binary_za_slice_lane_base<> (1) {}
+};
+SHAPE (binary_za_slice_lane)
+
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>x<g>_t)
+   void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>_t).
+
+   where the first argument is a variable ZA slice.  */
+struct binary_za_slice_int_opt_single_def
+  : public binary_za_slice_opt_single_base<TYPE_signed>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1,ts1", group, MODE_none);
+    build_all (b, "_,su32,t1,vs1", group, MODE_single);
+  }
+};
+SHAPE (binary_za_slice_int_opt_single)
+
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>x<g>_t)
+   void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t)
+
+   where the first argument is a variable ZA slice.  */
+struct binary_za_slice_opt_single_def
+  : public binary_za_slice_opt_single_base<>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1,t1", group, MODE_none);
+    build_all (b, "_,su32,t1,v1", group, MODE_single);
+  }
+};
+SHAPE (binary_za_slice_opt_single)
+
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>x<g>_t)
+   void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t)
+
+   where the first argument is a variable ZA slice.  */
+struct binary_za_slice_uint_opt_single_def
+  : public binary_za_slice_opt_single_base<TYPE_unsigned>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1,tu1", group, MODE_none);
+    build_all (b, "_,su32,t1,vu1", group, MODE_single);
+  }
+};
+SHAPE (binary_za_slice_uint_opt_single)
+
 /* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
 			sv<t1:uint>x<g>_t)
 
@@ -1657,6 +1930,35 @@ struct binary_za_uint_m_def : public binary_za_m_base<TYPE_unsigned>
 };
 SHAPE (binary_za_uint_m)
 
+/* sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0>x<g>_t).  */
+struct binaryxn_def : public overloaded_base<0>
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "t0,t0,t0", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    vector_type_index pred_type;
+    sve_type type;
+    if (!r.check_num_arguments (3)
+	|| (pred_type = r.infer_predicate_type (0)) == NUM_VECTOR_TYPES
+	|| !(type = r.infer_sve_type (1))
+	|| !r.require_matching_predicate_type (pred_type, type)
+	|| !r.require_matching_vector_type (2, 1, type))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binaryxn)
+
 /* bool svfoo().  */
 struct bool_inherent_def : public nonoverloaded_base
 {
@@ -1668,6 +1970,45 @@ struct bool_inherent_def : public nonoverloaded_base
 };
 SHAPE (bool_inherent)
 
+/* Either:
+
+     sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t)
+
+   for single vectors or:
+
+     sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t, sv<t0>_t)
+
+   for tuples.  */
+struct clamp_def : public overloaded_base<0>
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "t0,t0,v0,v0", group,
+	       group.groups[0] == GROUP_none ? MODE_none : MODE_single);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (3)
+	|| !(type = r.infer_sve_type (0))
+	|| !r.require_derived_vector_type (1, 0, type, r.SAME_TYPE_CLASS,
+					   r.SAME_SIZE, 1)
+	|| !r.require_derived_vector_type (2, 0, type, r.SAME_TYPE_CLASS,
+					   r.SAME_SIZE, 1))
+      return error_mark_node;
+
+    auto mode = type.num_vectors == 1 ? MODE_none : MODE_single;
+    return r.resolve_to (mode, type);
+  }
+};
+SHAPE (clamp)
+
 /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
    <t0>_t svfoo[_n_t0](<t0>_t, sv<t0>_t).  */
 struct clast_def : public overloaded_base<0>
@@ -1773,7 +2114,7 @@ struct compare_ptr_def : public overloaded_base<0>
 };
 SHAPE (compare_ptr)
 
-/* svbool_t svfoo_t0[_t1](<t1>_t, <t1>_t)
+/* svboolx<g>_t svfoo_t0[_t1]_g(<t1>_t, <t1>_t)
 
    where _t0 is a _b<bits> suffix that describes the predicate result.
    There is no direct relationship between the element sizes of _t0
@@ -1784,7 +2125,7 @@ struct compare_scalar_def : public overloaded_base<1>
   build (function_builder &b, const function_group_info &group) const override
   {
     b.add_overloaded_functions (group, MODE_none);
-    build_all (b, "vp,s1,s1", group, MODE_none);
+    build_all (b, "tp,s1,s1", group, MODE_none);
   }
 
   tree
@@ -1797,11 +2138,47 @@ struct compare_scalar_def : public overloaded_base<1>
 	|| !r.require_matching_integer_scalar_type (i + 1, i, type))
       return error_mark_node;
 
-    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
+    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type,
+			 r.group_suffix_id);
   }
 };
 SHAPE (compare_scalar)
 
+/* svcount_t svfoo_t0[_t1](<t1>_t, <t1>_t, uint64_t)
+
+   where _t0 is a _c<bits> suffix that describes the predicate-as-counter
+   result.  The final argument is an integer constant that specifies the
+   number of vectors (2 or 4).  */
+struct compare_scalar_count_def : public overloaded_base<1>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,s1,s1,su64", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type = r.infer_64bit_scalar_integer_pair (i)) == NUM_TYPE_SUFFIXES
+	|| !r.require_integer_immediate (i + 2))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_either_or (2, 2, 4);
+  }
+};
+SHAPE (compare_scalar_count)
+
 /* svbool_t svfoo[_t0](sv<t0>_t, svint64_t)  (for signed t0)
    svbool_t svfoo[_n_t0](sv<t0>_t, int64_t)  (for signed t0)
    svbool_t svfoo[_t0](sv<t0>_t, svuint64_t)  (for unsigned t0)
@@ -1865,6 +2242,25 @@ struct count_pred_def : public nonoverloaded_base
 };
 SHAPE (count_pred)
 
+/* uint64_t svfoo_t0(sv<t0>_t, uint64_t)
+
+   where the final argument must be 2 or 4.  */
+struct count_pred_c_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "su64,v0,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_either_or (1, 2, 4);
+  }
+};
+SHAPE (count_pred_c)
+
 /* uint64_t svfoo[_t0](sv<t0>_t).  */
 struct count_vector_def : public overloaded_base<0>
 {
@@ -1903,6 +2299,54 @@ struct create_def : public overloaded_base<0>
 };
 SHAPE (create)
 
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>_t, uint64_t)
+
+   where the final argument indexes a <t0>-sized group of elements in the
+   preceding vector argument.  */
+struct dot_za_slice_int_lane_def
+  : public binary_za_slice_lane_base<TYPE_signed>
+{
+  constexpr dot_za_slice_int_lane_def ()
+    : binary_za_slice_lane_base<TYPE_signed> (0) {}
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1,vs1,su64", group, MODE_none);
+  }
+};
+SHAPE (dot_za_slice_int_lane)
+
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t, uint64_t)
+
+   where the final argument indexes a <t0>-sized group of elements in the
+   preceding vector argument.  */
+struct dot_za_slice_lane_def : public binary_za_slice_lane_base<>
+{
+  constexpr dot_za_slice_lane_def () : binary_za_slice_lane_base<> (0) {}
+};
+SHAPE (dot_za_slice_lane)
+
+/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t, uint64_t)
+
+   where the final argument indexes a <t0>-sized group of elements in the
+   preceding vector argument.  */
+struct dot_za_slice_uint_lane_def
+  : public binary_za_slice_lane_base<TYPE_unsigned>
+{
+  constexpr dot_za_slice_uint_lane_def ()
+    : binary_za_slice_lane_base<TYPE_unsigned> (0) {}
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1,vu1,su64", group, MODE_none);
+  }
+};
+SHAPE (dot_za_slice_uint_lane)
+
 /* sv<t0>_t svfoo[_n]_t0(<t0>_t, ..., <t0>_t)
 
    where there are enough arguments to fill 128 bits of data (or to
@@ -1954,6 +2398,24 @@ struct ext_def : public overloaded_base<0>
 };
 SHAPE (ext)
 
+/* svboolx<g>_t svfoo_t0_g(svcount_t, uint64_t).  */
+struct extract_pred_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "tp,vc,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int size = c.vectors_per_tuple ();
+    return c.require_immediate_range (1, 0, 4 / size - 1);
+  }
+};
+SHAPE (extract_pred)
+
 /* <t0>_t svfoo[_t0](<t0>_t, sv<t0>_t).  */
 struct fold_left_def : public overloaded_base<0>
 {
@@ -2158,6 +2620,25 @@ struct inherent_za_def : public nonoverloaded_base
 };
 SHAPE (inherent_za)
 
+/* void svfoo_zt(uint64_t)
+
+   where the argument must be zero.  */
+struct inherent_zt_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "_,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_range (0, 0, 0);
+  }
+};
+SHAPE (inherent_zt)
+
 /* void svfoo_t0(uint64_t)
 
    where the argument is an integer constant that specifies an 8-bit mask.  */
@@ -2192,8 +2673,27 @@ struct ldr_za_def : public nonoverloaded_base
 };
 SHAPE (ldr_za)
 
-/* sv<t0>[xN]_t svfoo[_t0](const <t0>_t *)
-   sv<t0>[xN]_t svfoo_vnum[_t0](const <t0>_t *, int64_t).  */
+/* void svfoo_zt(uint64_t, const void *)
+
+   where the first argument must be zero.  */
+struct ldr_zt_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "_,su64,al", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_range (0, 0, 0);
+  }
+};
+SHAPE (ldr_zt)
+
+/* sv<t0>[xN]_t svfoo[_t0]_g(const <t0>_t *)
+   sv<t0>[xN]_t svfoo_vnum[_t0]_g(const <t0>_t *, int64_t).  */
 struct load_def : public load_contiguous_base
 {
   void
@@ -2423,6 +2923,12 @@ struct load_za_def : public nonoverloaded_base
 };
 SHAPE (load_za)
 
+using luti2_lane_zt_def = luti_lane_zt_base<2>;
+SHAPE (luti2_lane_zt)
+
+using luti4_lane_zt_def = luti_lane_zt_base<4>;
+SHAPE (luti4_lane_zt)
+
 /* svbool_t svfoo(enum svpattern).  */
 struct pattern_pred_def : public nonoverloaded_base
 {
@@ -2517,6 +3023,23 @@ struct rdffr_def : public nonoverloaded_base
 };
 SHAPE (rdffr)
 
+/* sv<t1>x<g>_t svfoo_t0_t1_g(uint64_t, uint32_t).  */
+struct read_za_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "t1,su64,su32", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+  }
+};
+SHAPE (read_za)
+
 /* sv<t1>_t svfoo_t0[_t1](uint64_t, uint32_t)
 
    where the first two fields form a (ZA tile, slice) pair.  */
@@ -2559,6 +3082,17 @@ struct read_za_m_def : public overloaded_base<1>
 };
 SHAPE (read_za_m)
 
+/* sv<t1>x<g>_t svfoo_t0_t1_g(uint32_t).  */
+struct read_za_slice_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "t1,su32", group, MODE_none);
+  }
+};
+SHAPE (read_za_slice)
+
 /* <t0>_t svfoo[_t0](sv<t0>_t).  */
 struct reduction_def : public overloaded_base<0>
 {
@@ -2628,6 +3162,17 @@ struct reinterpret_def : public overloaded_base<1>
 };
 SHAPE (reinterpret)
 
+/* sv<t0>_t svfoo_t0(sv<t0>_t, svbool_t, uint32_t).  */
+struct select_pred_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "v0,v0,vp,su32", group, MODE_none);
+  }
+};
+SHAPE (select_pred)
+
 /* sv<t0>xN_t svfoo[_t0](sv<t0>xN_t, uint64_t, sv<t0>_t)
 
    where the second argument is an integer constant expression in the
@@ -2797,6 +3342,42 @@ typedef shift_right_imm_narrow_wrapper<binary_imm_narrowt_base_unsigned, 2>
   shift_right_imm_narrowt_to_uint_def;
 SHAPE (shift_right_imm_narrowt_to_uint)
 
+/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
+
+   where the final argument must be an integer constant expression in the
+   range [1, sizeof (<t0>_t) * 8].  */
+struct shift_right_imm_narrowxn_def : public overloaded_base<1>
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_n);
+    build_all (b, "c0,c1,su64", group, MODE_n);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (2)
+	|| !(type = r.infer_sve_type (0))
+	|| !r.require_integer_immediate (1))
+      return error_mark_node;
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int suffix = c.group_suffix_id == GROUP_x4 ? 1 : 0;
+    unsigned int bits = c.type_suffix (suffix).element_bits;
+    return c.require_immediate_range (1, 1, bits);
+  }
+};
+SHAPE (shift_right_imm_narrowxn)
+
 /* void svfoo[_t0](<X>_t *, sv<t0>[xN]_t)
    void svfoo_vnum[_t0](<X>_t *, int64_t, sv<t0>[xN]_t)
 
@@ -2948,6 +3529,37 @@ struct store_za_def : public nonoverloaded_base
 };
 SHAPE (store_za)
 
+/* void svfoo[_t0_g](<X>_t *, sv<t0>x<g>_t)
+   void svfoo_vnum[_t0_g](<X>_t *, int64_t, sv<t0>x<g>_t)
+
+   where <X> might be tied to <t0> (for non-truncating stores) or might
+   depend on the function base name (for truncating stores).  */
+struct storexn_def : public store_def
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    bool vnum_p = r.mode_suffix_id == MODE_vnum;
+    gcc_assert (r.mode_suffix_id == MODE_none || vnum_p);
+
+    unsigned int nargs = vnum_p ? 4 : 3;
+    vector_type_index pred_type;
+    sve_type type;
+    if (!r.check_num_arguments (nargs)
+	|| (pred_type = r.infer_predicate_type (0)) == NUM_VECTOR_TYPES
+	|| !r.require_pointer_type (1)
+	|| (vnum_p && !r.require_scalar_type (2, "int64_t"))
+	|| !(type = r.infer_sve_type (nargs - 1))
+	|| !r.require_matching_predicate_type (pred_type, type))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (storexn)
+
 /* void svfoo_t0(uint32_t, void *)
    void svfoo_vnum_t0(uint32_t, void *, int64_t)
 
@@ -2963,6 +3575,25 @@ struct str_za_def : public nonoverloaded_base
 };
 SHAPE (str_za)
 
+/* void svfoo_zt(uint64_t, void *)
+
+   where the first argument must be zero.  */
+struct str_zt_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    build_all (b, "_,su64,as", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_range (0, 0, 0);
+  }
+};
+SHAPE (str_zt)
+
 /* sv<t0>_t svfoo[_t0](sv<t0>xN_t, sv<t0:uint>_t).  */
 struct tbl_tuple_def : public overloaded_base<0>
 {
@@ -3184,20 +3815,49 @@ struct ternary_opt_n_def : public overloaded_base<0>
 };
 SHAPE (ternary_opt_n)
 
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, uint64_t)
+/* A choice between:
+
+   (1) sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
+			   uint64_t)
+
+   (2) sv<t0>_t svfoo[_t0_t1](sv<t0>_t, sv<t1>_t, sv<t1>_t, uint64_t)
 
    where the final argument is an integer constant expression in the range
    [0, 16 / sizeof (<t0>_t) - 1].  */
-struct ternary_qq_lane_def : public ternary_qq_lane_base<>
+struct ternary_qq_or_011_lane_def : public ternary_qq_lane_base<>
 {
   void
   build (function_builder &b, const function_group_info &group) const override
   {
     b.add_overloaded_functions (group, MODE_none);
-    build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
+    if (group.types[0][1] == NUM_TYPE_SUFFIXES)
+      build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
+    else
+      build_all (b, "v0,v0,v1,v1,su64", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type0, type1;
+    if (!r.check_gp_argument (4, i, nargs)
+	|| (type0 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+	|| (type1 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+	|| !r.require_matching_vector_type (i + 2, i + 1, type1)
+	|| !r.require_integer_immediate (i + 3))
+      return error_mark_node;
+
+    if ((type_suffixes[type0].element_bits
+	 == 4 * type_suffixes[type1].element_bits)
+	&& type_suffixes[type0].tclass == type_suffixes[type1].tclass)
+      if (tree res = r.lookup_form (MODE_none, type0))
+	return res;
+
+    return r.resolve_to (r.mode_suffix_id, type0, type1);
   }
 };
-SHAPE (ternary_qq_lane)
+SHAPE (ternary_qq_or_011_lane)
 
 /* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
 			 uint64_t)
@@ -3240,24 +3900,64 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0>
 };
 SHAPE (ternary_qq_lane_rotate)
 
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t)
-   sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t)
+/* A choice between:
 
-   i.e. a version of the standard ternary shape ternary_opt_n in which
-   the element type of the last two arguments is the quarter-sized
-   equivalent of <t0>.  */
-struct ternary_qq_opt_n_def
+   (1) sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t)
+       sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t)
+
+       i.e. a version of the standard ternary shape ternary_opt_n in which
+       the element type of the last two arguments is the quarter-sized
+       equivalent of <t0>.
+
+   (2) sv<t0>_t svfoo[_t0_t1](sv<t0>_t, sv<t1>_t, sv<t1>_t)
+
+       where the element type of the last two arguments is specified
+       explicitly.  */
+struct ternary_qq_opt_n_or_011_def
   : public ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE>
 {
   void
   build (function_builder &b, const function_group_info &group) const override
   {
     b.add_overloaded_functions (group, MODE_none);
-    build_all (b, "v0,v0,vq0,vq0", group, MODE_none);
-    build_all (b, "v0,v0,vq0,sq0", group, MODE_n);
+    if (group.types[0][1] == NUM_TYPE_SUFFIXES)
+      {
+	build_all (b, "v0,v0,vq0,vq0", group, MODE_none);
+	build_all (b, "v0,v0,vq0,sq0", group, MODE_n);
+      }
+    else
+      build_all (b, "v0,v0,v1,v1", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type0, type1;
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type0 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+	|| (type1 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+	|| !r.require_vector_or_scalar_type (i + 2))
+      return error_mark_node;
+
+    auto mode = r.scalar_argument_p (i + 2) ? MODE_n : MODE_none;
+    if (mode == MODE_none
+	&& !r.require_matching_vector_type (i + 2, i + 1, type1))
+      return error_mark_node;
+
+    if ((type_suffixes[type0].element_bits
+	 == 4 * type_suffixes[type1].element_bits)
+	&& type_suffixes[type0].tclass == type_suffixes[type1].tclass)
+      if (tree res = r.lookup_form (mode, type0))
+	return res;
+
+    if (!r.require_nonscalar_type (i + 2))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type0, type1);
   }
 };
-SHAPE (ternary_qq_opt_n)
+SHAPE (ternary_qq_opt_n_or_011)
 
 /* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
 			 uint64_t)
@@ -3467,7 +4167,7 @@ struct unary_def : public overloaded_base<0>
   build (function_builder &b, const function_group_info &group) const override
   {
     b.add_overloaded_functions (group, MODE_none);
-    build_all (b, "v0,v0", group, MODE_none);
+    build_all (b, "t0,t0", group, MODE_none);
   }
 
   tree
@@ -3488,7 +4188,7 @@ struct unary_convert_def : public overloaded_base<1>
   build (function_builder &b, const function_group_info &group) const override
   {
     b.add_overloaded_functions (group, MODE_none);
-    build_all (b, "v0,v1", group, MODE_none);
+    build_all (b, "c0,c1", group, MODE_none);
   }
 
   tree
@@ -3529,6 +4229,38 @@ struct unary_convert_narrowt_def : public overloaded_base<1>
 };
 SHAPE (unary_convert_narrowt)
 
+/* sv<t0>x<g0>_t svfoo_t0[_t1_g](sv<t1>x<g1>_t)
+
+   where the target type <t0> must be specified explicitly but the
+   source type <t1> can be inferred.
+
+   Functions with a group suffix are unpredicated.  For them:
+
+   - If <t0> is N times wider than <t1>, the return value has N times
+     more vectors than the argument.
+
+   - If <t1> is N times wider than <t0>, the argument has N times
+     more vectors than the return type.  */
+struct unary_convertxn_def : public unary_convert_def
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    if (r.pred != PRED_none)
+      return unary_convert_def::resolve (r);
+
+    sve_type type;
+    if (!r.check_num_arguments (1)
+	|| !(type = r.infer_sve_type (0)))
+      return error_mark_node;
+
+    return r.resolve_conversion (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unary_convertxn)
+
 /* sv<t0>_t svfoo[_t0](sv<t0:half>_t).  */
 struct unary_long_def : public overloaded_base<0>
 {
@@ -3757,6 +4489,83 @@ struct unary_za_m_def : public overloaded_base<1>
 };
 SHAPE (unary_za_m)
 
+/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t).  */
+struct unary_za_slice_def : public overloaded_base<1>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    if (!za_group_is_pure_overload (group))
+      build_all (b, "_,su32,t1", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (2)
+	|| !r.require_scalar_type (0, "uint32_t")
+	|| !(type = r.infer_tuple_type (1)))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unary_za_slice)
+
+/* sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t).  */
+struct unaryxn_def : public unary_def
+{
+  bool explicit_group_suffix_p () const override { return false; }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    if (r.pred != PRED_none)
+      return unary_def::resolve (r);
+
+    sve_type type;
+    if (!r.check_num_arguments (1)
+	|| !(type = r.infer_sve_type (0)))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unaryxn)
+
+/* void svfoo_t0[_t1_g](uint64_t, uint32_t, sv<t1>x<g>_t).  */
+struct write_za_def : public overloaded_base<1>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su64,su32,t1", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (3)
+	|| !r.require_integer_immediate (0)
+	|| !r.require_scalar_type (1, "uint32_t")
+	|| !(type = r.infer_tuple_type (2)))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+  }
+};
+SHAPE (write_za)
+
 /* void svfoo_t0[_t1](uint64_t, uint32_t, svbool_t, sv<t1>_t)
 
    where the first two fields form a (ZA tile, slice) pair.  */
@@ -3791,4 +4600,28 @@ struct write_za_m_def : public overloaded_base<1>
 };
 SHAPE (write_za_m)
 
+/* void svfoo_t0[_t1_g](uint32_t, sv<t1>x<g>_t).  */
+struct write_za_slice_def : public overloaded_base<1>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su32,t1", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type;
+    if (!r.check_num_arguments (2)
+	|| !r.require_scalar_type (0, "uint32_t")
+	|| !(type = r.infer_tuple_type (1)))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (write_za_slice)
+
 }
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index d64ddca7358..88af62df48b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -40,6 +40,9 @@ namespace aarch64_sve
        one in which the argument is the usual vector, and one in which it
        is replaced by a scalar.
 
+     - an "_opt_single" suffix similarly says that the function can take
+       a vector or tuple argument, with the former having a "_single" suffix.
+
      - "_int" and "_uint" replace the argument's element type with a
        signed or unsigned integer of the same width.  The suffixes above
        then indicate whether this final argument is or might be a scalar.
@@ -75,6 +78,7 @@ namespace aarch64_sve
     extern const function_shape *const adr_offset;
     extern const function_shape *const binary;
     extern const function_shape *const binary_int_opt_n;
+    extern const function_shape *const binary_int_opt_single_n;
     extern const function_shape *const binary_lane;
     extern const function_shape *const binary_long_lane;
     extern const function_shape *const binary_long_opt_n;
@@ -82,9 +86,11 @@ namespace aarch64_sve
     extern const function_shape *const binary_narrowb_opt_n;
     extern const function_shape *const binary_narrowt_opt_n;
     extern const function_shape *const binary_opt_n;
+    extern const function_shape *const binary_opt_single_n;
     extern const function_shape *const binary_pred;
     extern const function_shape *const binary_rotate;
     extern const function_shape *const binary_scalar;
+    extern const function_shape *const binary_single;
     extern const function_shape *const binary_to_uint;
     extern const function_shape *const binary_uint;
     extern const function_shape *const binary_uint_n;
@@ -95,21 +101,33 @@ namespace aarch64_sve
     extern const function_shape *const binary_wide_opt_n;
     extern const function_shape *const binary_za_int_m;
     extern const function_shape *const binary_za_m;
+    extern const function_shape *const binary_za_slice_lane;
+    extern const function_shape *const binary_za_slice_int_opt_single;
+    extern const function_shape *const binary_za_slice_opt_single;
+    extern const function_shape *const binary_za_slice_uint_opt_single;
     extern const function_shape *const binary_za_uint_m;
+    extern const function_shape *const binaryxn;
     extern const function_shape *const bool_inherent;
+    extern const function_shape *const clamp;
     extern const function_shape *const clast;
     extern const function_shape *const compare;
     extern const function_shape *const compare_opt_n;
     extern const function_shape *const compare_ptr;
     extern const function_shape *const compare_scalar;
+    extern const function_shape *const compare_scalar_count;
     extern const function_shape *const compare_wide_opt_n;
     extern const function_shape *const count_inherent;
     extern const function_shape *const count_pat;
     extern const function_shape *const count_pred;
+    extern const function_shape *const count_pred_c;
     extern const function_shape *const count_vector;
     extern const function_shape *const create;
+    extern const function_shape *const dot_za_slice_int_lane;
+    extern const function_shape *const dot_za_slice_lane;
+    extern const function_shape *const dot_za_slice_uint_lane;
     extern const function_shape *const dupq;
     extern const function_shape *const ext;
+    extern const function_shape *const extract_pred;
     extern const function_shape *const fold_left;
     extern const function_shape *const get;
     extern const function_shape *const inc_dec;
@@ -119,7 +137,9 @@ namespace aarch64_sve
     extern const function_shape *const inherent;
     extern const function_shape *const inherent_b;
     extern const function_shape *const inherent_za;
+    extern const function_shape *const inherent_zt;
     extern const function_shape *const inherent_mask_za;
+    extern const function_shape *const ldr_zt;
     extern const function_shape *const ldr_za;
     extern const function_shape *const load;
     extern const function_shape *const load_ext;
@@ -132,6 +152,8 @@ namespace aarch64_sve
     extern const function_shape *const load_gather_vs;
     extern const function_shape *const load_replicate;
     extern const function_shape *const load_za;
+    extern const function_shape *const luti2_lane_zt;
+    extern const function_shape *const luti4_lane_zt;
     extern const function_shape *const mmla;
     extern const function_shape *const pattern_pred;
     extern const function_shape *const prefetch;
@@ -139,10 +161,13 @@ namespace aarch64_sve
     extern const function_shape *const prefetch_gather_offset;
     extern const function_shape *const ptest;
     extern const function_shape *const rdffr;
+    extern const function_shape *const read_za;
     extern const function_shape *const read_za_m;
+    extern const function_shape *const read_za_slice;
     extern const function_shape *const reduction;
     extern const function_shape *const reduction_wide;
     extern const function_shape *const reinterpret;
+    extern const function_shape *const select_pred;
     extern const function_shape *const set;
     extern const function_shape *const setffr;
     extern const function_shape *const shift_left_imm_long;
@@ -150,6 +175,7 @@ namespace aarch64_sve
     extern const function_shape *const shift_right_imm;
     extern const function_shape *const shift_right_imm_narrowb;
     extern const function_shape *const shift_right_imm_narrowt;
+    extern const function_shape *const shift_right_imm_narrowxn;
     extern const function_shape *const shift_right_imm_narrowb_to_uint;
     extern const function_shape *const shift_right_imm_narrowt_to_uint;
     extern const function_shape *const store;
@@ -158,7 +184,9 @@ namespace aarch64_sve
     extern const function_shape *const store_scatter_offset;
     extern const function_shape *const store_scatter_offset_restricted;
     extern const function_shape *const store_za;
+    extern const function_shape *const storexn;
     extern const function_shape *const str_za;
+    extern const function_shape *const str_zt;
     extern const function_shape *const tbl_tuple;
     extern const function_shape *const ternary_bfloat;
     extern const function_shape *const ternary_bfloat_lane;
@@ -171,9 +199,9 @@ namespace aarch64_sve
     extern const function_shape *const ternary_long_lane;
     extern const function_shape *const ternary_long_opt_n;
     extern const function_shape *const ternary_opt_n;
-    extern const function_shape *const ternary_qq_lane;
+    extern const function_shape *const ternary_qq_or_011_lane;
     extern const function_shape *const ternary_qq_lane_rotate;
-    extern const function_shape *const ternary_qq_opt_n;
+    extern const function_shape *const ternary_qq_opt_n_or_011;
     extern const function_shape *const ternary_qq_rotate;
     extern const function_shape *const ternary_rotate;
     extern const function_shape *const ternary_shift_left_imm;
@@ -186,6 +214,7 @@ namespace aarch64_sve
     extern const function_shape *const unary;
     extern const function_shape *const unary_convert;
     extern const function_shape *const unary_convert_narrowt;
+    extern const function_shape *const unary_convertxn;
     extern const function_shape *const unary_long;
     extern const function_shape *const unary_n;
     extern const function_shape *const unary_narrowb;
@@ -198,7 +227,11 @@ namespace aarch64_sve
     extern const function_shape *const unary_uint;
     extern const function_shape *const unary_widen;
     extern const function_shape *const unary_za_m;
+    extern const function_shape *const unary_za_slice;
+    extern const function_shape *const unaryxn;
+    extern const function_shape *const write_za;
     extern const function_shape *const write_za_m;
+    extern const function_shape *const write_za_slice;
   }
 }
 
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
index e1df6ce0d30..8d06a72f384 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
@@ -47,7 +47,7 @@ using namespace aarch64_sve;
 
 namespace {
 
-class load_store_za_base : public function_base
+class load_store_za_zt0_base : public function_base
 {
 public:
   tree
@@ -66,7 +66,7 @@ public:
   expand (function_expander &e) const override
   {
     auto za_mode = e.vector_mode (0);
-    auto z_mode = e.vector_mode (1);
+    auto z_mode = e.tuple_mode (1);
     auto icode = (za_mode == VNx1TImode
 		  ? code_for_aarch64_sme (m_unspec, za_mode, z_mode)
 		  : code_for_aarch64_sme (m_unspec, z_mode, z_mode));
@@ -76,11 +76,11 @@ public:
   int m_unspec;
 };
 
-using load_za_base = add_call_properties<load_store_za_base,
+using load_za_base = add_call_properties<load_store_za_zt0_base,
 					 CP_READ_MEMORY | CP_READ_ZA
 					 | CP_WRITE_ZA>;
 
-using store_za_base = add_call_properties<load_store_za_base,
+using store_za_base = add_call_properties<load_store_za_zt0_base,
 					  CP_WRITE_MEMORY | CP_READ_ZA>;
 
 /* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE.
@@ -161,6 +161,17 @@ expand_ldr_str_za (function_expander &e, insn_code base_code,
     }
 }
 
+/* Use instruction ICODE to expand ZT0 load or store E.  */
+
+static rtx
+expand_ldr_str_zt0 (function_expander &e, insn_code icode)
+{
+  rtx base = e.convert_to_pmode (e.args[1]);
+  rtx mem = gen_rtx_MEM (V8DImode, force_reg (Pmode, base));
+  e.add_fixed_operand (mem);
+  return e.generate_insn (icode);
+}
+
 /* Expand ZA LD1 or ST1 intrinsic E.  UNSPEC is the load or store unspec.
    IS_LOAD is true if E is a load, false if it is a store.  */
 
@@ -309,6 +320,55 @@ public:
   }
 };
 
+class svldr_zt_impl : public load_store_za_zt0_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+    return CP_READ_MEMORY | CP_WRITE_ZT0;
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_ldr_zt0);
+  }
+};
+
+class svluti_lane_zt_impl : public read_zt0<function_base>
+{
+public:
+  CONSTEXPR svluti_lane_zt_impl (unsigned int bits) : m_bits (bits) {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    auto mode = e.tuple_mode (0);
+    e.args.ordered_remove (0);
+    return e.use_exact_insn (code_for_aarch64_sme_lut (m_bits, mode));
+  }
+
+  unsigned int m_bits;
+};
+
+class svread_za_impl : public function_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+    return CP_READ_ZA;
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
+    return e.use_exact_insn (code_for_aarch64_sme_read (mode));
+  }
+};
+
 using svread_za_tile_impl = add_call_properties<read_write_za_base,
 						CP_READ_ZA>;
 
@@ -337,6 +397,41 @@ public:
   }
 };
 
+class svstr_zt_impl : public load_store_za_zt0_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+    return CP_WRITE_MEMORY | CP_READ_ZT0;
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_str_zt0);
+  }
+};
+
+class svsudot_za_impl : public read_write_za<function_base>
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    if (e.mode_suffix_id == MODE_single)
+      {
+	auto icode = code_for_aarch64_sme_single_sudot (e.vector_mode (0),
+							e.tuple_mode (1));
+	return e.use_exact_insn (icode);
+      }
+    std::swap (e.args[1], e.args[2]);
+    return e.use_exact_insn (code_for_aarch64_sme (UNSPEC_SME_USDOT,
+						   e.vector_mode (0),
+						   e.tuple_mode (1)));
+  }
+};
+
 class svundef_za_impl : public write_za<function_base>
 {
 public:
@@ -349,6 +444,24 @@ public:
   }
 };
 
+class svwrite_za_impl : public function_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+    return CP_WRITE_ZA;
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
+    e.args[1] = lowpart_subreg (mode, e.args[1], e.tuple_mode (1));
+    return e.use_exact_insn (code_for_aarch64_sme_write (mode));
+  }
+};
+
 using svwrite_za_tile_impl = add_call_properties<read_write_za_base,
 						 CP_READ_ZA | CP_WRITE_ZA>;
 
@@ -373,40 +486,94 @@ public:
   }
 };
 
+class svzero_zt_impl : public write_zt0<function_base>
+{
+public:
+  rtx
+  expand (function_expander &) const override
+  {
+    emit_insn (gen_aarch64_sme_zero_zt0 ());
+    return const0_rtx;
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace aarch64_sve {
 
 FUNCTION (arm_has_sme, arm_has_sme_impl, )
 FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, )
+FUNCTION (svadd_za, sme_1mode_function, (UNSPEC_SME_ADD, UNSPEC_SME_ADD,
+					 UNSPEC_SME_FADD))
+FUNCTION (svadd_write_za, sme_1mode_function, (UNSPEC_SME_ADD_WRITE,
+					       UNSPEC_SME_ADD_WRITE, -1))
 FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA,
 					   UNSPEC_SME_ADDHA, -1))
 FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA,
 					  UNSPEC_SME_ADDVA, -1))
+FUNCTION (svbmopa_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPA, -1))
+FUNCTION (svbmops_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPS, -1))
 FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode))
 FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode))
 FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode))
 FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode))
+FUNCTION (svdot_za, sme_2mode_function, (UNSPEC_SME_SDOT, UNSPEC_SME_UDOT,
+					 UNSPEC_SME_FDOT))
+FUNCTION (svdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SDOT,
+						   UNSPEC_SME_UDOT,
+						   UNSPEC_SME_FDOT))
 FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR))
 FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER))
 FUNCTION (svldr_za, svldr_za_impl, )
+FUNCTION (svldr_zt, svldr_zt_impl, )
+FUNCTION (svluti2_lane_zt, svluti_lane_zt_impl, (2))
+FUNCTION (svluti4_lane_zt, svluti_lane_zt_impl, (4))
+FUNCTION (svmla_za, sme_2mode_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA,
+					 UNSPEC_SME_FMLA))
+FUNCTION (svmla_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLA,
+						   UNSPEC_SME_UMLA,
+						   UNSPEC_SME_FMLA))
+FUNCTION (svmls_za, sme_2mode_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS,
+					 UNSPEC_SME_FMLS))
+FUNCTION (svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS,
+						   UNSPEC_SME_UMLS,
+						   UNSPEC_SME_FMLS))
 FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA,
 					  UNSPEC_SME_FMOPA))
 FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS,
 					  UNSPEC_SME_FMOPS))
+FUNCTION (svread_za, svread_za_impl,)
 FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR))
 FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER))
 FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR))
 FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER))
 FUNCTION (svstr_za, svstr_za_impl, )
+FUNCTION (svstr_zt, svstr_zt_impl, )
+FUNCTION (svsub_za, sme_1mode_function, (UNSPEC_SME_SUB, UNSPEC_SME_SUB,
+					 UNSPEC_SME_FSUB))
+FUNCTION (svsub_write_za, sme_1mode_function, (UNSPEC_SME_SUB_WRITE,
+					       UNSPEC_SME_SUB_WRITE, -1))
+FUNCTION (svsudot_za, svsudot_za_impl,)
+FUNCTION (svsudot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUDOT, -1, -1))
+FUNCTION (svsuvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUVDOT,
+						      -1, -1))
 FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1))
 FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1))
 FUNCTION (svundef_za, svundef_za_impl, )
+FUNCTION (svusdot_za, sme_2mode_function, (-1, UNSPEC_SME_USDOT, -1))
+FUNCTION (svusdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USDOT, -1))
+FUNCTION (svusvdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USVDOT,
+						      -1))
 FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1))
 FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1))
+FUNCTION (svvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SVDOT,
+						    UNSPEC_SME_UVDOT,
+						    UNSPEC_SME_FVDOT))
+FUNCTION (svwrite_za, svwrite_za_impl,)
 FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR))
 FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER))
 FUNCTION (svzero_mask_za, svzero_mask_za_impl, )
 FUNCTION (svzero_za, svzero_za_impl, )
+FUNCTION (svzero_zt, svzero_zt_impl, )
 
 } /* end namespace aarch64_sve */
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 5bdcc93f40f..5f76d001480 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -74,3 +74,125 @@ DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m)
 DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m)
 DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m)
 #undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SME2
+DEF_SVE_FUNCTION (svldr_zt, ldr_zt, none, none)
+DEF_SVE_FUNCTION (svstr_zt, str_zt, none, none)
+DEF_SVE_FUNCTION (svzero_zt, inherent_zt, none, none)
+#undef REQUIRED_EXTENSIONS
+
+/* The d_za entries in this section just declare C _za64 overloads,
+   which will then be resolved to either an integer function or a
+   floating-point function.  They are needed because the integer and
+   floating-point functions have different architecture requirements.  */
+#define REQUIRED_EXTENSIONS AARCH64_FL_SME2 | AARCH64_FL_SM_ON
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_s_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, d_za, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_s_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION (svbmopa, binary_za_m, za_s_unsigned, za_m)
+DEF_SME_ZA_FUNCTION (svbmops, binary_za_m, za_s_unsigned, za_m)
+DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_s_h_data,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_s_b_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_h_data,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_b_integer,
+			vg1x24, none)
+DEF_SVE_FUNCTION_GS (svluti2_lane_zt, luti2_lane_zt, bhs_data, x124, none)
+DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, bhs_data, x12, none)
+DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, hs_data, x4, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_h_data,
+			vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_b_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_h_data,
+			vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_b_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_h_data,
+			vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_b_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_h_data,
+			vg2, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_b_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_s_h_integer, za_m)
+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_s_h_integer, za_m)
+DEF_SME_ZA_FUNCTION_GS (svread, read_za_slice, za_bhsd_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svread_hor, read_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svread_ver, read_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_s_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, d_za, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub_write, binary_za_slice_opt_single, za_s_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsudot, binary_za_slice_uint_opt_single,
+		        za_s_b_signed, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsudot_lane, dot_za_slice_uint_lane,
+			za_s_b_signed, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsuvdot_lane, dot_za_slice_uint_lane,
+			za_s_b_signed, vg1x4, none)
+DEF_SME_ZA_FUNCTION_GS (svusdot, binary_za_slice_int_opt_single,
+		        za_s_b_unsigned, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svusdot_lane, dot_za_slice_int_lane,
+			za_s_b_unsigned, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svusvdot_lane, dot_za_slice_int_lane,
+			za_s_b_unsigned, vg1x4, none)
+DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_s_h_data,
+			vg1x2, none)
+DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_s_b_integer,
+			vg1x4, none)
+DEF_SME_ZA_FUNCTION_GS (svwrite, write_za_slice, za_bhsd_data, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svwrite_hor, write_za, za_bhsd_data, vg24, none)
+DEF_SME_ZA_FUNCTION_GS (svwrite_ver, write_za, za_bhsd_data, vg24, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \
+			     | AARCH64_FL_SME_I16I64 \
+			     | AARCH64_FL_SM_ON)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_integer, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_d_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_d_h_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_d_h_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_h_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_d_h_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_d_h_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_h_integer,
+			vg4, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_integer, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub_write, binary_za_slice_opt_single, za_d_integer,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_d_h_integer,
+			vg1x4, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \
+			     | AARCH64_FL_SME_F64F64 \
+			     | AARCH64_FL_SM_ON)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_float, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_d_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_d_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h b/gcc/config/aarch64/aarch64-sve-builtins-sme.h
index acfed77006b..69aca0f9a75 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h
@@ -26,31 +26,57 @@ namespace aarch64_sve
   {
     extern const function_base *const arm_has_sme;
     extern const function_base *const arm_in_streaming_mode;
+    extern const function_base *const svadd_za;
+    extern const function_base *const svadd_write_za;
     extern const function_base *const svaddha_za;
     extern const function_base *const svaddva_za;
+    extern const function_base *const svbmopa_za;
+    extern const function_base *const svbmops_za;
     extern const function_base *const svcntsb;
     extern const function_base *const svcntsd;
     extern const function_base *const svcntsh;
     extern const function_base *const svcntsw;
+    extern const function_base *const svdot_za;
+    extern const function_base *const svdot_lane_za;
     extern const function_base *const svld1_hor_za;
     extern const function_base *const svld1_ver_za;
     extern const function_base *const svldr_za;
+    extern const function_base *const svldr_zt;
+    extern const function_base *const svluti2_lane_zt;
+    extern const function_base *const svluti4_lane_zt;
+    extern const function_base *const svmla_za;
+    extern const function_base *const svmla_lane_za;
+    extern const function_base *const svmls_za;
+    extern const function_base *const svmls_lane_za;
     extern const function_base *const svmopa_za;
     extern const function_base *const svmops_za;
+    extern const function_base *const svread_za;
     extern const function_base *const svread_hor_za;
     extern const function_base *const svread_ver_za;
     extern const function_base *const svst1_hor_za;
     extern const function_base *const svst1_ver_za;
     extern const function_base *const svstr_za;
+    extern const function_base *const svstr_zt;
+    extern const function_base *const svsub_za;
+    extern const function_base *const svsub_write_za;
+    extern const function_base *const svsudot_za;
+    extern const function_base *const svsudot_lane_za;
+    extern const function_base *const svsuvdot_lane_za;
     extern const function_base *const svsumopa_za;
     extern const function_base *const svsumops_za;
+    extern const function_base *const svusdot_za;
+    extern const function_base *const svusdot_lane_za;
+    extern const function_base *const svusvdot_lane_za;
     extern const function_base *const svusmopa_za;
     extern const function_base *const svusmops_za;
+    extern const function_base *const svwrite_za;
     extern const function_base *const svwrite_hor_za;
     extern const function_base *const svwrite_ver_za;
     extern const function_base *const svundef_za;
-    extern const function_base *const svzero_za;
+    extern const function_base *const svvdot_lane_za;
     extern const function_base *const svzero_mask_za;
+    extern const function_base *const svzero_za;
+    extern const function_base *const svzero_zt;
   }
 }
 
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 73f9e5a899c..045e0d0d28d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -116,6 +116,39 @@ public:
   }
 };
 
+class svclamp_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    auto mode = e.tuple_mode (0);
+    insn_code icode;
+    if (e.type_suffix (0).float_p)
+      icode = (e.vectors_per_tuple () > 1
+	       ? code_for_aarch64_sve_fclamp_single (mode)
+	       : code_for_aarch64_sve_fclamp (mode));
+    else
+      {
+	auto max = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
+	icode = (e.vectors_per_tuple () > 1
+		 ? code_for_aarch64_sve_clamp_single (max, mode)
+		 : code_for_aarch64_sve_clamp (max, mode));
+      }
+    return e.use_exact_insn (icode);
+  }
+};
+
+class svcvtn_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    return e.use_exact_insn (code_for_aarch64_sve_cvtn (e.result_mode ()));
+  }
+};
+
 class svldnt1_gather_impl : public full_width_access
 {
 public:
@@ -188,6 +221,30 @@ public:
   }
 };
 
+class svpext_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    unsigned int bits = e.type_suffix (0).element_bits;
+    return e.use_exact_insn (e.vectors_per_tuple () == 2
+			     ? code_for_aarch64_sve_pextx2 (bits)
+			     : code_for_aarch64_sve_pext (bits));
+  }
+};
+
+class svpsel_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    unsigned int bits = e.type_suffix (0).element_bits;
+    return e.use_exact_insn (code_for_aarch64_sve_psel (bits));
+  }
+};
+
 class svqcadd_impl : public function_base
 {
 public:
@@ -255,8 +312,9 @@ public:
 	    /* The saturation has no effect, and [SU]RSHL has immediate forms
 	       that we can use for sensible shift amounts.  */
 	    function_instance instance ("svrshl", functions::svrshl,
-					shapes::binary_int_opt_n, MODE_n,
-					f.type_suffix_ids, GROUP_none, f.pred);
+					shapes::binary_int_opt_single_n,
+					MODE_n, f.type_suffix_ids, GROUP_none,
+					f.pred);
 	    return f.redirect_call (instance);
 	  }
       }
@@ -309,6 +367,9 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
+    if (f.vectors_per_tuple () > 1)
+      return nullptr;
+
     if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2)))
       {
 	if (wi::to_widest (amount) >= 0)
@@ -349,7 +410,7 @@ public:
     machine_mode mode = e.vector_mode (0);
     if (e.pred == PRED_x
 	&& aarch64_sve_sqadd_sqsub_immediate_p (mode, e.args[2], false))
-      return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1);
+      return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1, -1);
     return e.map_to_unspecs (-1, UNSPEC_USQADD, -1);
   }
 };
@@ -412,6 +473,19 @@ public:
   }
 };
 
+class svunpk_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    optab op = (e.type_suffix (0).unsigned_p ? zext_optab : sext_optab);
+    insn_code icode = convert_optab_handler (op, e.result_mode (),
+					     GET_MODE (e.args[0]));
+    return e.use_exact_insn (icode);
+  }
+};
+
 class svuqadd_impl : public function_base
 {
 public:
@@ -474,13 +548,21 @@ FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc))
 FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),)
 FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1))
 FUNCTION (svbext, unspec_based_function, (UNSPEC_BEXT, UNSPEC_BEXT, -1))
+FUNCTION (svbfmlslb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlslbvnx4sf))
+FUNCTION (svbfmlslb_lane, fixed_insn_function,
+	  (CODE_FOR_aarch64_sve_bfmlslb_lanevnx4sf))
+FUNCTION (svbfmlslt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlsltvnx4sf))
+FUNCTION (svbfmlslt_lane, fixed_insn_function,
+	  (CODE_FOR_aarch64_sve_bfmlslt_lanevnx4sf))
 FUNCTION (svbgrp, unspec_based_function, (UNSPEC_BGRP, UNSPEC_BGRP, -1))
 FUNCTION (svbsl, CODE_FOR_MODE0 (aarch64_sve2_bsl),)
 FUNCTION (svbsl1n, CODE_FOR_MODE0 (aarch64_sve2_bsl1n),)
 FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
 FUNCTION (svcdot, svcdot_impl,)
 FUNCTION (svcdot_lane, svcdot_lane_impl,)
+FUNCTION (svclamp, svclamp_impl,)
 FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
+FUNCTION (svcvtn, svcvtn_impl,)
 FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
 FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
 FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
@@ -537,13 +619,19 @@ FUNCTION (svmullt_lane, unspec_based_lane_function, (UNSPEC_SMULLT,
 						     UNSPEC_UMULLT, -1))
 FUNCTION (svnbsl, CODE_FOR_MODE0 (aarch64_sve2_nbsl),)
 FUNCTION (svnmatch, svmatch_svnmatch_impl, (UNSPEC_NMATCH))
+FUNCTION (svpext, svpext_impl,)
 FUNCTION (svpmul, CODE_FOR_MODE0 (aarch64_sve2_pmul),)
 FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1))
 FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1))
 FUNCTION (svpmullt, unspec_based_function, (-1, UNSPEC_PMULLT, -1))
 FUNCTION (svpmullt_pair, unspec_based_function, (-1, UNSPEC_PMULLT_PAIR, -1))
+FUNCTION (svpsel, svpsel_impl,)
 FUNCTION (svqabs, rtx_code_function, (SS_ABS, UNKNOWN, UNKNOWN))
 FUNCTION (svqcadd, svqcadd_impl,)
+FUNCTION (svqcvt, integer_conversion, (UNSPEC_SQCVT, UNSPEC_SQCVTU,
+				       UNSPEC_UQCVT, -1))
+FUNCTION (svqcvtn, integer_conversion, (UNSPEC_SQCVTN, UNSPEC_SQCVTUN,
+					UNSPEC_UQCVTN, -1))
 FUNCTION (svqdmlalb, unspec_based_qadd_function, (UNSPEC_SQDMULLB, -1, -1))
 FUNCTION (svqdmlalb_lane, unspec_based_qadd_lane_function, (UNSPEC_SQDMULLB,
 							    -1, -1))
@@ -579,10 +667,16 @@ FUNCTION (svqrdmlsh, unspec_based_function, (UNSPEC_SQRDMLSH, -1, -1))
 FUNCTION (svqrdmlsh_lane, unspec_based_lane_function, (UNSPEC_SQRDMLSH,
 						       -1, -1))
 FUNCTION (svqrshl, svqrshl_impl,)
+FUNCTION (svqrshr, unspec_based_uncond_function, (UNSPEC_SQRSHR,
+						  UNSPEC_UQRSHR, -1, 1))
+FUNCTION (svqrshrn, unspec_based_uncond_function, (UNSPEC_SQRSHRN,
+						   UNSPEC_UQRSHRN, -1, 1))
 FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB,
 					     UNSPEC_UQRSHRNB, -1))
 FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT,
 					     UNSPEC_UQRSHRNT, -1))
+FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, 1))
+FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, 1))
 FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1))
 FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1))
 FUNCTION (svqshl, svqshl_impl,)
@@ -603,6 +697,8 @@ FUNCTION (svraddhnb, unspec_based_function, (UNSPEC_RADDHNB,
 FUNCTION (svraddhnt, unspec_based_function, (UNSPEC_RADDHNT,
 					     UNSPEC_RADDHNT, -1))
 FUNCTION (svrax1, fixed_insn_function, (CODE_FOR_aarch64_sve2_rax1))
+FUNCTION (svrevd, unspec_based_function, (UNSPEC_REVD, UNSPEC_REVD,
+					  UNSPEC_REVD))
 FUNCTION (svrhadd, unspec_based_function, (UNSPEC_SRHADD, UNSPEC_URHADD, -1))
 FUNCTION (svrshl, svrshl_impl,)
 FUNCTION (svrshr, unspec_based_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1))
@@ -639,7 +735,12 @@ FUNCTION (svsubwb, unspec_based_function, (UNSPEC_SSUBWB, UNSPEC_USUBWB, -1))
 FUNCTION (svsubwt, unspec_based_function, (UNSPEC_SSUBWT, UNSPEC_USUBWT, -1))
 FUNCTION (svtbl2, svtbl2_impl,)
 FUNCTION (svtbx, CODE_FOR_MODE0 (aarch64_sve2_tbx),)
+FUNCTION (svunpk, svunpk_impl,)
 FUNCTION (svuqadd, svuqadd_impl,)
+FUNCTION (svuzp, multireg_permute, (UNSPEC_UZP))
+FUNCTION (svuzpq, multireg_permute, (UNSPEC_UZPQ))
+FUNCTION (svzip, multireg_permute, (UNSPEC_ZIP))
+FUNCTION (svzipq, multireg_permute, (UNSPEC_ZIPQ))
 FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
 FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
 FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 4aac1ac942a..f37a5cc6b68 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -94,7 +94,7 @@ DEF_SVE_FUNCTION (svqdmlslb_lane, ternary_long_lane, sd_signed, none)
 DEF_SVE_FUNCTION (svqdmlslbt, ternary_long_opt_n, hsd_signed, none)
 DEF_SVE_FUNCTION (svqdmlslt, ternary_long_opt_n, hsd_signed, none)
 DEF_SVE_FUNCTION (svqdmlslt_lane, ternary_long_lane, sd_signed, none)
-DEF_SVE_FUNCTION (svqdmulh, binary_opt_n, all_signed, none)
+DEF_SVE_FUNCTION (svqdmulh, binary_opt_single_n, all_signed, none)
 DEF_SVE_FUNCTION (svqdmulh_lane, binary_lane, hsd_signed, none)
 DEF_SVE_FUNCTION (svqdmullb, binary_long_opt_n, hsd_signed, none)
 DEF_SVE_FUNCTION (svqdmullb_lane, binary_long_lane, sd_signed, none)
@@ -131,7 +131,7 @@ DEF_SVE_FUNCTION (svraddhnt, binary_narrowt_opt_n, hsd_integer, none)
 DEF_SVE_FUNCTION (svrecpe, unary, s_unsigned, mxz)
 DEF_SVE_FUNCTION (svrhadd, binary_opt_n, all_integer, mxz)
 DEF_SVE_FUNCTION (svrsqrte, unary, s_unsigned, mxz)
-DEF_SVE_FUNCTION (svrshl, binary_int_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svrshl, binary_int_opt_single_n, all_integer, mxz)
 DEF_SVE_FUNCTION (svrshr, shift_right_imm, all_integer, mxz)
 DEF_SVE_FUNCTION (svrshrnb, shift_right_imm_narrowb, hsd_integer, none)
 DEF_SVE_FUNCTION (svrshrnt, shift_right_imm_narrowt, hsd_integer, none)
@@ -229,3 +229,73 @@ DEF_SVE_FUNCTION (svrax1, binary, d_integer, none)
 DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
 DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
 #undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+			     | AARCH64_FL_SVE2 \
+			     | AARCH64_FL_SME \
+			     | AARCH64_FL_SM_ON)
+DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none)
+DEF_SVE_FUNCTION (svpsel, select_pred, all_pred_count, none)
+DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+			     | AARCH64_FL_SVE2 \
+			     | AARCH64_FL_SME2 \
+			     | AARCH64_FL_SM_ON)
+DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
+DEF_SVE_FUNCTION (svbfmlslb, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlslb_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmlslt, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlslt_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svclamp, clamp, all_float, none)
+DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
+DEF_SVE_FUNCTION (svcntp, count_pred_c, all_count, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none)
+DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none)
+DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, s_narrow_fsu, none)
+DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, s_narrow_fsu, none)
+DEF_SVE_FUNCTION_GS (svld1, load, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svldnt1, load, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svmax, binary_opt_single_n, all_arith, x24, none)
+DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, all_float, x24, none)
+DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, all_arith, x24, none)
+DEF_SVE_FUNCTION_GS (svminnm, binary_opt_single_n, all_float, x24, none)
+DEF_SVE_FUNCTION_GS (svpext, extract_pred, all_count, x12, none)
+DEF_SVE_FUNCTION (svptrue, inherent, all_count, none)
+DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqdmulh, binary_opt_single_n, all_signed, x24, none)
+DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x4, x4, none)
+DEF_SVE_FUNCTION_GS (svrinta, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrintm, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrintn, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrintp, unaryxn, s_float, x24, none)
+DEF_SVE_FUNCTION_GS (svrshl, binary_int_opt_single_n, all_integer, x24, none)
+DEF_SVE_FUNCTION_GS (svsel, binaryxn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svunpk, unary_convertxn, bhs_widen, x24, none)
+DEF_SVE_FUNCTION_GS (svuzp, unaryxn, all_data, x24, none)
+DEF_SVE_FUNCTION_GS (svuzpq, unaryxn, all_data, x24, none)
+DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svzip, unaryxn, all_data, x24, none)
+DEF_SVE_FUNCTION_GS (svzipq, unaryxn, all_data, x24, none)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index 1cd4477acb3..24ee6125369 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -47,13 +47,20 @@ namespace aarch64_sve
     extern const function_base *const svbcax;
     extern const function_base *const svbdep;
     extern const function_base *const svbext;
+    extern const function_base *const svbfmlslb;
+    extern const function_base *const svbfmlslb_lane;
+    extern const function_base *const svbfmlslt;
+    extern const function_base *const svbfmlslt_lane;
     extern const function_base *const svbgrp;
     extern const function_base *const svbsl;
     extern const function_base *const svbsl1n;
     extern const function_base *const svbsl2n;
     extern const function_base *const svcdot;
     extern const function_base *const svcdot_lane;
+    extern const function_base *const svclamp;
+    extern const function_base *const svcntp;
     extern const function_base *const svcvtlt;
+    extern const function_base *const svcvtn;
     extern const function_base *const svcvtx;
     extern const function_base *const svcvtxnt;
     extern const function_base *const sveor3;
@@ -93,13 +100,17 @@ namespace aarch64_sve
     extern const function_base *const svmullt_lane;
     extern const function_base *const svnbsl;
     extern const function_base *const svnmatch;
+    extern const function_base *const svpext;
     extern const function_base *const svpmul;
     extern const function_base *const svpmullb;
     extern const function_base *const svpmullb_pair;
     extern const function_base *const svpmullt;
     extern const function_base *const svpmullt_pair;
+    extern const function_base *const svpsel;
     extern const function_base *const svqabs;
     extern const function_base *const svqcadd;
+    extern const function_base *const svqcvt;
+    extern const function_base *const svqcvtn;
     extern const function_base *const svqdmlalb;
     extern const function_base *const svqdmlalb_lane;
     extern const function_base *const svqdmlalbt;
@@ -126,8 +137,12 @@ namespace aarch64_sve
     extern const function_base *const svqrdmlsh;
     extern const function_base *const svqrdmlsh_lane;
     extern const function_base *const svqrshl;
+    extern const function_base *const svqrshr;
+    extern const function_base *const svqrshrn;
     extern const function_base *const svqrshrnb;
     extern const function_base *const svqrshrnt;
+    extern const function_base *const svqrshru;
+    extern const function_base *const svqrshrun;
     extern const function_base *const svqrshrunb;
     extern const function_base *const svqrshrunt;
     extern const function_base *const svqshl;
@@ -144,6 +159,7 @@ namespace aarch64_sve
     extern const function_base *const svraddhnb;
     extern const function_base *const svraddhnt;
     extern const function_base *const svrax1;
+    extern const function_base *const svrevd;
     extern const function_base *const svrhadd;
     extern const function_base *const svrshl;
     extern const function_base *const svrshr;
@@ -178,7 +194,12 @@ namespace aarch64_sve
     extern const function_base *const svsubwt;
     extern const function_base *const svtbl2;
     extern const function_base *const svtbx;
+    extern const function_base *const svunpk;
     extern const function_base *const svuqadd;
+    extern const function_base *const svuzp;
+    extern const function_base *const svuzpq;
+    extern const function_base *const svzip;
+    extern const function_base *const svzipq;
     extern const function_base *const svwhilege;
     extern const function_base *const svwhilegt;
     extern const function_base *const svwhilerw;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 7e4b9e67ed8..15fa5907de5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -184,6 +184,16 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_all_pred(S, D) \
   S (b8), S (b16), S (b32), S (b64)
 
+/* _c8 _c16 _c32 _c64.  */
+#define TYPES_all_count(S, D) \
+  S (c8), S (c16), S (c32), S (c64)
+
+/* _b8 _b16 _b32 _b64
+   _c8 _c16 _c32 _c64.  */
+#define TYPES_all_pred_count(S, D) \
+  TYPES_all_pred (S, D), \
+  TYPES_all_count (S, D)
+
 /* _f16 _f32 _f64.  */
 #define TYPES_all_float(S, D) \
   S (f16), S (f32), S (f64)
@@ -223,6 +233,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_b(S, D) \
   S (b)
 
+/* _c only.  */
+#define TYPES_c(S, D) \
+  S (c)
+
 /* _u8.  */
 #define TYPES_b_unsigned(S, D) \
   S (u8)
@@ -254,6 +268,19 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_bhs_integer(S, D) \
   TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D)
 
+/*      _bf16
+	 _f16  _f32
+    _s8  _s16  _s32
+    _u8  _u16  _u32.  */
+#define TYPES_bhs_data(S, D) \
+  S (bf16), S (f16), S (f32), TYPES_bhs_integer (S, D)
+
+/* _s16_s8  _s32_s16  _s64_s32
+   _u16_u8  _u32_u16  _u64_u32.  */
+#define TYPES_bhs_widen(S, D) \
+  D (s16, s8), D (s32, s16), D (s64, s32), \
+  D (u16, u8), D (u32, u16), D (u64, u32)
+
 /* _s16
    _u16.  */
 #define TYPES_h_integer(S, D) \
@@ -272,6 +299,13 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_hs_float(S, D) \
   S (f16), S (f32)
 
+/* _bf16
+    _f16  _f32
+    _s16  _s32
+    _u16  _u32.  */
+#define TYPES_hs_data(S, D) \
+  S (bf16), S (f16), S (f32), TYPES_hs_integer (S, D)
+
 /* _u16 _u64.  */
 #define TYPES_hd_unsigned(S, D) \
   S (u16), S (u64)
@@ -383,6 +417,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_cvt_bfloat(S, D) \
   D (bf16, f32)
 
+/* { _bf16 _f16 } x _f32.  */
+#define TYPES_cvt_h_s_float(S, D) \
+  D (bf16, f32), D (f16, f32)
+
 /* _f32_f16
    _f64_f32.  */
 #define TYPES_cvt_long(S, D) \
@@ -397,6 +435,15 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_cvt_narrow(S, D) \
   D (f16, f32), TYPES_cvt_narrow_s (S, D)
 
+/* { _s32 _u32 } x _f32
+
+   _f32 x { _s32 _u32 }.  */
+#define TYPES_cvt_s_s(S, D) \
+  D (s32, f32), \
+  D (u32, f32), \
+  D (f32, s32), \
+  D (f32, u32)
+
 /* { _s32 _s64 } x { _b8 _b16 _b32 _b64 }
    { _u32 _u64 }.  */
 #define TYPES_inc_dec_n1(D, A) \
@@ -407,6 +454,55 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_inc_dec_n1 (D, u32), \
   TYPES_inc_dec_n1 (D, u64)
 
+/* { _s16 _u16 } x _s32
+
+   {      _u16 } x _u32.  */
+#define TYPES_qcvt_x2(S, D) \
+  D (s16, s32), \
+  D (u16, u32), \
+  D (u16, s32)
+
+/* { _s8  _u8  } x _s32
+
+   {      _u8  } x _u32
+
+   { _s16 _u16 } x _s64
+
+   {      _u16 } x _u64.  */
+#define TYPES_qcvt_x4(S, D) \
+  D (s8, s32), \
+  D (u8, u32), \
+  D (u8, s32), \
+  D (s16, s64), \
+  D (u16, u64), \
+  D (u16, s64)
+
+/* _s16_s32
+   _u16_u32.  */
+#define TYPES_qrshr_x2(S, D) \
+  D (s16, s32), \
+  D (u16, u32)
+
+/* _u16_s32.  */
+#define TYPES_qrshru_x2(S, D) \
+  D (u16, s32)
+
+/* _s8_s32
+   _s16_s64
+   _u8_u32
+   _u16_u64.  */
+#define TYPES_qrshr_x4(S, D) \
+  D (s8, s32), \
+  D (s16, s64), \
+  D (u8, u32), \
+  D (u16, u64)
+
+/* _u8_s32
+   _u16_s64.  */
+#define TYPES_qrshru_x4(S, D) \
+  D (u8, s32), \
+  D (u16, s64)
+
 /* {     _bf16           }   {     _bf16           }
    {      _f16 _f32 _f64 }   {      _f16 _f32 _f64 }
    { _s8  _s16 _s32 _s64 } x { _s8  _s16 _s32 _s64 }
@@ -446,6 +542,28 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_while1 (D, b32), \
   TYPES_while1 (D, b64)
 
+/* { _b8 _b16 _b32 _b64 } x { _s64 }
+			    { _u64 }.  */
+#define TYPES_while_x(S, D) \
+  D (b8, s64), D (b8, u64), \
+  D (b16, s64), D (b16, u64), \
+  D (b32, s64), D (b32, u64), \
+  D (b64, s64), D (b64, u64)
+
+/* { _c8 _c16 _c32 _c64 } x { _s64 }
+			    { _u64 }.  */
+#define TYPES_while_x_c(S, D) \
+  D (c8, s64), D (c8, u64), \
+  D (c16, s64), D (c16, u64), \
+  D (c32, s64), D (c32, u64), \
+  D (c64, s64), D (c64, u64)
+
+/* _f32_f16
+   _s32_s16
+   _u32_u16.  */
+#define TYPES_s_narrow_fsu(S, D) \
+  D (f32, f16), D (s32, s16), D (u32, u16)
+
 /* _za8 _za16 _za32 _za64 _za128.  */
 #define TYPES_all_za(S, D) \
   S (za8), S (za16), S (za32), S (za64), S (za128)
@@ -478,10 +596,45 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_za_bhsd_data (S, D), \
   TYPES_reinterpret1 (D, za128)
 
+/* _za32_s8.  */
+#define TYPES_za_s_b_signed(S, D) \
+   D (za32, s8)
+
+/* _za32_u8.  */
+#define TYPES_za_s_b_unsigned(S, D) \
+   D (za32, u8)
+
+/* _za32 x { _s8 _u8 }.  */
+#define TYPES_za_s_b_integer(S, D) \
+  D (za32, s8), D (za32, u8)
+
+/* _za32 x { _s16 _u16 }.  */
+#define TYPES_za_s_h_integer(S, D) \
+  D (za32, s16), D (za32, u16)
+
+/* _za32 x { _bf16 _f16 _s16 _u16 }.  */
+#define TYPES_za_s_h_data(S, D) \
+  D (za32, bf16), D (za32, f16), D (za32, s16), D (za32, u16)
+
+/* _za32_u32.  */
+#define TYPES_za_s_unsigned(S, D) \
+  D (za32, u32)
+
 /* _za32 x { _s32 _u32 }.  */
 #define TYPES_za_s_integer(S, D) \
   D (za32, s32), D (za32, u32)
 
+/* _za32_f32.  */
+#define TYPES_za_s_float(S, D) \
+  D (za32, f32)
+
+/* _za32 x { _f32 _s32 _u32 }.  */
+#define TYPES_za_s_data(S, D) \
+  D (za32, f32), D (za32, s32), D (za32, u32)
+
+/* _za64 x { _s16 _u16 }.  */
+#define TYPES_za_d_h_integer(S, D) \
+  D (za64, s16), D (za64, u16)
 
 /* _za64_f64.  */
 #define TYPES_za_d_float(S, D) \
@@ -541,6 +694,8 @@ static const type_suffix_pair types_none[] = {
 
 /* Create an array for each TYPES_<combination> macro above.  */
 DEF_SVE_TYPES_ARRAY (all_pred);
+DEF_SVE_TYPES_ARRAY (all_count);
+DEF_SVE_TYPES_ARRAY (all_pred_count);
 DEF_SVE_TYPES_ARRAY (all_float);
 DEF_SVE_TYPES_ARRAY (all_signed);
 DEF_SVE_TYPES_ARRAY (all_float_and_signed);
@@ -556,10 +711,14 @@ DEF_SVE_TYPES_ARRAY (bs_unsigned);
 DEF_SVE_TYPES_ARRAY (bhs_signed);
 DEF_SVE_TYPES_ARRAY (bhs_unsigned);
 DEF_SVE_TYPES_ARRAY (bhs_integer);
+DEF_SVE_TYPES_ARRAY (bhs_data);
+DEF_SVE_TYPES_ARRAY (bhs_widen);
+DEF_SVE_TYPES_ARRAY (c);
 DEF_SVE_TYPES_ARRAY (h_integer);
 DEF_SVE_TYPES_ARRAY (hs_signed);
 DEF_SVE_TYPES_ARRAY (hs_integer);
 DEF_SVE_TYPES_ARRAY (hs_float);
+DEF_SVE_TYPES_ARRAY (hs_data);
 DEF_SVE_TYPES_ARRAY (hd_unsigned);
 DEF_SVE_TYPES_ARRAY (hsd_signed);
 DEF_SVE_TYPES_ARRAY (hsd_integer);
@@ -580,17 +739,38 @@ DEF_SVE_TYPES_ARRAY (d_integer);
 DEF_SVE_TYPES_ARRAY (d_data);
 DEF_SVE_TYPES_ARRAY (cvt);
 DEF_SVE_TYPES_ARRAY (cvt_bfloat);
+DEF_SVE_TYPES_ARRAY (cvt_h_s_float);
 DEF_SVE_TYPES_ARRAY (cvt_long);
 DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
 DEF_SVE_TYPES_ARRAY (cvt_narrow);
+DEF_SVE_TYPES_ARRAY (cvt_s_s);
 DEF_SVE_TYPES_ARRAY (inc_dec_n);
+DEF_SVE_TYPES_ARRAY (qcvt_x2);
+DEF_SVE_TYPES_ARRAY (qcvt_x4);
+DEF_SVE_TYPES_ARRAY (qrshr_x2);
+DEF_SVE_TYPES_ARRAY (qrshr_x4);
+DEF_SVE_TYPES_ARRAY (qrshru_x2);
+DEF_SVE_TYPES_ARRAY (qrshru_x4);
 DEF_SVE_TYPES_ARRAY (reinterpret);
 DEF_SVE_TYPES_ARRAY (reinterpret_b);
 DEF_SVE_TYPES_ARRAY (while);
+DEF_SVE_TYPES_ARRAY (while_x);
+DEF_SVE_TYPES_ARRAY (while_x_c);
+DEF_SVE_TYPES_ARRAY (s_narrow_fsu);
 DEF_SVE_TYPES_ARRAY (all_za);
 DEF_SVE_TYPES_ARRAY (d_za);
+DEF_SVE_TYPES_ARRAY (za_bhsd_data);
 DEF_SVE_TYPES_ARRAY (za_all_data);
+DEF_SVE_TYPES_ARRAY (za_s_b_signed);
+DEF_SVE_TYPES_ARRAY (za_s_b_unsigned);
+DEF_SVE_TYPES_ARRAY (za_s_b_integer);
+DEF_SVE_TYPES_ARRAY (za_s_h_integer);
+DEF_SVE_TYPES_ARRAY (za_s_h_data);
+DEF_SVE_TYPES_ARRAY (za_s_unsigned);
 DEF_SVE_TYPES_ARRAY (za_s_integer);
+DEF_SVE_TYPES_ARRAY (za_s_float);
+DEF_SVE_TYPES_ARRAY (za_s_data);
+DEF_SVE_TYPES_ARRAY (za_d_h_integer);
 DEF_SVE_TYPES_ARRAY (za_d_float);
 DEF_SVE_TYPES_ARRAY (za_d_integer);
 DEF_SVE_TYPES_ARRAY (mop_base);
@@ -605,10 +785,50 @@ static const group_suffix_index groups_none[] = {
   GROUP_none, NUM_GROUP_SUFFIXES
 };
 
+static const group_suffix_index groups_x2[] = { GROUP_x2, NUM_GROUP_SUFFIXES };
+
+static const group_suffix_index groups_x12[] = {
+  GROUP_none, GROUP_x2, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_x4[] = { GROUP_x4, NUM_GROUP_SUFFIXES };
+
+static const group_suffix_index groups_x24[] = {
+  GROUP_x2, GROUP_x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_x124[] = {
+  GROUP_none, GROUP_x2, GROUP_x4, NUM_GROUP_SUFFIXES
+};
+
 static const group_suffix_index groups_x1234[] = {
   GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES
 };
 
+static const group_suffix_index groups_vg1x2[] = {
+  GROUP_vg1x2, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg1x4[] = {
+  GROUP_vg1x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg1x24[] = {
+  GROUP_vg1x2, GROUP_vg1x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg2[] = {
+  GROUP_vg2x1, GROUP_vg2x2, GROUP_vg2x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg4[] = {
+  GROUP_vg4x1, GROUP_vg4x2, GROUP_vg4x4, NUM_GROUP_SUFFIXES
+};
+
+static const group_suffix_index groups_vg24[] = {
+  GROUP_vg2, GROUP_vg4, NUM_GROUP_SUFFIXES
+};
+
 /* Used by functions that have no governing predicate.  */
 static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
 
@@ -1007,7 +1227,7 @@ function_instance::reads_global_state_p () const
     return true;
 
   /* Handle direct reads of global state.  */
-  return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA);
+  return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA | CP_READ_ZT0);
 }
 
 /* Return true if calls to the function could modify some form of
@@ -1028,7 +1248,7 @@ function_instance::modifies_global_state_p () const
     return true;
 
   /* Handle direct modifications of global state.  */
-  return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA);
+  return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA | CP_WRITE_ZT0);
 }
 
 /* Return true if calls to the function could raise a signal.  */
@@ -1183,7 +1403,8 @@ add_shared_state_attribute (const char *name, bool is_in, bool is_out,
   };
   static state_flag_info state_flags[] =
   {
-    { "za", CP_READ_ZA, CP_WRITE_ZA }
+    { "za", CP_READ_ZA, CP_WRITE_ZA },
+    { "zt0", CP_READ_ZT0, CP_WRITE_ZT0 }
   };
 
   tree args = NULL_TREE;
@@ -1379,6 +1600,10 @@ function_builder::add_overloaded_functions (const function_group_info &group,
   auto add_group_suffix = [&](group_suffix_index group_suffix_id,
 			      unsigned int pi)
     {
+      if (mode == MODE_single
+	  && group_suffixes[group_suffix_id].vectors_per_tuple == 1)
+	return;
+
       if (!explicit_type0 && !explicit_type1)
 	/* Deal with the common case in which there is one overloaded
 	   function for all type combinations.  */
@@ -1482,6 +1707,48 @@ function_resolver::report_incorrect_num_vectors (unsigned int argno,
 	     argno + 1, fndecl, num_vectors);
 }
 
+/* Report that arguments FIRST_ARGNO and ARGNO have different numbers
+   of vectors, but are required to have the same number of vectors.
+   FIRST_TYPE and TYPE are the types that arguments FIRST_ARGNO and
+   ARGNO actually have.  */
+void
+function_resolver::report_mismatched_num_vectors (unsigned int first_argno,
+						  sve_type first_type,
+						  unsigned int argno,
+						  sve_type type)
+{
+  /* If the tuple size is implied by the group suffix, and if the first
+     type had the right number of vectors, treat argument ARGNO as being
+     individually wrong, rather than wrong in relation to FIRST_ARGNO.  */
+  if (group_suffix_id != GROUP_none
+      && first_type.num_vectors == vectors_per_tuple ())
+    {
+      report_incorrect_num_vectors (argno, type, first_type.num_vectors);
+      return;
+    }
+
+  /* Make sure that FIRST_TYPE itself is sensible before using it
+     as a basis for an error message.  */
+  if (resolve_to (mode_suffix_id, first_type) == error_mark_node)
+    return;
+
+  if (type.num_vectors != 1 && first_type.num_vectors == 1)
+    error_at (location, "passing tuple %qT to argument %d of %qE after"
+	      " passing single vector %qT to argument %d",
+	      get_vector_type (type), argno + 1, fndecl,
+	      get_vector_type (first_type), first_argno + 1);
+  else if (type.num_vectors == 1 && first_type.num_vectors != 1)
+    error_at (location, "passing single vector %qT to argument %d"
+	      " of %qE after passing tuple %qT to argument %d",
+	      get_vector_type (type), argno + 1, fndecl,
+	      get_vector_type (first_type), first_argno + 1);
+  else
+    error_at (location, "passing mismatched tuple types %qT and %qT"
+	      " to arguments %d and %d of %qE",
+	      get_vector_type (first_type), get_vector_type (type),
+	      first_argno + 1, argno + 1, fndecl);
+}
+
 /* Report that the function has no form that takes type TYPE.
    Return error_mark_node.  */
 tree
@@ -1548,8 +1815,9 @@ function_resolver::resolve_to (mode_suffix_index mode,
 	return report_no_such_form (type0);
       if (type0 == type_suffix_ids[0])
 	return report_no_such_form (type1);
-      /* To be filled in when we have other cases.  */
-      gcc_unreachable ();
+      error_at (location, "%qE has no form that takes %qT and %qT arguments",
+		fndecl, get_vector_type (type0), get_vector_type (type1));
+      return error_mark_node;
     }
   return res;
 }
@@ -1567,6 +1835,54 @@ function_resolver::resolve_to (mode_suffix_index mode, sve_type type)
   return report_no_such_form (type);
 }
 
+/* Like resolve_to, but used for a conversion function with the following
+   properties:
+
+   - The function has an explicit first type suffix.
+   - The elements of the argument (which has type TYPE) might be narrower
+     or wider than the elements of the return type.
+   - The return type has enough vectors to represent the converted value
+     of every element.
+   - The group suffix describes the wider of the argument type and the
+     return type.  */
+tree
+function_resolver::resolve_conversion (mode_suffix_index mode, sve_type type)
+{
+  auto ret_type = type_suffix_ids[0];
+  unsigned int num_ret_vectors = (type.num_vectors
+				  * type_suffixes[ret_type].element_bits
+				  / type_suffixes[type.type].element_bits);
+  if (num_ret_vectors == 1
+      || num_ret_vectors == 2
+      || num_ret_vectors == 4)
+    {
+      unsigned int num_vectors = MAX (num_ret_vectors, type.num_vectors);
+      if (tree res = lookup_form (mode, { type.type, num_vectors }))
+	return res;
+    }
+  return report_no_such_form (type);
+}
+
+/* Require argument ARGNO to be an svbool_t or svcount_t predicate.
+   Return its type on success, otherwise report an error and return
+   NUM_VECTOR_TYPES.  */
+vector_type_index
+function_resolver::infer_predicate_type (unsigned int argno)
+{
+  tree actual = get_argument_type (argno);
+  if (actual == error_mark_node)
+    return NUM_VECTOR_TYPES;
+
+  for (auto index : { VECTOR_TYPE_svbool_t, VECTOR_TYPE_svcount_t })
+    if (matches_type_p (acle_vector_types[0][index], actual))
+      return index;
+
+  error_at (location, "passing %qT to argument %d of %qE, which expects"
+	    " an %qs or %qs", actual, argno + 1, fndecl, "svbool_t",
+	    "svcount_t");
+  return NUM_VECTOR_TYPES;
+}
+
 /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
    Return the associated type suffix on success, otherwise report an
    error and return NUM_TYPE_SUFFIXES.  */
@@ -1597,6 +1913,50 @@ function_resolver::infer_integer_scalar_type (unsigned int argno)
   return NUM_TYPE_SUFFIXES;
 }
 
+/* Require arguments ARGNO and ARGNO + 1 to be 64-bit scalar integers
+   of the same signedness, or be a combination that converts unambiguously
+   to such a pair.  Return the associated type suffix if they are,
+   otherwise report an error and return NUM_TYPE_SUFFIXES.  */
+type_suffix_index
+function_resolver::infer_64bit_scalar_integer_pair (unsigned int argno)
+{
+  /* Require two scalar integers, with one having 64 bits and the other
+     one being no bigger.  */
+  tree types[] = { get_argument_type (argno), get_argument_type (argno + 1) };
+  if (!INTEGRAL_TYPE_P (types[0])
+      || !INTEGRAL_TYPE_P (types[1])
+      || MAX (TYPE_PRECISION (types[0]), TYPE_PRECISION (types[1])) != 64)
+    {
+      error_at (location, "passing %qT and %qT to arguments %d and %d of %qE,"
+		" which expects a pair of 64-bit integers", types[0], types[1],
+		argno + 1, argno + 2, fndecl);
+      return NUM_TYPE_SUFFIXES;
+    }
+
+  /* Allow signed integers smaller than int64_t to be paired with an int64_t.
+     Allow unsigned integers smaller than uint64_t to be paired with any
+     64-bit integer.  */
+  for (int i = 0; i < 2; ++i)
+    {
+      if (TYPE_PRECISION (types[i]) != 64)
+	continue;
+
+      if (TYPE_UNSIGNED (types[1 - i]) != TYPE_UNSIGNED (types[i]))
+	{
+	  if (TYPE_PRECISION (types[1 - i]) == 64)
+	    continue;
+	  if (!TYPE_UNSIGNED (types[1 - i]))
+	    continue;
+	}
+      return TYPE_UNSIGNED (types[i]) ? TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64;
+    }
+
+  error_at (location, "passing mismatched integer types %qT and %qT"
+	    " to arguments %d and %d of %qE", types[0], types[1],
+	    argno + 1, argno + 2, fndecl);
+  return NUM_TYPE_SUFFIXES;
+}
+
 /* Require argument ARGNO to be a pointer to a scalar type that has a
    corresponding type suffix.  Return that type suffix on success,
    otherwise report an error and return NUM_TYPE_SUFFIXES.
@@ -1783,6 +2143,37 @@ function_resolver::infer_tuple_type (unsigned int argno)
   return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
 }
 
+/* PRED_TYPE is the type of a governing predicate argument and DATA_TYPE
+   is the type of an argument that it predicates.  Require the two types
+   to "agree": svcount_t must be used for multiple vectors and svbool_t
+   for single vectors.
+
+   Return true if they do agree, otherwise report an error and
+   return false.  */
+bool function_resolver::
+require_matching_predicate_type (vector_type_index pred_type,
+				 sve_type data_type)
+{
+  if (pred_type == VECTOR_TYPE_svbool_t && data_type.num_vectors == 1)
+    return true;
+
+  if (pred_type == VECTOR_TYPE_svcount_t && data_type.num_vectors != 1)
+    return true;
+
+  /* Make sure that DATA_TYPE itself is sensible before using it
+     as a basis for an error message.  */
+  if (resolve_to (mode_suffix_id, data_type) == error_mark_node)
+    return false;
+
+  if (data_type.num_vectors > 1)
+    error_at (location, "operations on multiple vectors must be predicated"
+	      " by %qs rather than %qs", "svcount_t", "svbool_t");
+  else
+    error_at (location, "operations on single vectors must be predicated"
+	      " by %qs rather than %qs", "svbool_t", "svcount_t");
+  return false;
+}
+
 /* Require argument ARGNO to be a vector or scalar argument.  Return true
    if it is, otherwise report an appropriate error.  */
 bool
@@ -1835,6 +2226,12 @@ function_resolver::require_matching_vector_type (unsigned int argno,
   if (!new_type)
     return false;
 
+  if (type.num_vectors != new_type.num_vectors)
+    {
+      report_mismatched_num_vectors (first_argno, type, argno, new_type);
+      return false;
+    }
+
   if (type != new_type)
     {
       error_at (location, "passing %qT to argument %d of %qE, but"
@@ -1846,7 +2243,8 @@ function_resolver::require_matching_vector_type (unsigned int argno,
   return true;
 }
 
-/* Require argument ARGNO to be a vector type with the following properties:
+/* Require argument ARGNO to be a vector or tuple type with the following
+   properties:
 
    - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS
      is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
@@ -1858,6 +2256,9 @@ function_resolver::require_matching_vector_type (unsigned int argno,
      - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE
      - EXPECTED_BITS itself otherwise
 
+   - the number of vectors must be the same as FIRST_TYPE's if
+     EXPECTED_NUM_VECTORS is zero, otherwise it must be EXPECTED_NUM_VECTORS.
+
    Return true if the argument has the required type, otherwise report
    an appropriate error.
 
@@ -1877,13 +2278,14 @@ require_derived_vector_type (unsigned int argno,
 			     unsigned int first_argno,
 			     sve_type first_type,
 			     type_class_index expected_tclass,
-			     unsigned int expected_bits)
+			     unsigned int expected_bits,
+			     unsigned int expected_num_vectors)
 {
   /* If the type needs to match FIRST_ARGNO exactly, use the preferred
      error message for that case.  */
-  if (first_type.num_vectors == 1
-      && expected_tclass == SAME_TYPE_CLASS
-      && expected_bits == SAME_SIZE)
+  if (expected_tclass == SAME_TYPE_CLASS
+      && expected_bits == SAME_SIZE
+      && expected_num_vectors == 0)
     {
       /* There's no need to resolve this case out of order.  */
       gcc_assert (argno > first_argno);
@@ -1904,10 +2306,15 @@ require_derived_vector_type (unsigned int argno,
   else if (expected_bits == QUARTER_SIZE)
     expected_bits = first_type_suffix.element_bits / 4;
 
+  unsigned int orig_expected_num_vectors = expected_num_vectors;
+  if (expected_num_vectors == 0)
+    expected_num_vectors = first_type.num_vectors;
+
   /* If the expected type doesn't depend on FIRST_TYPE at all,
      just check for the fixed choice of vector type.  */
   if (expected_tclass == orig_expected_tclass
-      && expected_bits == orig_expected_bits)
+      && expected_bits == orig_expected_bits
+      && orig_expected_num_vectors == 1)
     {
       const type_suffix_info &expected_suffix
 	= type_suffixes[find_type_suffix (expected_tclass, expected_bits)];
@@ -1916,20 +2323,37 @@ require_derived_vector_type (unsigned int argno,
 
   /* Require the argument to be some form of SVE vector type,
      without being specific about the type of vector we want.  */
-  sve_type actual_type = infer_vector_type (argno);
+  sve_type actual_type = infer_sve_type (argno);
   if (!actual_type)
     return false;
 
+  if (actual_type.num_vectors != expected_num_vectors)
+    {
+      if (orig_expected_num_vectors == 0)
+	report_mismatched_num_vectors (first_argno, first_type,
+				       argno, actual_type);
+      else
+	report_incorrect_num_vectors (argno, actual_type,
+				      expected_num_vectors);
+      return false;
+    }
+
   if (orig_expected_tclass == SAME_TYPE_CLASS
       && orig_expected_bits == SAME_SIZE)
     {
       if (actual_type.type == first_type.type)
 	return true;
 
-      error_at (location, "passing %qT to argument %d of %qE, but"
-		" argument %d was a tuple of %qT",
-		get_vector_type (actual_type), argno + 1, fndecl,
-		first_argno + 1, get_vector_type (first_type.type));
+      if (first_type.num_vectors > 1)
+	error_at (location, "passing %qT to argument %d of %qE, but"
+		  " argument %d was a tuple of %qT",
+		  get_vector_type (actual_type), argno + 1, fndecl,
+		  first_argno + 1, get_vector_type (first_type.type));
+      else
+	error_at (location, "passing %qT to argument %d of %qE, but"
+		  " argument %d had type %qT",
+		  get_vector_type (actual_type), argno + 1, fndecl,
+		  first_argno + 1, get_vector_type (first_type));
       return false;
     }
 
@@ -1944,10 +2368,16 @@ require_derived_vector_type (unsigned int argno,
      size requirement, without having to refer to FIRST_TYPE.  */
   if (!size_ok_p && expected_bits == orig_expected_bits)
     {
-      error_at (location, "passing %qT to argument %d of %qE, which"
-		" expects a vector of %d-bit elements",
-		get_vector_type (actual_type), argno + 1, fndecl,
-		expected_bits);
+      if (expected_num_vectors == 1)
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects a vector of %d-bit elements",
+		  get_vector_type (actual_type), argno + 1, fndecl,
+		  expected_bits);
+      else
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects vectors of %d-bit elements",
+		  get_vector_type (actual_type), argno + 1, fndecl,
+		  expected_bits);
       return false;
     }
 
@@ -1956,16 +2386,30 @@ require_derived_vector_type (unsigned int argno,
      translation work for other type classes.  */
   if (!tclass_ok_p && orig_expected_tclass == TYPE_signed)
     {
-      error_at (location, "passing %qT to argument %d of %qE, which"
-		" expects a vector of signed integers",
-		get_vector_type (actual_type), argno + 1, fndecl);
+      if (expected_num_vectors == 1)
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects a vector of signed integers",
+		  get_vector_type (actual_type), argno + 1, fndecl);
+      else
+	/* Translation note: could also be written "expects a tuple of
+	   signed integer vectors".  */
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects vectors of signed integers",
+		  get_vector_type (actual_type), argno + 1, fndecl);
       return false;
     }
   if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned)
     {
-      error_at (location, "passing %qT to argument %d of %qE, which"
-		" expects a vector of unsigned integers",
-		get_vector_type (actual_type), argno + 1, fndecl);
+      if (expected_num_vectors == 1)
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects a vector of unsigned integers",
+		  get_vector_type (actual_type), argno + 1, fndecl);
+      else
+	/* Translation note: could also be written "expects a tuple of
+	   unsigned integer vectors".  */
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects vectors of unsigned integers",
+		  get_vector_type (actual_type), argno + 1, fndecl);
       return false;
     }
 
@@ -1976,9 +2420,7 @@ require_derived_vector_type (unsigned int argno,
 
   /* If the arguments have consistent type classes, but a link between
      the sizes has been broken, try to describe the error in those terms.  */
-  if (first_type.num_vectors == 1
-      && tclass_ok_p
-      && orig_expected_bits == SAME_SIZE)
+  if (tclass_ok_p && orig_expected_bits == SAME_SIZE)
     {
       if (argno < first_argno)
 	{
@@ -1995,8 +2437,7 @@ require_derived_vector_type (unsigned int argno,
 
   /* Likewise in reverse: look for cases in which the sizes are consistent
      but a link between the type classes has been broken.  */
-  if (first_type.num_vectors == 1
-      && size_ok_p
+  if (size_ok_p
       && orig_expected_tclass == SAME_TYPE_CLASS
       && first_type_suffix.integer_p
       && actual_type_suffix.integer_p)
@@ -2055,10 +2496,29 @@ function_resolver::require_scalar_type (unsigned int argno,
 					const char *expected)
 {
   if (!scalar_argument_p (argno))
+    {
+      if (expected)
+	error_at (location, "passing %qT to argument %d of %qE, which"
+		  " expects %qs", get_argument_type (argno), argno + 1,
+		  fndecl, expected);
+      return false;
+    }
+  return true;
+}
+
+/* Require argument ARGNO to be a nonscalar type, given that it has already
+   passed require_vector_or_scalar_type.  Return true if it is, otherwise
+   report an error.  This is used when two sets of instructions share the
+   same overloaded function and one accepts scalars while the other
+   doesn't.  */
+bool
+function_resolver::require_nonscalar_type (unsigned int argno)
+{
+  if (scalar_argument_p (argno))
     {
       error_at (location, "passing %qT to argument %d of %qE, which"
-		" expects %qs", get_argument_type (argno), argno + 1,
-		fndecl, expected);
+		" does not accept scalars for this combination of arguments",
+		get_argument_type (argno), argno + 1, fndecl);
       return false;
     }
   return true;
@@ -2493,7 +2953,7 @@ function_resolver::check_gp_argument (unsigned int nops,
       gcc_assert (!shape->has_merge_argument_p (*this, nops));
       nargs = nops + 1;
       if (!check_num_arguments (nargs)
-	  || !require_vector_type (i, VECTOR_TYPE_svbool_t))
+	  || !require_vector_type (i, gp_type_index ()))
 	return false;
       i += 1;
     }
@@ -2563,6 +3023,58 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
   return resolve_to (mode_suffix_id, inferred_type);
 }
 
+/* Finish resolving a function whose final argument can be a tuple
+   or a vector, with the function having an implicit "_single" suffix
+   in the latter case.  This "_single" form might only exist for certain
+   type suffixes.
+
+   ARGNO is the index of the final argument.  The inferred type suffix
+   was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE.
+   EXPECTED_TCLASS gives the expected type class for the final tuple
+   or vector.
+
+   Return the function decl of the resolved function on success,
+   otherwise report a suitable error and return error_mark_node.  */
+tree function_resolver::
+finish_opt_single_resolution (unsigned int argno, unsigned int first_argno,
+			      sve_type first_type,
+			      type_class_index expected_tclass)
+{
+  sve_type new_type = infer_sve_type (argno);
+  if (!new_type)
+    return error_mark_node;
+
+  /* If the type is a tuple, require it to match the group suffix.  */
+  unsigned int num_vectors = vectors_per_tuple ();
+  if (num_vectors != 1
+      && new_type.num_vectors != 1
+      && new_type.num_vectors != num_vectors)
+    {
+      report_incorrect_num_vectors (argno, new_type, num_vectors);
+      return error_mark_node;
+    }
+
+  auto expected_num_vectors = (new_type.num_vectors == 1 ? 1 : 0);
+  if (!require_derived_vector_type (argno, first_argno, first_type,
+				    expected_tclass, SAME_SIZE,
+				    expected_num_vectors))
+    return error_mark_node;
+
+  if (new_type.num_vectors == 1 && first_type.num_vectors > 1)
+    {
+      if (tree single_form = lookup_form (MODE_single, first_type))
+	return single_form;
+
+      if (resolve_to (mode_suffix_id, first_type) != error_mark_node)
+	error_at (location, "passing %qT to argument %d of %qE, but its"
+		  " %qT form does not accept single vectors",
+		  get_vector_type (new_type), argno + 1, fndecl,
+		  get_vector_type (first_type));
+      return error_mark_node;
+    }
+  return resolve_to (mode_suffix_id, first_type);
+}
+
 /* Resolve a (possibly predicated) unary function.  If the function uses
    merge predication or if TREAT_AS_MERGE_P is true, there is an extra
    vector argument before the governing predicate that specifies the
@@ -2747,7 +3259,7 @@ function_checker::require_immediate_either_or (unsigned int rel_argno,
 
   if (actual != value0 && actual != value1)
     {
-      report_neither_nor (location, fndecl, argno, actual, 90, 270);
+      report_neither_nor (location, fndecl, argno, actual, value0, value1);
       return false;
     }
 
@@ -3117,7 +3629,7 @@ function_expander::function_expander (const function_instance &instance,
 insn_code
 function_expander::direct_optab_handler (optab op, unsigned int suffix_i)
 {
-  return ::direct_optab_handler (op, vector_mode (suffix_i));
+  return ::direct_optab_handler (op, tuple_mode (suffix_i));
 }
 
 /* Choose between signed and unsigned direct optabs SIGNED_OP and
@@ -3552,7 +4064,8 @@ function_expander::use_pred_x_insn (insn_code icode)
 	has_float_operand_p = true;
     }
 
-  if (has_float_operand_p)
+  if (has_float_operand_p
+      && insn_data[icode].n_operands > (int) nops + 2)
     {
       /* Add a flag that indicates whether unpredicated instructions
 	 are allowed.  */
@@ -3685,7 +4198,8 @@ function_expander::use_contiguous_store_insn (insn_code icode)
 
    - CODE_FOR_SINT for signed integers
    - CODE_FOR_UINT for unsigned integers
-   - UNSPEC_FOR_FP for floating-point values
+   - UNSPEC_FOR_COND_FP for predicated floating-point
+   - UNSPEC_FOR_UNCOND_FP for unpredicated floating-point
 
    and where <code_optab> is like <optab>, but uses CODE_FOR_SINT instead
    of UNSPEC_FOR_FP for floating-point values.
@@ -3695,13 +4209,24 @@ function_expander::use_contiguous_store_insn (insn_code icode)
 rtx
 function_expander::map_to_rtx_codes (rtx_code code_for_sint,
 				     rtx_code code_for_uint,
-				     int unspec_for_fp,
+				     int unspec_for_cond_fp,
+				     int unspec_for_uncond_fp,
 				     unsigned int merge_argno)
 {
-  machine_mode mode = vector_mode (0);
+  machine_mode mode = tuple_mode (0);
   rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint);
   insn_code icode;
 
+  if (mode_suffix_id == MODE_single)
+    {
+      gcc_assert (pred == PRED_none);
+      if (type_suffix (0).integer_p)
+	icode = code_for_aarch64_sve_single (code, mode);
+      else
+	icode = code_for_aarch64_sve_single (unspec_for_uncond_fp, mode);
+      return use_exact_insn (icode);
+    }
+
   /* Handle predicate logic operations, which always use _z predication.  */
   if (type_suffix (0).tclass == TYPE_bool)
     {
@@ -3716,7 +4241,7 @@ function_expander::map_to_rtx_codes (rtx_code code_for_sint,
       if (type_suffix (0).integer_p)
 	icode = maybe_code_for_aarch64_pred (code, mode);
       else
-	icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode);
+	icode = maybe_code_for_aarch64_pred (unspec_for_cond_fp, mode);
       if (icode != CODE_FOR_nothing)
 	return use_pred_x_insn (icode);
     }
@@ -3725,7 +4250,10 @@ function_expander::map_to_rtx_codes (rtx_code code_for_sint,
      Floating-point operations conventionally use the signed rtx code.  */
   if (pred == PRED_none || pred == PRED_x)
     {
-      icode = direct_optab_handler (code_to_optab (code), 0);
+      if (type_suffix (0).float_p && unspec_for_uncond_fp >= 0)
+	icode = maybe_code_for_aarch64_sve (unspec_for_uncond_fp, mode);
+      else
+	icode = direct_optab_handler (code_to_optab (code), 0);
       if (icode == CODE_FOR_nothing)
 	icode = code_for_aarch64_sve (code, mode);
       return use_unpred_insn (icode);
@@ -3735,7 +4263,7 @@ function_expander::map_to_rtx_codes (rtx_code code_for_sint,
   if (type_suffix (0).integer_p)
     icode = code_for_cond (code, mode);
   else
-    icode = code_for_cond (unspec_for_fp, mode);
+    icode = code_for_cond (unspec_for_cond_fp, mode);
   return use_cond_insn (icode, merge_argno);
 }
 
@@ -3761,11 +4289,17 @@ rtx
 function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint,
 				   int unspec_for_fp, unsigned int merge_argno)
 {
-  machine_mode mode = vector_mode (0);
+  machine_mode mode = tuple_mode (0);
   int unspec = (!type_suffix (0).integer_p ? unspec_for_fp
 		: type_suffix (0).unsigned_p ? unspec_for_uint
 		: unspec_for_sint);
 
+  if (mode_suffix_id == MODE_single)
+    {
+      gcc_assert (pred == PRED_none);
+      return use_exact_insn (code_for_aarch64_sve_single (unspec, mode));
+    }
+
   if (pred == PRED_x)
     {
       insn_code icode = maybe_code_for_aarch64_pred (unspec, mode);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def
index 297904f3e47..23ef7889c51 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins.def
@@ -57,6 +57,7 @@
 #endif
 
 DEF_SVE_MODE (n, none, none, none)
+DEF_SVE_MODE (single, none, none, none)
 DEF_SVE_MODE (index, none, none, elements)
 DEF_SVE_MODE (offset, none, none, bytes)
 DEF_SVE_MODE (s32index, none, svint32_t, elements)
@@ -108,6 +109,10 @@ DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode)
 DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode)
 DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode)
 DEF_SVE_TYPE_SUFFIX (c, svcount_t, count, 8, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c8, svcount_t, count, 8, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c16, svcount_t, count, 16, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c32, svcount_t, count, 32, VNx16BImode)
+DEF_SVE_TYPE_SUFFIX (c64, svcount_t, count, 64, VNx16BImode)
 DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode)
 DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode)
 DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode)
@@ -133,6 +138,16 @@ DEF_SME_ZA_SUFFIX (za128, 128, VNx1TImode)
 DEF_SVE_GROUP_SUFFIX (x2, 0, 2)
 DEF_SVE_GROUP_SUFFIX (x3, 0, 3)
 DEF_SVE_GROUP_SUFFIX (x4, 0, 4)
+DEF_SVE_GROUP_SUFFIX (vg1x2, 1, 2)
+DEF_SVE_GROUP_SUFFIX (vg1x4, 1, 4)
+DEF_SVE_GROUP_SUFFIX (vg2, 2, 2)
+DEF_SVE_GROUP_SUFFIX (vg2x1, 2, 1)
+DEF_SVE_GROUP_SUFFIX (vg2x2, 2, 2)
+DEF_SVE_GROUP_SUFFIX (vg2x4, 2, 4)
+DEF_SVE_GROUP_SUFFIX (vg4, 4, 4)
+DEF_SVE_GROUP_SUFFIX (vg4x1, 4, 1)
+DEF_SVE_GROUP_SUFFIX (vg4x2, 4, 2)
+DEF_SVE_GROUP_SUFFIX (vg4x4, 4, 4)
 
 #include "aarch64-sve-builtins-base.def"
 #include "aarch64-sve-builtins-sve2.def"
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 51774825c23..e67c46581f3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -99,6 +99,8 @@ const unsigned int CP_READ_FFR = 1U << 5;
 const unsigned int CP_WRITE_FFR = 1U << 6;
 const unsigned int CP_READ_ZA = 1U << 7;
 const unsigned int CP_WRITE_ZA = 1U << 8;
+const unsigned int CP_READ_ZT0 = 1U << 9;
+const unsigned int CP_WRITE_ZT0 = 1U << 10;
 
 /* Enumerates the SVE predicate and (data) vector types, together called
    "vector types" for brevity.  */
@@ -361,6 +363,9 @@ public:
   bool modifies_global_state_p () const;
   bool could_trap_p () const;
 
+  vector_type_index gp_type_index () const;
+  tree gp_type () const;
+
   unsigned int vectors_per_tuple () const;
   tree memory_scalar_type () const;
   machine_mode memory_vector_mode () const;
@@ -469,6 +474,8 @@ public:
   bool scalar_argument_p (unsigned int);
 
   void report_incorrect_num_vectors (unsigned int, sve_type, unsigned int);
+  void report_mismatched_num_vectors (unsigned int, sve_type,
+				      unsigned int, sve_type);
 
   tree report_no_such_form (sve_type);
   tree lookup_form (mode_suffix_index,
@@ -481,8 +488,11 @@ public:
 		   type_suffix_index = NUM_TYPE_SUFFIXES,
 		   group_suffix_index = GROUP_none);
   tree resolve_to (mode_suffix_index, sve_type);
+  tree resolve_conversion (mode_suffix_index, sve_type);
 
+  vector_type_index infer_predicate_type (unsigned int);
   type_suffix_index infer_integer_scalar_type (unsigned int);
+  type_suffix_index infer_64bit_scalar_integer_pair (unsigned int);
   type_suffix_index infer_pointer_type (unsigned int, bool = false);
   sve_type infer_sve_type (unsigned int);
   sve_type infer_vector_or_tuple_type (unsigned int, unsigned int);
@@ -494,13 +504,16 @@ public:
 
   bool require_vector_or_scalar_type (unsigned int);
 
+  bool require_matching_predicate_type (vector_type_index, sve_type);
   bool require_vector_type (unsigned int, vector_type_index);
   bool require_matching_vector_type (unsigned int, unsigned int, sve_type);
   bool require_derived_vector_type (unsigned int, unsigned int, sve_type,
 				    type_class_index = SAME_TYPE_CLASS,
-				    unsigned int = SAME_SIZE);
+				    unsigned int = SAME_SIZE,
+				    unsigned int = 1);
 
   bool require_scalar_type (unsigned int, const char *);
+  bool require_nonscalar_type (unsigned int);
   bool require_pointer_type (unsigned int);
   bool require_matching_integer_scalar_type (unsigned int, unsigned int,
 					     type_suffix_index);
@@ -529,6 +542,8 @@ public:
 				type_class_index = SAME_TYPE_CLASS,
 				unsigned int = SAME_SIZE,
 				type_suffix_index = NUM_TYPE_SUFFIXES);
+  tree finish_opt_single_resolution (unsigned int, unsigned int, sve_type,
+				     type_class_index = SAME_TYPE_CLASS);
 
   tree resolve ();
 
@@ -653,7 +668,7 @@ public:
   rtx use_contiguous_prefetch_insn (insn_code);
   rtx use_contiguous_store_insn (insn_code);
 
-  rtx map_to_rtx_codes (rtx_code, rtx_code, int,
+  rtx map_to_rtx_codes (rtx_code, rtx_code, int, int,
 			unsigned int = DEFAULT_MERGE_ARGNO);
   rtx map_to_unspecs (int, int, int, unsigned int = DEFAULT_MERGE_ARGNO);
 
@@ -784,13 +799,6 @@ extern tree acle_svprfop;
 bool vector_cst_all_same (tree, unsigned int);
 bool is_ptrue (tree, unsigned int);
 
-/* Return the ACLE type svbool_t.  */
-inline tree
-get_svbool_t (void)
-{
-  return acle_vector_types[0][VECTOR_TYPE_svbool_t];
-}
-
 /* Try to find a mode with the given mode_suffix_info fields.  Return the
    mode on success or MODE_none on failure.  */
 inline mode_suffix_index
@@ -864,6 +872,24 @@ function_instance::operator!= (const function_instance &other) const
   return !operator== (other);
 }
 
+/* Return the index of the type that should be used as the governing
+   predicate of this function.  */
+inline vector_type_index
+function_instance::gp_type_index () const
+{
+  if (group_suffix ().vectors_per_tuple > 1)
+    return VECTOR_TYPE_svcount_t;
+  return VECTOR_TYPE_svbool_t;
+}
+
+/* Return the type that should be used as the governing predicate of
+   this function.  */
+inline tree
+function_instance::gp_type () const
+{
+  return acle_vector_types[0][gp_type_index ()];
+}
+
 /* If the function operates on tuples of vectors, return the number
    of vectors in the tuples, otherwise return 1.  */
 inline unsigned int
@@ -997,6 +1023,10 @@ function_instance::tuple_mode (unsigned int i) const
 inline machine_mode
 function_instance::gp_mode (unsigned int i) const
 {
+  /* Multi-vector operations are predicated on an svcount_t, which has
+     mode VNx16BI.  */
+  if (group_suffix ().vectors_per_tuple > 1)
+    return VNx16BImode;
   return aarch64_sve_pred_mode (type_suffix (i).element_bytes).require ();
 }
 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3729c67eb69..d911f657871 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1266,7 +1266,7 @@ (define_insn "aarch64_update_ffrt"
 ;; - LD4W
 ;; -------------------------------------------------------------------------
 
-;; Predicated LD1.
+;; Predicated LD1 (single).
 (define_insn "maskload<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 	(unspec:SVE_ALL
@@ -1277,6 +1277,17 @@ (define_insn "maskload<mode><vpred>"
   "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
 )
 
+;; Predicated LD1 (multi), with a count as predicate.
+(define_insn "@aarch64_ld1<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(match_operand:VNx16BI 2 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+	  UNSPEC_LD1_SVE_COUNT))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "ld1<Vesize>\t%0, %K2/z, %1"
+)
+
 ;; Unpredicated LD[234].
 (define_expand "vec_load_lanes<mode><vsingle>"
   [(set (match_operand:SVE_STRUCT 0 "register_operand")
@@ -1408,7 +1419,7 @@ (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SV
 ;; - LDNT1W
 ;; -------------------------------------------------------------------------
 
-;; Predicated contiguous non-temporal load.
+;; Predicated contiguous non-temporal load (single).
 (define_insn "@aarch64_ldnt1<mode>"
   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
 	(unspec:SVE_FULL
@@ -1419,6 +1430,17 @@ (define_insn "@aarch64_ldnt1<mode>"
   "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
 )
 
+;; Predicated contiguous non-temporal load (multi), with a count as predicate.
+(define_insn "@aarch64_ldnt1<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(match_operand:VNx16BI 2 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+	  UNSPEC_LDNT1_SVE_COUNT))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "ldnt1<Vesize>\t%0, %K2/z, %1"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Normal gather loads
 ;; -------------------------------------------------------------------------
@@ -2229,7 +2251,7 @@ (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_ux
 ;; - ST4W
 ;; -------------------------------------------------------------------------
 
-;; Predicated ST1.
+;; Predicated ST1 (single).
 (define_insn "maskstore<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
 	(unspec:SVE_ALL
@@ -2241,6 +2263,17 @@ (define_insn "maskstore<mode><vpred>"
   "st1<Vesize>\t%1.<Vctype>, %2, %0"
 )
 
+(define_insn "@aarch64_st1<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
+	(unspec:SVE_FULLx24
+	  [(match_operand:VNx16BI 2 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_dup 0)]
+	  UNSPEC_ST1_SVE_COUNT))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "st1<Vesize>\t%1, %K2, %0"
+)
+
 ;; Unpredicated ST[234].  This is always a full update, so the dependence
 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
 ;; There doesn't seem to be any obvious benefit to treating the all-true
@@ -2340,6 +2373,17 @@ (define_insn "@aarch64_stnt1<mode>"
   "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
 )
 
+(define_insn "@aarch64_stnt1<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
+	(unspec:SVE_FULLx24
+	  [(match_operand:VNx16BI 2 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_dup 0)]
+	  UNSPEC_STNT1_SVE_COUNT))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "stnt1<Vesize>\t%1, %K2, %0"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Normal scatter stores
 ;; -------------------------------------------------------------------------
@@ -7133,21 +7177,25 @@ (define_insn "<sur>dot_prod<vsi2qi>"
 )
 
 ;; Four-element integer dot-product by selected lanes with accumulation.
-(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
+(define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>"
   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
 	(plus:SVE_FULL_SDI
 	  (unspec:SVE_FULL_SDI
-	    [(match_operand:<VSI2QI> 1 "register_operand")
-	     (unspec:<VSI2QI>
-	       [(match_operand:<VSI2QI> 2 "register_operand")
+	    [(match_operand:SVE_FULL_BHI 1 "register_operand")
+	     (unspec:SVE_FULL_BHI
+	       [(match_operand:SVE_FULL_BHI 2 "register_operand")
 		(match_operand:SI 3 "const_int_operand")]
 	       UNSPEC_SVE_LANE_SELECT)]
 	    DOTPROD)
 	  (match_operand:SVE_FULL_SDI 4 "register_operand")))]
-  "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2              , 4 ; attrs: movprfx ]
-     [ w        , w , <sve_lane_con> , 0 ; *              ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
-     [ ?&w      , w , <sve_lane_con> , w ; yes            ] movprfx\t%0, %4\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
+  "TARGET_SVE
+   && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
+       || (TARGET_STREAMING_SME2
+	   && <SVE_FULL_SDI:elem_bits> == 32
+	   && <SVE_FULL_BHI:elem_bits> == 16))"
+  {@ [ cons: =0 , 1 , 2                           , 4 ; attrs: movprfx ]
+     [ w        , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; *              ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
+     [ ?&w      , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes            ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
   }
 )
 
@@ -7166,13 +7214,13 @@ (define_insn "@<sur>dot_prod<vsi2qi>"
   }
 )
 
-(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
+(define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
 	(plus:VNx4SI_ONLY
 	  (unspec:VNx4SI_ONLY
-	    [(match_operand:<VSI2QI> 1 "register_operand")
-	     (unspec:<VSI2QI>
-	       [(match_operand:<VSI2QI> 2 "register_operand")
+	    [(match_operand:VNx16QI_ONLY 1 "register_operand")
+	     (unspec:VNx16QI_ONLY
+	       [(match_operand:VNx16QI_ONLY 2 "register_operand")
 		(match_operand:SI 3 "const_int_operand")]
 	       UNSPEC_SVE_LANE_SELECT)]
 	    DOTPROD_I8MM)
@@ -7758,6 +7806,8 @@ (define_insn "@aarch64_sve_tmad<mode>"
 ;; - BFDOT (BF16)
 ;; - BFMLALB (BF16)
 ;; - BFMLALT (BF16)
+;; - BFMLSLB (SME2)
+;; - BFMLSLT (SME2)
 ;; - BFMMLA (BF16)
 ;; -------------------------------------------------------------------------
 
@@ -8239,11 +8289,18 @@ (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
 ;; - WHILEWR (SVE2)
 ;; -------------------------------------------------------------------------
 
+(define_constants [
+  (SVE_WHILE_B 0)
+  (SVE_WHILE_B_X2 1)
+  (SVE_WHILE_C 2)
+])
+
 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
 ;; true for all J in [0, I].
 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
-	(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	(unspec:PRED_ALL [(const_int SVE_WHILE_B)
+			  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
 			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
 			 SVE_WHILE))
    (clobber (reg:CC_NZC CC_REGNUM))]
@@ -8261,12 +8318,14 @@ (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
 	   (match_operand 4)
 	   (const_int SVE_KNOWN_PTRUE)
 	   (unspec:PRED_ALL
-	     [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	     [(const_int SVE_WHILE_B)
+	      (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
 	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
 	     SVE_WHILE)]
 	  UNSPEC_PTEST))
    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
-	(unspec:PRED_ALL [(match_dup 1)
+	(unspec:PRED_ALL [(const_int SVE_WHILE_B)
+			  (match_dup 1)
 			  (match_dup 2)]
 			 SVE_WHILE))]
   "TARGET_SVE"
@@ -8288,7 +8347,8 @@ (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptes
 	   (match_operand 4)
 	   (const_int SVE_KNOWN_PTRUE)
 	   (unspec:PRED_ALL
-	     [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	     [(const_int SVE_WHILE_B)
+	      (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
 	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
 	     SVE_WHILE)]
 	  UNSPEC_PTEST))
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 79e19699bc4..29c41ca3c93 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -25,12 +25,24 @@
 ;; ---- Non-temporal gather loads
 ;; ---- Non-temporal scatter stores
 ;;
+;; == Predicate manipulation
+;; ---- [PRED] Predicate-as-counter PTRUE
+;; ---- [PRED] Predicate extraction
+;; ---- [PRED] Predicate selection
+;; ---- [PRED] Predicate count
+;;
+;; == Uniform unary arithmetic
+;; ---- [FP] Multi-register unary operations
+;;
 ;; == Uniform binary arithmnetic
+;; ---- [INT] Multi-register operations
+;; ---- [INT] Clamp to minimum/maximum
 ;; ---- [INT] Multiplication
 ;; ---- [INT] Scaled high-part multiplication
 ;; ---- [INT] General binary arithmetic that maps to unspecs
 ;; ---- [INT] Saturating binary arithmetic
 ;; ---- [INT] Saturating left shifts
+;; ---- [FP] Clamp to minimum/maximum
 ;;
 ;; == Uniform ternary arithmnetic
 ;; ---- [INT] General ternary arithmetic that maps to unspecs
@@ -42,16 +54,20 @@
 ;; ---- [INT] Sum of absolute differences
 ;;
 ;; == Extending arithmetic
+;; ---- [INT] Multi-register widening conversions
 ;; ---- [INT] Wide binary arithmetic
 ;; ---- [INT] Long binary arithmetic
 ;; ---- [INT] Long left shifts
 ;; ---- [INT] Long binary arithmetic with accumulation
+;; ---- [FP] Multi-register operations
 ;; ---- [FP] Long multiplication with accumulation
 ;;
 ;; == Narrowing arithnetic
 ;; ---- [INT] Narrowing unary arithmetic
+;; ---- [INT] Multi-vector narrowing unary arithmetic
 ;; ---- [INT] Narrowing binary arithmetic
 ;; ---- [INT] Narrowing right shifts
+;; ---- [INT] Multi-vector narrowing right shifts
 ;;
 ;; == Pairwise arithmetic
 ;; ---- [INT] Pairwise arithmetic
@@ -66,14 +82,23 @@
 ;; == Conversions
 ;; ---- [FP<-FP] Widening conversions
 ;; ---- [FP<-FP] Narrowing conversions
+;; ---- [FP<-FP] Multi-vector narrowing conversions
+;; ---- [FP<-INT] Multi-vector conversions
+;; ---- [INT<-FP] Multi-vector conversions
 ;;
 ;; == Other arithmetic
 ;; ---- [INT] Reciprocal approximation
 ;; ---- [INT<-FP] Base-2 logarithm
 ;; ---- [INT] Polynomial multiplication
 ;;
+;; == Comparisons and selects
+;; ---- [INT,FP] Select based on predicates as counters
+;; ---- [INT] While tests
+;;
 ;; == Permutation
+;; ---- [INT,FP] Reversal
 ;; ---- [INT,FP] General permutes
+;; ---- [INT,FP] Multi-register permutes
 ;; ---- [INT] Optional bit-permute extensions
 ;;
 ;; == General
@@ -192,10 +217,256 @@ (define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
   }
 )
 
+;; =========================================================================
+;; == Predicate manipulation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate-as-counter PTRUE
+;; -------------------------------------------------------------------------
+;; - PTRUE (predicate-as-counter form)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
+	(unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
+  "TARGET_STREAMING_SME2"
+  "ptrue\t%K0.<bits_etype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate extraction
+;; -------------------------------------------------------------------------
+;; Includes
+;; - PEXT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_pext<BHSD_BITS>"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+	(unspec:VNx16BI
+	  [(match_operand:VNx16BI 1 "register_operand" "Uph")
+	   (match_operand:DI 2 "const_int_operand")
+	   (const_int BHSD_BITS)]
+	  UNSPEC_PEXT))]
+  "TARGET_STREAMING_SME2"
+  "pext\t%0.<bits_etype>, %K1[%2]"
+)
+
+(define_insn "@aarch64_sve_pext<BHSD_BITS>x2"
+  [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
+	(unspec:VNx32BI
+	  [(match_operand:VNx16BI 1 "register_operand" "Uph")
+	   (match_operand:DI 2 "const_int_operand")
+	   (const_int BHSD_BITS)]
+	  UNSPEC_PEXTx2))]
+  "TARGET_STREAMING_SME2"
+  "pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate selection
+;; -------------------------------------------------------------------------
+;; Includes
+;; - PSEL
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_psel<BHSD_BITS>"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+	(unspec:VNx16BI
+	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
+	   (match_operand:VNx16BI 2 "register_operand" "Upa")
+	   (match_operand:SI 3 "register_operand" "Ucj")
+	   (const_int BHSD_BITS)]
+	  UNSPEC_PSEL))]
+  "TARGET_STREAMING_SME2"
+  "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
+)
+
+(define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+	(unspec:VNx16BI
+	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
+	   (match_operand:VNx16BI 2 "register_operand" "Upa")
+	   (plus:SI
+	     (match_operand:SI 3 "register_operand" "Ucj")
+	     (match_operand:SI 4 "const_int_operand"))
+	   (const_int BHSD_BITS)]
+	  UNSPEC_PSEL))]
+  "TARGET_STREAMING_SME2
+   && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
+  "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate count
+;; -------------------------------------------------------------------------
+;; Includes
+;; - CNTP (predicate as counter)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_cntp_c<BHSD_BITS>"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI
+	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
+	   (match_operand:DI 2 "const_int_operand")
+	   (const_int BHSD_BITS)]
+	  UNSPEC_CNTP_C))]
+  "TARGET_STREAMING_SME2"
+  "cntp\t%x0, %K1.<bits_etype>, vlx%2"
+)
+
+;; =========================================================================
+;; == Uniform unary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multi-register unary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FRINTA
+;; - FRINTM
+;; - FRINTN
+;; - FRINTP
+;; -------------------------------------------------------------------------
+
+(define_insn "<frint_pattern><mode>2"
+  [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_SFx24
+	  [(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE2_SFx24_UNARY))]
+  "TARGET_STREAMING_SME2"
+  "frint<frint_suffix>\t%0, %1"
+)
+
 ;; =========================================================================
 ;; == Uniform binary arithmnetic
 ;; =========================================================================
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-register operations
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - ADD
+;; - SMAX
+;; - SMIN
+;; - SQMULH
+;; - SRSHL
+;; - UMAX
+;; - UMIN
+;; - URSHL
+;; -------------------------------------------------------------------------
+
+(define_expand "<optab><mode>3"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(SVE_INT_BINARY_MULTI:SVE_Ix24
+	  (match_operand:SVE_Ix24 1 "aligned_register_operand" "Uw<vector_count>")
+	  (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
+  "TARGET_STREAMING_SME2"
+)
+
+(define_insn "*<optab><mode>3"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(SVE_INT_BINARY_MULTI:SVE_Ix24
+	  (match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
+	  (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<optab><mode>"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(SVE_INT_BINARY_SINGLE:SVE_Ix24
+	  (match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
+	  (vec_duplicate:SVE_Ix24
+	    (match_operand:<VSINGLE> 2 "register_operand" "x"))))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Ix24
+	  [(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
+	   (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_INT_BINARY_MULTI))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<sve_int_op><mode>"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Ix24
+	  [(match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
+	   (vec_duplicate:SVE_Ix24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	  SVE_INT_BINARY_MULTI))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Clamp to minimum/maximum
+;; -------------------------------------------------------------------------
+;; - SCLAMP
+;; - UCLAMP
+;; -------------------------------------------------------------------------
+
+;; The minimum is applied after the maximum, which matters if the maximum
+;; bound is (unexpectedly) less than the minimum bound.
+(define_insn "@aarch64_sve_<su>clamp<mode>"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(<max_opp>:SVE_FULL_I
+	  (USMAX:SVE_FULL_I
+	    (match_operand:SVE_FULL_I 1 "register_operand")
+	    (match_operand:SVE_FULL_I 2 "register_operand"))
+	  (match_operand:SVE_FULL_I 3 "register_operand")))]
+  "TARGET_STREAMING_SME"
+  {@ [cons: =0,  1, 2, 3; attrs: movprfx]
+     [       w, %0, w, w; *             ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+     [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+  }
+)
+
+(define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(unspec:SVE_FULL_I
+	  [(match_operand 4)
+	   (<max_opp>:SVE_FULL_I
+	     (unspec:SVE_FULL_I
+	       [(match_operand 5)
+		(USMAX:SVE_FULL_I
+		  (match_operand:SVE_FULL_I 1 "register_operand")
+		  (match_operand:SVE_FULL_I 2 "register_operand"))]
+	       UNSPEC_PRED_X)
+	     (match_operand:SVE_FULL_I 3 "register_operand"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_STREAMING_SME"
+  {@ [cons: =0,  1, 2, 3; attrs: movprfx]
+     [       w, %0, w, w; *             ] #
+     [     ?&w,  w, w, w; yes           ] #
+  }
+  "&& true"
+  [(set (match_dup 0)
+	(<max_opp>:SVE_FULL_I
+	  (USMAX:SVE_FULL_I
+	    (match_dup 1)
+	    (match_dup 2))
+	  (match_dup 3)))]
+)
+
+(define_insn "@aarch64_sve_<su>clamp_single<mode>"
+  [(set (match_operand:SVE_Ix24 0 "register_operand" "=Uw<vector_count>")
+	(<max_opp>:SVE_Ix24
+	  (USMAX:SVE_Ix24
+	    (match_operand:SVE_Ix24 1 "register_operand" "0")
+	    (vec_duplicate:SVE_Ix24
+	      (match_operand:<VSINGLE> 2 "register_operand" "w")))
+	  (vec_duplicate:SVE_Ix24
+	    (match_operand:<VSINGLE> 3 "register_operand" "w"))))]
+  "TARGET_STREAMING_SME2"
+  "<su>clamp\t%0, %2.<Vetype>, %3.<Vetype>"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Multiplication
 ;; -------------------------------------------------------------------------
@@ -689,6 +960,74 @@ (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
   [(set_attr "movprfx" "yes")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Clamp to minimum/maximum
+;; -------------------------------------------------------------------------
+;; - FCLAMP
+;; -------------------------------------------------------------------------
+
+;; The minimum is applied after the maximum, which matters if the maximum
+;; bound is (unexpectedly) less than the minimum bound.
+(define_insn "@aarch64_sve_fclamp<mode>"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(unspec:SVE_FULL_F
+	     [(match_operand:SVE_FULL_F 1 "register_operand")
+	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	     UNSPEC_FMAXNM)
+	   (match_operand:SVE_FULL_F 3 "register_operand")]
+	  UNSPEC_FMINNM))]
+  "TARGET_STREAMING_SME"
+  {@ [cons: =0,  1, 2, 3; attrs: movprfx]
+     [       w, %0, w, w; *             ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+     [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+  }
+)
+
+(define_insn_and_split "*aarch64_sve_fclamp<mode>_x"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(match_operand 4)
+	   (const_int SVE_RELAXED_GP)
+	   (unspec:SVE_FULL_F
+	     [(match_operand 5)
+	      (const_int SVE_RELAXED_GP)
+	      (match_operand:SVE_FULL_F 1 "register_operand")
+	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	     UNSPEC_COND_FMAXNM)
+	   (match_operand:SVE_FULL_F 3 "register_operand")]
+	  UNSPEC_COND_FMINNM))]
+  "TARGET_STREAMING_SME"
+  {@ [cons: =0,  1, 2, 3; attrs: movprfx]
+     [       w, %0, w, w; *             ] #
+     [     ?&w,  w, w, w; yes           ] #
+  }
+  "&& true"
+  [(set (match_dup 0)
+	(unspec:SVE_FULL_F
+	  [(unspec:SVE_FULL_F
+	     [(match_dup 1)
+	      (match_dup 2)]
+	     UNSPEC_FMAXNM)
+	   (match_dup 3)]
+	  UNSPEC_FMINNM))]
+)
+
+(define_insn "@aarch64_sve_fclamp_single<mode>"
+  [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Fx24
+	  [(unspec:SVE_Fx24
+	     [(match_operand:SVE_Fx24 1 "register_operand" "0")
+	      (vec_duplicate:SVE_Fx24
+		(match_operand:<VSINGLE> 2 "register_operand" "w"))]
+	     UNSPEC_FMAXNM)
+	   (vec_duplicate:SVE_Fx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "w"))]
+	  UNSPEC_FMINNM))]
+  "TARGET_STREAMING_SME2"
+  "fclamp\t%0, %2.<Vetype>, %3.<Vetype>"
+)
+
 ;; =========================================================================
 ;; == Uniform ternary arithmnetic
 ;; =========================================================================
@@ -1256,6 +1595,30 @@ (define_insn "*aarch64_sve2_<su>aba<mode>"
 ;; == Extending arithmetic
 ;; =========================================================================
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-register widening conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUNPK
+;; - UUNPK
+;; -------------------------------------------------------------------------
+
+(define_insn "<optab><mode><v2xwide>2"
+  [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw2")
+	(ANY_EXTEND:<V2XWIDE>
+	  (match_operand:SVE_FULL_BHSI 1 "register_operand" "w")))]
+  "TARGET_STREAMING_SME2"
+  "<su>unpk\t%0, %1.<Vetype>"
+)
+
+(define_insn "<optab><mode><v2xwide>2"
+  [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw4")
+	(ANY_EXTEND:<V2XWIDE>
+	  (match_operand:SVE_FULL_BHSIx2 1 "aligned_register_operand" "Uw2")))]
+  "TARGET_STREAMING_SME2"
+  "<su>unpk\t%0, %1"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Wide binary arithmetic
 ;; -------------------------------------------------------------------------
@@ -1357,6 +1720,7 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>"
 ;; Includes:
 ;; - SABALB
 ;; - SABALT
+;; - SDOT (SME2 or SVE2p1)
 ;; - SMLALB
 ;; - SMLALT
 ;; - SMLSLB
@@ -1369,6 +1733,7 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>"
 ;; - SQDMLSLT
 ;; - UABALB
 ;; - UABALT
+;; - UDOT (SME2 or SVE2p1)
 ;; - UMLALB
 ;; - UMLALT
 ;; - UMLSLB
@@ -1514,10 +1879,68 @@ (define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   }
 )
+
+;; Two-way dot-product.
+(define_insn "@aarch64_sve_<sur>dotvnx4sivnx8hi"
+  [(set (match_operand:VNx4SI 0 "register_operand")
+	(plus:VNx4SI
+	  (unspec:VNx4SI
+	    [(match_operand:VNx8HI 1 "register_operand")
+	     (match_operand:VNx8HI 2 "register_operand")]
+	    DOTPROD)
+	  (match_operand:VNx4SI 3 "register_operand")))]
+  "TARGET_STREAMING_SME2"
+  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+     [ w        , w , w , 0 ; *              ] <sur>dot\t%0.s, %1.h, %2.h
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h
+  }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multi-register operations
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - FMAX
+;; - FMAXNM
+;; - FMIN
+;; - FMINNM
+;; -------------------------------------------------------------------------
+
+(define_expand "@aarch64_sve_<maxmin_uns_op><mode>"
+  [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Fx24
+	  [(match_operand:SVE_Fx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_FP_BINARY_MULTI))]
+  "TARGET_STREAMING_SME2"
+)
+
+(define_insn "*aarch64_sve_<maxmin_uns_op><mode>"
+  [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Fx24
+	  [(match_operand:SVE_Fx24 1 "aligned_register_operand" "%0")
+	   (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_FP_BINARY_MULTI))]
+  "TARGET_STREAMING_SME2"
+  "<maxmin_uns_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<maxmin_uns_op><mode>"
+  [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Fx24
+	  [(match_operand:SVE_Fx24 1 "aligned_register_operand" "0")
+	   (vec_duplicate:SVE_Fx24
+	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	  SVE_FP_BINARY_MULTI))]
+  "TARGET_STREAMING_SME2"
+  "<maxmin_uns_op>\t%0, %0, %2.<Vetype>"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Long multiplication with accumulation
 ;; -------------------------------------------------------------------------
 ;; Includes:
+;; - FDOT (SME2 or SVE2p1)
 ;; - FMLALB
 ;; - FMLALT
 ;; - FMLSLB
@@ -1555,6 +1978,40 @@ (define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
   }
 )
 
+;; Two-way dot-product.
+(define_insn "aarch64_sve_fdotvnx4sfvnx8hf"
+  [(set (match_operand:VNx4SF 0 "register_operand")
+	(plus:VNx4SF
+	  (unspec:VNx4SF
+	    [(match_operand:VNx8HF 1 "register_operand")
+	     (match_operand:VNx8HF 2 "register_operand")]
+	    UNSPEC_FDOT)
+	  (match_operand:VNx4SF 3 "register_operand")))]
+  "TARGET_STREAMING_SME2"
+  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+     [ w        , w , w , 0 ; *              ] fdot\t%0.s, %1.h, %2.h
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h
+  }
+)
+
+(define_insn "aarch64_fdot_prod_lanevnx4sfvnx8hf"
+  [(set (match_operand:VNx4SF 0 "register_operand")
+	(plus:VNx4SF
+	  (unspec:VNx4SF
+	    [(match_operand:VNx8HF 1 "register_operand")
+	     (unspec:VNx8HF
+	       [(match_operand:VNx8HF 2 "register_operand")
+		(match_operand:SI 3 "const_int_operand")]
+	       UNSPEC_SVE_LANE_SELECT)]
+	    UNSPEC_FDOT)
+	  (match_operand:VNx4SF 4 "register_operand")))]
+  "TARGET_STREAMING_SME2"
+  {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
+     [ w        , w , y , 0 ; *              ] fdot\t%0.s, %1.h, %2.h[%3]
+     [ ?&w      , w , y , w ; yes            ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3]
+  }
+)
+
 ;; =========================================================================
 ;; == Narrowing arithnetic
 ;; =========================================================================
@@ -1591,6 +2048,43 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>"
   "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-vector narrowing unary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQCVT
+;; - SQCVTN
+;; - UQCVT
+;; - UQCVTN
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><VNx16QI_ONLY:mode><VNx16SI_ONLY:mode>"
+  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+	(unspec:VNx16QI_ONLY
+	  [(match_operand:VNx16SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_QCVTxN))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "<optab>\t%0.b, %1"
+)
+
+(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>"
+  [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
+	(unspec:VNx8HI_ONLY
+	  [(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_QCVTxN))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "<optab>\t%0.h, %1"
+)
+
+(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8DI_ONLY:mode>"
+  [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
+	(unspec:VNx8HI_ONLY
+	  [(match_operand:VNx8DI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_QCVTxN))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "<optab>\t%0.h, %1"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Narrowing binary arithmetic
 ;; -------------------------------------------------------------------------
@@ -1689,6 +2183,20 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>"
   "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-vector narrowing right shifts
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+	(unspec:<VNARROW>
+	  [(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>")
+	   (match_operand:DI 2 "const_int_operand")]
+	  SVE2_INT_SHIFT_IMM_NARROWxN))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0.<Ventype>, %1, #%2"
+)
+
 ;; =========================================================================
 ;; == Pairwise arithmetic
 ;; =========================================================================
@@ -2162,6 +2670,57 @@ (define_insn "@aarch64_sve2_cvtxnt<mode>"
   "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Multi-vector narrowing conversions
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - BFCVT
+;; - BFCVTN
+;; - FCVT
+;; - FCVTN
+;; -------------------------------------------------------------------------
+
+(define_insn "truncvnx8sf<mode>2"
+  [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
+	(float_truncate:SVE_FULL_HF
+	  (match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")))]
+  "TARGET_STREAMING_SME2"
+  "<b>fcvt\t%0.h, %1"
+)
+
+(define_insn "@aarch64_sve_cvtn<mode>"
+  [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
+	(unspec:SVE_FULL_HF
+	  [(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")]
+	  UNSPEC_FCVTN))]
+  "TARGET_STREAMING_SME2"
+  "<b>fcvtn\t%0.h, %1"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Multi-vector conversions
+;; -------------------------------------------------------------------------
+
+(define_insn "<optab><v_int_equiv><mode>2"
+  [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(FLOATUORS:SVE_SFx24
+	  (match_operand:<V_INT_EQUIV> 1 "aligned_register_operand" "Uw<vector_count>")))]
+  "TARGET_STREAMING_SME2"
+  "<su_optab>cvtf\t%0, %1"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Multi-vector conversions
+;; -------------------------------------------------------------------------
+
+(define_insn "<optab><mode><v_int_equiv>2"
+  [(set (match_operand:<V_INT_EQUIV> 0 "aligned_register_operand" "=Uw<vector_count>")
+	(FIXUORS:<V_INT_EQUIV>
+	  (match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")))]
+  "TARGET_STREAMING_SME2"
+  "fcvtz<su>\t%0, %1"
+)
+
 ;; =========================================================================
 ;; == Other arithmetic
 ;; =========================================================================
@@ -2357,10 +2916,108 @@ (define_insn "@aarch64_sve_<optab><mode>"
   "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
 )
 
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on predicates as counters
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_sel<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(match_operand:<VPRED> 3 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  UNSPEC_SEL))]
+  "TARGET_STREAMING_SME2"
+  "sel\t%0, %K3, %1, %2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] While tests
+;; -------------------------------------------------------------------------
+;; Includes the x2 and count versions of:
+;; - WHILEGE
+;; - WHILEGT
+;; - WHILEHI
+;; - WHILEHS
+;; - WHILELE
+;; - WHILELO
+;; - WHILELS
+;; - WHILELT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2"
+  [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
+	(unspec:VNx32BI
+	  [(const_int SVE_WHILE_B_X2)
+	   (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
+	   (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+	   (const_int BHSD_BITS)]
+	  SVE_WHILE_ORDER))
+   (clobber (reg:CC_NZC CC_REGNUM))]
+  "TARGET_STREAMING_SME2"
+  "while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2"
+)
+
+(define_insn "@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
+	(unspec:VNx16BI
+	  [(const_int SVE_WHILE_C)
+	   (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
+	   (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+	   (const_int BHSD_BITS)
+	   (match_operand:DI 3 "const_int_operand")]
+	  SVE_WHILE_ORDER))
+   (clobber (reg:CC_NZC CC_REGNUM))]
+  "TARGET_STREAMING_SME2"
+  "while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3"
+)
+
 ;; =========================================================================
 ;; == Permutation
 ;; =========================================================================
 
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Reversal
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - REVD
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_pred_<optab><mode>"
+  [(set (match_operand:SVE_FULL 0 "register_operand")
+	(unspec:SVE_FULL
+	  [(match_operand:VNx2BI 1 "register_operand")
+	   (unspec:SVE_FULL
+	     [(match_operand:SVE_FULL 2 "register_operand")]
+	     UNSPEC_REVD_ONLY)]
+	  UNSPEC_PRED_X))]
+  "TARGET_STREAMING_SME"
+  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
+     [ w        , Upl , 0 ; *              ] revd\t%0.q, %1/m, %2.q
+     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
+  }
+)
+
+(define_insn "@cond_<optab><mode>"
+  [(set (match_operand:SVE_FULL 0 "register_operand")
+	(unspec:SVE_FULL
+	  [(match_operand:VNx2BI 1 "register_operand")
+	   (unspec:SVE_FULL
+	     [(match_operand:SVE_FULL 2 "register_operand")]
+	     UNSPEC_REVD_ONLY)
+	   (match_operand:SVE_FULL 3 "register_operand")]
+	  UNSPEC_SEL))]
+  "TARGET_STREAMING_SME"
+  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
+     [ w        , Upl , w , 0  ; *              ] revd\t%0.q, %1/m, %2.q
+     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT,FP] General permutes
 ;; -------------------------------------------------------------------------
@@ -2392,6 +3049,52 @@ (define_insn "@aarch64_sve2_tbx<mode>"
   "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Multi-register permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ZIP
+;; - UZP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+  [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
+	(unspec:SVE_FULLx2
+	  [(match_operand:<VSINGLE> 1 "register_operand" "w")
+	   (match_operand:<VSINGLE> 2 "register_operand" "w")]
+	  SVE2_x24_PERMUTE))]
+  "TARGET_STREAMING_SME2"
+  "<perm_insn>\t%0, %1.<Vetype>, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<optab><mode>"
+  [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
+	(unspec:SVE_FULLx2
+	  [(match_operand:<VSINGLE> 1 "register_operand" "w")
+	   (match_operand:<VSINGLE> 2 "register_operand" "w")]
+	  SVE2_x24_PERMUTEQ))]
+  "TARGET_STREAMING_SME2"
+  "<perm_insn>\t{%S0.q - %T0.q}, %1.q, %2.q"
+)
+
+(define_insn "@aarch64_sve_<optab><mode>"
+  [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
+	(unspec:SVE_FULLx4
+	  [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
+	  SVE2_x24_PERMUTE))]
+  "TARGET_STREAMING_SME2"
+  "<perm_insn>\t%0, %1"
+)
+
+(define_insn "@aarch64_sve_<optab><mode>"
+  [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
+	(unspec:SVE_FULLx4
+	  [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
+	  SVE2_x24_PERMUTEQ))]
+  "TARGET_STREAMING_SME2"
+  "<perm_insn>\t{%S0.q - %V0.q}, {%S1.q - %V1.q}"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Optional bit-permute extensions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8f34ca14635..0ea5950ddca 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11689,6 +11689,7 @@ sizetochar (int size)
      '0':		Print a normal operand, if it's a general register,
 			then we assume DImode.
      'k':		Print NZCV for conditional compare instructions.
+     'K':		Print a predicate register as pn<N> rather than p<N>
      'A':		Output address constant representing the first
 			argument of X, specifying a relocation offset
 			if appropriate.
@@ -11865,14 +11866,17 @@ aarch64_print_operand (FILE *f, rtx x, int code)
     case 'T':
     case 'U':
     case 'V':
-      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
+      if (!REG_P (x) || (!FP_REGNUM_P (REGNO (x)) && !PR_REGNUM_P (REGNO (x))))
 	{
-	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
+	  output_operand_lossage ("incompatible operand for '%%%c'", code);
 	  return;
 	}
-      asm_fprintf (f, "%c%d",
-		   aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
-		   REGNO (x) - V0_REGNUM + (code - 'S'));
+      if (PR_REGNUM_P (REGNO (x)))
+	asm_fprintf (f, "p%d", REGNO (x) - P0_REGNUM + (code - 'S'));
+      else
+	asm_fprintf (f, "%c%d",
+		     aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
+		     REGNO (x) - V0_REGNUM + (code - 'S'));
       break;
 
     case 'R':
@@ -12153,6 +12157,15 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       }
       break;
 
+    case 'K':
+      if (!REG_P (x) || !PR_REGNUM_P (REGNO (x)))
+	{
+	  output_operand_lossage ("invalid operand for '%%%c'", code);
+	  return;
+	}
+      asm_fprintf (f, "pn%d", REGNO (x) - P0_REGNUM);
+      break;
+
     case 'y':
     case 'z':
       {
@@ -12355,6 +12368,9 @@ aarch64_label_mentioned_p (rtx x)
 enum reg_class
 aarch64_regno_regclass (unsigned regno)
 {
+  if (W8_W11_REGNUM_P (regno))
+    return W8_W11_REGS;
+
   if (W12_W15_REGNUM_P (regno))
     return W12_W15_REGS;
 
@@ -12722,6 +12738,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
   unsigned int nregs, vec_flags;
   switch (regclass)
     {
+    case W8_W11_REGS:
     case W12_W15_REGS:
     case STUB_REGS:
     case TAILCALL_ADDR_REGS:
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 5599c98ee94..bcdd13b015a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -324,7 +324,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
    imply anything about the state of PSTATE.SM.  */
 #define TARGET_SME (AARCH64_ISA_SME)
 
-/* Streaming-mode SME instructions.  */
+/* Same with streaming mode enabled.  */
 #define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME)
 
 /* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64.  */
@@ -336,6 +336,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 /* SME2 instructions, enabled through +sme2.  */
 #define TARGET_SME2 (AARCH64_ISA_SME2)
 
+/* Same with streaming mode enabled.  */
+#define TARGET_STREAMING_SME2 (TARGET_STREAMING && TARGET_SME2)
+
 /* ARMv8.3-A features.  */
 #define TARGET_ARMV8_3	(AARCH64_ISA_V8_3A)
 
@@ -541,6 +544,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
                      {"b" # N, V0_REGNUM + (N)}, \
                      {"z" # N, V0_REGNUM + (N)}
 
+#define P_ALIASES(N) {"pn" # N, P0_REGNUM + (N)}
+
 /* Provide aliases for all of the ISA defined register name forms.
    These aliases are convenient for use in the clobber lists of inline
    asm statements.  */
@@ -561,7 +566,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     V_ALIASES(16), V_ALIASES(17), V_ALIASES(18), V_ALIASES(19), \
     V_ALIASES(20), V_ALIASES(21), V_ALIASES(22), V_ALIASES(23), \
     V_ALIASES(24), V_ALIASES(25), V_ALIASES(26), V_ALIASES(27), \
-    V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31)  \
+    V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31), \
+    P_ALIASES(0),  P_ALIASES(1),  P_ALIASES(2),  P_ALIASES(3),  \
+    P_ALIASES(4),  P_ALIASES(5),  P_ALIASES(6),  P_ALIASES(7),  \
+    P_ALIASES(8),  P_ALIASES(9),  P_ALIASES(10), P_ALIASES(11), \
+    P_ALIASES(12), P_ALIASES(13), P_ALIASES(14), P_ALIASES(15)  \
   }
 
 #define EPILOGUE_USES(REGNO) (aarch64_epilogue_uses (REGNO))
@@ -682,6 +691,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
    && (REGNO) != R17_REGNUM \
    && (REGNO) != R30_REGNUM) \
 
+#define W8_W11_REGNUM_P(REGNO) \
+  IN_RANGE (REGNO, R8_REGNUM, R11_REGNUM)
+
 #define W12_W15_REGNUM_P(REGNO) \
   IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM)
 
@@ -711,6 +723,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 enum reg_class
 {
   NO_REGS,
+  W8_W11_REGS,
   W12_W15_REGS,
   TAILCALL_ADDR_REGS,
   STUB_REGS,
@@ -736,6 +749,7 @@ enum reg_class
 #define REG_CLASS_NAMES				\
 {						\
   "NO_REGS",					\
+  "W8_W11_REGS",				\
   "W12_W15_REGS",				\
   "TAILCALL_ADDR_REGS",				\
   "STUB_REGS",					\
@@ -758,6 +772,7 @@ enum reg_class
 #define REG_CLASS_CONTENTS						\
 {									\
   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
+  { 0x00000f00, 0x00000000, 0x00000000 },	/* W8_W11_REGS */	\
   { 0x0000f000, 0x00000000, 0x00000000 },	/* W12_W15_REGS */	\
   { 0x00030000, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\
   { 0x3ffcffff, 0x00000000, 0x00000000 },	/* STUB_REGS */		\
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 9e9ccefbfed..b8e12fc1d4b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -290,9 +290,13 @@ (define_c_enum "unspec" [
     UNSPEC_NZCV
     UNSPEC_XPACLRI
     UNSPEC_LD1_SVE
+    UNSPEC_LD1_SVE_COUNT
     UNSPEC_ST1_SVE
+    UNSPEC_ST1_SVE_COUNT
     UNSPEC_LDNT1_SVE
+    UNSPEC_LDNT1_SVE_COUNT
     UNSPEC_STNT1_SVE
+    UNSPEC_STNT1_SVE_COUNT
     UNSPEC_LD1RQ
     UNSPEC_LD1_GATHER
     UNSPEC_LDFF1_GATHER
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 055a87320ca..3ca7f23554f 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -39,7 +39,7 @@ (define_expand "@atomic_compare_and_swap<mode>"
 (define_mode_attr cas_short_expected_pred
   [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")])
 (define_mode_attr cas_short_expected_imm
-  [(QI "n") (HI "Uph")])
+  [(QI "n") (HI "Uih")])
 
 (define_insn_and_split "@aarch64_compare_and_swap<mode>"
   [(set (reg:CC CC_REGNUM)					;; bool out
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 78a62af1abf..8b65cab29fb 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -21,6 +21,9 @@
 (define_register_constraint "k" "STACK_REG"
   "@internal The stack register.")
 
+(define_register_constraint "Uci" "W8_W11_REGS"
+  "@internal r8-r11, which can be used to index ZA.")
+
 (define_register_constraint "Ucj" "W12_W15_REGS"
   "@internal r12-r15, which can be used to index ZA.")
 
@@ -39,6 +42,20 @@ (define_register_constraint "Ucr"
 (define_register_constraint "w" "FP_REGS"
   "Floating point and SIMD vector registers.")
 
+(define_register_constraint "x" "FP_LO_REGS"
+  "Floating point and SIMD vector registers V0 - V15.")
+
+(define_register_constraint "y" "FP_LO8_REGS"
+  "Floating point and SIMD vector registers V0 - V7.")
+
+(define_register_constraint "Uw2" "FP_REGS"
+  "Even floating point and SIMD vector registers."
+  "regno % 2 == 0")
+
+(define_register_constraint "Uw4" "FP_REGS"
+  "4-tuple-aligned floating point and SIMD vector registers."
+  "regno % 4 == 0")
+
 (define_register_constraint "Upa" "PR_REGS"
   "SVE predicate registers p0 - p15.")
 
@@ -49,11 +66,8 @@ (define_register_constraint "Up2" "PR_REGS"
 (define_register_constraint "Upl" "PR_LO_REGS"
   "SVE predicate registers p0 - p7.")
 
-(define_register_constraint "x" "FP_LO_REGS"
-  "Floating point and SIMD vector registers V0 - V15.")
-
-(define_register_constraint "y" "FP_LO8_REGS"
-  "Floating point and SIMD vector registers V0 - V7.")
+(define_register_constraint "Uph" "PR_HI_REGS"
+  "SVE predicate registers p8 - p15.")
 
 (define_constraint "c"
  "@internal The condition code register."
@@ -285,7 +299,7 @@ (define_constraint "Up3"
   (and (match_code "const_int")
        (match_test "(unsigned) exact_log2 (ival) <= 4")))
 
-(define_constraint "Uph"
+(define_constraint "Uih"
   "@internal
   A constraint that matches HImode integers zero extendable to
   SImode plus_operand."
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1a14069485d..f204850850c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -423,8 +423,11 @@ (define_mode_iterator VMULD [V4HI V8HI V2SI V4SI
 
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator VNx16QI_ONLY [VNx16QI])
+(define_mode_iterator VNx16SI_ONLY [VNx16SI])
 (define_mode_iterator VNx8HI_ONLY [VNx8HI])
 (define_mode_iterator VNx8BF_ONLY [VNx8BF])
+(define_mode_iterator VNx8SI_ONLY [VNx8SI])
+(define_mode_iterator VNx8DI_ONLY [VNx8DI])
 (define_mode_iterator VNx4SI_ONLY [VNx4SI])
 (define_mode_iterator VNx4SF_ONLY [VNx4SF])
 (define_mode_iterator VNx2DI_ONLY [VNx2DI])
@@ -448,6 +451,12 @@ (define_mode_iterator SVE_FULL_BHI [VNx16QI VNx8HI])
 ;; elements.
 (define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI])
 
+;; Pairs of the above.
+(define_mode_iterator SVE_FULL_BHSIx2 [VNx32QI VNx16HI VNx8SI])
+
+;; Fully-packed SVE vector modes that have 16-bit float elements.
+(define_mode_iterator SVE_FULL_HF [VNx8BF VNx8HF])
+
 ;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements.
 (define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI
 				    VNx8BF VNx8HF VNx4SF VNx2DF])
@@ -473,6 +482,9 @@ (define_mode_iterator SVE_FULL_SD [VNx4SI VNx2DI VNx4SF VNx2DF])
 ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
 
+;; 2x and 4x tuples of the above, excluding 2x DI.
+(define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
+
 ;; Fully-packed SVE floating-point vector modes that have 32-bit or 64-bit
 ;; elements.
 (define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF])
@@ -481,6 +493,10 @@ (define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF])
 (define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM")
 				   (VNx2DF "TARGET_SVE_F64MM")])
 
+;; Fully-packed SVE vector modes that have 32-bit or smaller elements.
+(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
+				    VNx8BF VNx8HF VNx4SF])
+
 ;; Fully-packed SVE vector modes that have 32-bit elements.
 (define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF])
 
@@ -514,6 +530,8 @@ (define_mode_iterator SVE_FULLx3 [VNx48QI VNx24HI VNx12SI VNx6DI
 (define_mode_iterator SVE_FULLx4 [VNx64QI VNx32HI VNx16SI VNx8DI
 			          VNx32BF VNx32HF VNx16SF VNx8DF])
 
+(define_mode_iterator SVE_FULLx24 [SVE_FULLx2 SVE_FULLx4])
+
 ;; All SVE vector structure modes.
 (define_mode_iterator SVE_STRUCT [SVE_FULLx2 SVE_FULLx3 SVE_FULLx4])
 
@@ -531,6 +549,8 @@ (define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
 				VNx4SI VNx2SI
 				VNx2DI])
 
+(define_mode_iterator SVE_DIx24 [VNx4DI VNx8DI])
+
 ;; SVE modes with 2 or 4 elements.
 (define_mode_iterator SVE_24 [VNx2QI VNx2HI VNx2HF VNx2BF VNx2SI VNx2SF
 			      VNx2DI VNx2DF
@@ -588,12 +608,47 @@ (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
 ;; Bfloat16 modes to which V4SF can be converted
 (define_mode_iterator V4SF_TO_BF [V4BF V8BF])
 
+(define_mode_iterator SVE_BHSx24 [VNx32QI VNx16HI VNx8SI
+				  VNx16BF VNx16HF VNx8SF
+				  VNx64QI VNx32HI VNx16SI
+				  VNx32BF VNx32HF VNx16SF])
+
+(define_mode_iterator SVE_Ix24 [VNx32QI VNx16HI VNx8SI VNx4DI
+				VNx64QI VNx32HI VNx16SI VNx8DI])
+
+(define_mode_iterator SVE_Fx24 [VNx16HF VNx8SF VNx4DF
+				VNx32HF VNx16SF VNx8DF])
+
+(define_mode_iterator SVE_SFx24 [VNx8SF VNx16SF])
+
 ;; The modes used to represent different ZA access sizes.
 (define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI])
 (define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")])
 
 (define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")])
 
+(define_mode_iterator SME_ZA_BIx24 [VNx32QI VNx64QI])
+
+(define_mode_iterator SME_ZA_BHIx124 [VNx16QI VNx32QI VNx64QI
+				      VNx8HI VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_BHIx24 [VNx32QI VNx64QI VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_HFx124 [VNx8BF VNx16BF VNx32BF
+				     VNx8HF VNx16HF VNx32HF])
+
+(define_mode_iterator SME_ZA_HFx24 [VNx16BF VNx32BF VNx16HF VNx32HF])
+
+(define_mode_iterator SME_ZA_HIx124 [VNx8HI VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_HIx24 [VNx16HI VNx32HI])
+
+(define_mode_iterator SME_ZA_SDIx24 [VNx8SI (VNx4DI "TARGET_SME_I16I64")
+				     VNx16SI (VNx8DI "TARGET_SME_I16I64")])
+
+(define_mode_iterator SME_ZA_SDFx24 [VNx8SF (VNx4DF "TARGET_SME_F64F64")
+				     VNx16SF (VNx8DF "TARGET_SME_F64F64")])
+
 ;; The modes for which outer product instructions are supported.
 (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
 (define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF
@@ -731,6 +786,7 @@ (define_c_enum "unspec"
     UNSPEC_IORF		; Used in aarch64-sve.md.
     UNSPEC_XORF		; Used in aarch64-sve.md.
     UNSPEC_REVB		; Used in aarch64-sve.md.
+    UNSPEC_REVD		; Used in aarch64-sve2.md.
     UNSPEC_REVH		; Used in aarch64-sve.md.
     UNSPEC_REVW		; Used in aarch64-sve.md.
     UNSPEC_REVBHW	; Used in aarch64-sve.md.
@@ -845,6 +901,7 @@ (define_c_enum "unspec"
     UNSPEC_CMLA180_CONJ	; Used in aarch64-sve2.md.
     UNSPEC_CMUL		; Used in aarch64-sve2.md.
     UNSPEC_CMUL_CONJ	; Used in aarch64-sve2.md.
+    UNSPEC_CNTP_C	; Used in aarch64-sve2.md.
     UNSPEC_COND_FCVTLT	; Used in aarch64-sve2.md.
     UNSPEC_COND_FCVTNT	; Used in aarch64-sve2.md.
     UNSPEC_COND_FCVTX	; Used in aarch64-sve2.md.
@@ -865,10 +922,14 @@ (define_c_enum "unspec"
     UNSPEC_HISTSEG	; Used in aarch64-sve2.md.
     UNSPEC_MATCH	; Used in aarch64-sve2.md.
     UNSPEC_NMATCH	; Used in aarch64-sve2.md.
+    UNSPEC_PEXT		; Used in aarch64-sve2.md.
+    UNSPEC_PEXTx2	; Used in aarch64-sve2.md.
     UNSPEC_PMULLB	; Used in aarch64-sve2.md.
     UNSPEC_PMULLB_PAIR	; Used in aarch64-sve2.md.
     UNSPEC_PMULLT	; Used in aarch64-sve2.md.
     UNSPEC_PMULLT_PAIR	; Used in aarch64-sve2.md.
+    UNSPEC_PSEL		; Used in aarch64-sve2.md.
+    UNSPEC_PTRUE_C	; Used in aarch64-sve2.md.
     UNSPEC_RADDHNB	; Used in aarch64-sve2.md.
     UNSPEC_RADDHNT	; Used in aarch64-sve2.md.
     UNSPEC_RSHRNB	; Used in aarch64-sve2.md.
@@ -902,8 +963,12 @@ (define_c_enum "unspec"
     UNSPEC_SQRDCMLAH180	; Used in aarch64-sve2.md.
     UNSPEC_SQRDCMLAH270	; Used in aarch64-sve2.md.
     UNSPEC_SQRDCMLAH90	; Used in aarch64-sve2.md.
+    UNSPEC_SQRSHR	; Used in aarch64-sve2.md.
+    UNSPEC_SQRSHRN	; Used in aarch64-sve2.md.
     UNSPEC_SQRSHRNB	; Used in aarch64-sve2.md.
     UNSPEC_SQRSHRNT	; Used in aarch64-sve2.md.
+    UNSPEC_SQRSHRU	; Used in aarch64-sve2.md.
+    UNSPEC_SQRSHRUN	; Used in aarch64-sve2.md.
     UNSPEC_SQRSHRUNB	; Used in aarch64-sve2.md.
     UNSPEC_SQRSHRUNT	; Used in aarch64-sve2.md.
     UNSPEC_SQSHRNB	; Used in aarch64-sve2.md.
@@ -938,6 +1003,8 @@ (define_c_enum "unspec"
     UNSPEC_UMULHS	; Used in aarch64-sve2.md.
     UNSPEC_UMULLB	; Used in aarch64-sve2.md.
     UNSPEC_UMULLT	; Used in aarch64-sve2.md.
+    UNSPEC_UQRSHR	; Used in aarch64-sve2.md.
+    UNSPEC_UQRSHRN	; Used in aarch64-sve2.md.
     UNSPEC_UQRSHRNB	; Used in aarch64-sve2.md.
     UNSPEC_UQRSHRNT	; Used in aarch64-sve2.md.
     UNSPEC_UQSHRNB	; Used in aarch64-sve2.md.
@@ -951,35 +1018,77 @@ (define_c_enum "unspec"
     UNSPEC_USUBWB	; Used in aarch64-sve2.md.
     UNSPEC_USUBWT	; Used in aarch64-sve2.md.
     UNSPEC_USDOT	; Used in aarch64-simd.md.
+    UNSPEC_UZP		; Used in aarch64-sve2.md.
+    UNSPEC_UZPQ		; Used in aarch64-sve2.md.
+    UNSPEC_ZIP		; Used in aarch64-sve2.md.
+    UNSPEC_ZIPQ		; Used in aarch64-sve2.md.
     UNSPEC_SUDOT	; Used in aarch64-simd.md.
     UNSPEC_BFDOT	; Used in aarch64-simd.md.
     UNSPEC_BFMLALB	; Used in aarch64-sve.md.
     UNSPEC_BFMLALT	; Used in aarch64-sve.md.
+    UNSPEC_BFMLSLB	; Used in aarch64-sve.md.
+    UNSPEC_BFMLSLT	; Used in aarch64-sve.md.
     UNSPEC_BFMMLA	; Used in aarch64-sve.md.
     UNSPEC_BFCVTN      ; Used in aarch64-simd.md.
     UNSPEC_BFCVTN2     ; Used in aarch64-simd.md.
     UNSPEC_BFCVT       ; Used in aarch64-simd.md.
     UNSPEC_FCVTXN	; Used in aarch64-simd.md.
 
+    ;; All used in aarch64-sve2.md
+    UNSPEC_FCVTN
+    UNSPEC_FDOT
+    UNSPEC_SQCVT
+    UNSPEC_SQCVTN
+    UNSPEC_SQCVTU
+    UNSPEC_SQCVTUN
+    UNSPEC_UQCVT
+    UNSPEC_UQCVTN
+
     ;; All used in aarch64-sme.md
+    UNSPEC_SME_ADD
+    UNSPEC_SME_ADD_WRITE
     UNSPEC_SME_ADDHA
     UNSPEC_SME_ADDVA
+    UNSPEC_SME_BMOPA
+    UNSPEC_SME_BMOPS
+    UNSPEC_SME_FADD
+    UNSPEC_SME_FDOT
+    UNSPEC_SME_FVDOT
+    UNSPEC_SME_FMLA
+    UNSPEC_SME_FMLS
     UNSPEC_SME_FMOPA
     UNSPEC_SME_FMOPS
+    UNSPEC_SME_FSUB
     UNSPEC_SME_LD1_HOR
     UNSPEC_SME_LD1_VER
+    UNSPEC_SME_READ
     UNSPEC_SME_READ_HOR
     UNSPEC_SME_READ_VER
+    UNSPEC_SME_SDOT
+    UNSPEC_SME_SVDOT
+    UNSPEC_SME_SMLA
+    UNSPEC_SME_SMLS
     UNSPEC_SME_SMOPA
     UNSPEC_SME_SMOPS
     UNSPEC_SME_ST1_HOR
     UNSPEC_SME_ST1_VER
+    UNSPEC_SME_SUB
+    UNSPEC_SME_SUB_WRITE
+    UNSPEC_SME_SUDOT
+    UNSPEC_SME_SUVDOT
     UNSPEC_SME_SUMOPA
     UNSPEC_SME_SUMOPS
+    UNSPEC_SME_UDOT
+    UNSPEC_SME_UVDOT
+    UNSPEC_SME_UMLA
+    UNSPEC_SME_UMLS
     UNSPEC_SME_UMOPA
     UNSPEC_SME_UMOPS
+    UNSPEC_SME_USDOT
+    UNSPEC_SME_USVDOT
     UNSPEC_SME_USMOPA
     UNSPEC_SME_USMOPS
+    UNSPEC_SME_WRITE
     UNSPEC_SME_WRITE_HOR
     UNSPEC_SME_WRITE_VER
 ])
@@ -1253,6 +1362,14 @@ (define_mode_attr Vetype [(V8QI "b") (V16QI "b")
 			  (VNx2DI "d")
 			  (VNx2DF "d")
 			  (VNx1TI "q")
+			  (VNx32QI "b") (VNx64QI "b")
+			  (VNx16HI "h") (VNx32HI "h")
+			  (VNx16HF "h") (VNx32HF "h")
+			  (VNx16BF "h") (VNx32BF "h")
+			  (VNx8SI "s") (VNx16SI "s")
+			  (VNx8SF "s") (VNx16SF "s")
+			  (VNx4DI "d") (VNx8DI "d")
+			  (VNx4DF "d") (VNx8DF "d")
 			  (BF "h") (V4BF "h") (V8BF "h")
 			  (HF "h")
 			  (SF "s") (DF "d")
@@ -1526,7 +1643,9 @@ (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
 ;; Narrowed modes of vector modes.
 (define_mode_attr VNARROW [(VNx8HI "VNx16QI")
 			   (VNx4SI "VNx8HI") (VNx4SF "VNx8HF")
-			   (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")])
+			   (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")
+			   (VNx8SI "VNx8HI") (VNx16SI "VNx16QI")
+			   (VNx8DI "VNx8HI")])
 
 ;; Register suffix narrowed modes for VQN.
 (define_mode_attr Vntype [(V8HI "8b") (V4SI "4h")
@@ -1554,7 +1673,25 @@ (define_mode_attr V2XWIDE [(V8QI "V8HI") (V4HI "V4SI")
 			   (V16QI "V16HI") (V8HI "V8SI")
 			   (V2SI "V2DI") (V4SI "V4DI")
 			   (V2DI "V2TI") (DI "TI")
-			   (HI "SI") (SI "DI")])
+			   (HI "SI") (SI "DI")
+			   (VNx16QI "VNx16HI")
+			   (VNx8HI "VNx8SI")
+			   (VNx4SI "VNx4DI")
+			   (VNx32QI "VNx32HI")
+			   (VNx16HI "VNx16SI")
+			   (VNx8SI "VNx8DI")])
+
+(define_mode_attr v2xwide [(V8QI "v8hi") (V4HI "v4si")
+			   (V16QI "v16hi") (V8HI "v8si")
+			   (V2SI "v2di") (V4SI "v4di")
+			   (V2DI "v2ti") (DI "ti")
+			   (HI "si") (SI "di")
+			   (VNx16QI "vnx16hi")
+			   (VNx8HI "vnx8si")
+			   (VNx4SI "vnx4di")
+			   (VNx32QI "vnx32hi")
+			   (VNx16HI "vnx16si")
+			   (VNx8SI "vnx8di")])
 
 ;; Predicate mode associated with VWIDE.
 (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])
@@ -1598,7 +1735,9 @@ (define_mode_attr Vwhalf [(V8QI "4h") (V4HI "2s")
 ;; SVE vector after narrowing.
 (define_mode_attr Ventype [(VNx8HI "b")
 			   (VNx4SI "h") (VNx4SF "h")
-			   (VNx2DI "s") (VNx2DF "s")])
+			   (VNx2DI "s") (VNx2DF "s")
+			   (VNx8SI "h") (VNx16SI "b")
+			   (VNx8DI "h")])
 
 ;; SVE vector after widening.
 (define_mode_attr Vewtype [(VNx16QI "h")
@@ -1694,6 +1833,7 @@ (define_mode_attr V_INT_EQUIV [(V8QI "V8QI") (V16QI "V16QI")
 			       (VNx8BF  "VNx8HI")
 			       (VNx4SI  "VNx4SI") (VNx4SF "VNx4SI")
 			       (VNx2DI  "VNx2DI") (VNx2DF "VNx2DI")
+			       (VNx8SF  "VNx8SI") (VNx16SF "VNx16SI")
 ])
 
 ;; Lower case mode with floating-point values replaced by like-sized integers.
@@ -1711,6 +1851,7 @@ (define_mode_attr v_int_equiv [(V8QI "v8qi") (V16QI "v16qi")
 			       (VNx8BF  "vnx8hi")
 			       (VNx4SI  "vnx4si") (VNx4SF "vnx4si")
 			       (VNx2DI  "vnx2di") (VNx2DF "vnx2di")
+			       (VNx8SF  "vnx8si") (VNx16SF "vnx16si")
 ])
 
 ;; Floating-point equivalent of selected modes.
@@ -2044,7 +2185,11 @@ (define_mode_attr insn_length [(VNx32QI "8")  (VNx16HI "8")
 			       (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")])
 
 ;; The type of a subvector in an SVE_STRUCT.
-(define_mode_attr VSINGLE [(VNx32QI "VNx16QI")
+(define_mode_attr VSINGLE [(VNx16QI "VNx16QI")
+			   (VNx8BF "VNx8BF")
+			   (VNx8HF "VNx8HF")
+			   (VNx8HI "VNx8HI")
+			   (VNx32QI "VNx16QI")
 			   (VNx16HI "VNx8HI") (VNx16HF "VNx8HF")
 			   (VNx16BF "VNx8BF")
 			   (VNx8SI "VNx4SI") (VNx8SF "VNx4SF")
@@ -2061,7 +2206,8 @@ (define_mode_attr VSINGLE [(VNx32QI "VNx16QI")
 			   (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")])
 
 ;; ...and again in lower case.
-(define_mode_attr vsingle [(VNx32QI "vnx16qi")
+(define_mode_attr vsingle [(VNx8HI "vnx8hi")
+			   (VNx32QI "vnx16qi")
 			   (VNx16HI "vnx8hi") (VNx16HF "vnx8hf")
 			   (VNx16BF "vnx8bf")
 			   (VNx8SI "vnx4si") (VNx8SF "vnx4sf")
@@ -2144,6 +2290,47 @@ (define_mode_attr FCMLA_maybe_lane [(V2SF "<Vtype>") (V4SF "<Vetype>[%4]")
 				    (V4HF "<Vetype>[%4]") (V8HF "<Vetype>[%4]")
 				    ])
 
+(define_mode_attr za32_offset_range [(VNx16QI "0_to_12_step_4")
+				     (VNx8BF "0_to_14_step_2")
+				     (VNx8HF "0_to_14_step_2")
+				     (VNx8HI "0_to_14_step_2")
+				     (VNx32QI "0_to_4_step_4")
+				     (VNx16BF "0_to_6_step_2")
+				     (VNx16HF "0_to_6_step_2")
+				     (VNx16HI "0_to_6_step_2")
+				     (VNx64QI "0_to_4_step_4")
+				     (VNx32BF "0_to_6_step_2")
+				     (VNx32HF "0_to_6_step_2")
+				     (VNx32HI "0_to_6_step_2")])
+
+(define_mode_attr za64_offset_range [(VNx8HI "0_to_12_step_4")
+				     (VNx16HI "0_to_4_step_4")
+				     (VNx32HI "0_to_4_step_4")])
+
+(define_mode_attr za32_long [(VNx16QI "ll") (VNx32QI "ll") (VNx64QI "ll")
+			     (VNx8HI "l") (VNx16HI "l") (VNx32HI "l")])
+
+(define_mode_attr za32_last_offset [(VNx16QI "3") (VNx32QI "3") (VNx64QI "3")
+				    (VNx8HI "1") (VNx16HI "1") (VNx32HI "1")])
+
+(define_mode_attr vg_modifier [(VNx16QI "")
+			       (VNx32QI ", vgx2")
+			       (VNx64QI ", vgx4")
+			       (VNx8BF "")
+			       (VNx16BF ", vgx2")
+			       (VNx32BF ", vgx4")
+			       (VNx8HF "")
+			       (VNx16HF ", vgx2")
+			       (VNx32HF ", vgx4")
+			       (VNx8HI "")
+			       (VNx16HI ", vgx2")
+			       (VNx32HI ", vgx4")])
+
+(define_mode_attr z_suffix [(VNx16QI ".b") (VNx32QI "") (VNx64QI "")
+			    (VNx8BF ".h") (VNx16BF "") (VNx32BF "")
+			    (VNx8HF ".h") (VNx16HF "") (VNx32HF "")
+			    (VNx8HI ".h") (VNx16HI "") (VNx32HI "")])
+
 ;; The number of bytes controlled by a predicate
 (define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2")
 			      (VNx4BI "4") (VNx2BI "8")])
@@ -2173,7 +2360,29 @@ (define_mode_attr vec_or_offset [(V8QI "vec") (V16QI "vec") (V4HI "vec")
 				 (V8HI "vec") (V2SI "vec") (V4SI "vec")
 				 (V2DI "vec") (DI "offset")])
 
-(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")])
+(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")
+		     (VNx16BF "b") (VNx16HF "")
+		     (VNx32BF "b") (VNx32HF "")])
+
+(define_mode_attr aligned_operand [(VNx16QI "register_operand")
+				   (VNx8HI "register_operand")
+				   (VNx8BF "register_operand")
+				   (VNx8HF "register_operand")
+				   (VNx32QI "aligned_register_operand")
+				   (VNx16HI "aligned_register_operand")
+				   (VNx16BF "aligned_register_operand")
+				   (VNx16HF "aligned_register_operand")
+				   (VNx64QI "aligned_register_operand")
+				   (VNx32HI "aligned_register_operand")
+				   (VNx32BF "aligned_register_operand")
+				   (VNx32HF "aligned_register_operand")])
+
+(define_mode_attr aligned_fpr [(VNx16QI "w") (VNx8HI "w")
+			       (VNx8BF "w") (VNx8HF "w")
+			       (VNx32QI "Uw2") (VNx16HI "Uw2")
+			       (VNx16BF "Uw2") (VNx16HF "Uw2")
+			       (VNx64QI "Uw4") (VNx32HI "Uw4")
+			       (VNx32BF "Uw4") (VNx32HF "Uw4")])
 
 ;; -------------------------------------------------------------------
 ;; Code Iterators
@@ -2304,6 +2513,10 @@ (define_code_iterator SVE_INT_BINARY_SD [div udiv])
 ;; SVE integer binary operations that have an immediate form.
 (define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin])
 
+(define_code_iterator SVE_INT_BINARY_MULTI [smax smin umax umin])
+
+(define_code_iterator SVE_INT_BINARY_SINGLE [plus smax smin umax umin])
+
 ;; SVE floating-point operations with an unpredicated all-register form.
 (define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult])
 
@@ -2759,18 +2972,30 @@ (define_int_iterator SVE_INT_SHIFT_IMM [UNSPEC_ASRD
 					(UNSPEC_SRSHR "TARGET_SVE2")
 					(UNSPEC_URSHR "TARGET_SVE2")])
 
+(define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH
+					   UNSPEC_SRSHL UNSPEC_URSHL])
+
 (define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS])
 
 (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
 
-(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT
-					      UNSPEC_BFMLALB
-					      UNSPEC_BFMLALT
-					      (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
+(define_int_iterator SVE_FP_BINARY_MULTI [UNSPEC_FMAX UNSPEC_FMAXNM
+					  UNSPEC_FMIN UNSPEC_FMINNM])
+
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG
+  [UNSPEC_BFDOT
+   UNSPEC_BFMLALB
+   UNSPEC_BFMLALT
+   (UNSPEC_BFMLSLB "TARGET_SME2 && TARGET_STREAMING_SME")
+   (UNSPEC_BFMLSLT "TARGET_SME2 && TARGET_STREAMING_SME")
+   (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
 
-(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT
-						   UNSPEC_BFMLALB
-						   UNSPEC_BFMLALT])
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE
+  [UNSPEC_BFDOT
+   UNSPEC_BFMLALB
+   UNSPEC_BFMLALT
+   (UNSPEC_BFMLSLB "TARGET_SME2 && TARGET_STREAMING_SME")
+   (UNSPEC_BFMLSLT "TARGET_SME2 && TARGET_STREAMING_SME")])
 
 (define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
 					UNSPEC_IORV
@@ -2914,6 +3139,11 @@ (define_int_iterator SVE_WHILE [UNSPEC_WHILELE UNSPEC_WHILELO
 
 (define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR])
 
+(define_int_iterator SVE_WHILE_ORDER [UNSPEC_WHILEGE UNSPEC_WHILEGT
+				      UNSPEC_WHILEHI UNSPEC_WHILEHS
+				      UNSPEC_WHILELE UNSPEC_WHILELO
+				      UNSPEC_WHILELS UNSPEC_WHILELT])
+
 (define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE
 				     UNSPEC_ASHIFTRT_WIDE
 				     UNSPEC_LSHIFTRT_WIDE])
@@ -3025,6 +3255,13 @@ (define_int_iterator SVE2_INT_SHIFT_IMM_NARROWT [UNSPEC_RSHRNT
 						 UNSPEC_UQRSHRNT
 						 UNSPEC_UQSHRNT])
 
+(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN [UNSPEC_SQRSHR
+						  UNSPEC_SQRSHRN
+						  UNSPEC_SQRSHRU
+						  UNSPEC_SQRSHRUN
+						  UNSPEC_UQRSHR
+						  UNSPEC_UQRSHRN])
+
 (define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI])
 
 (define_int_iterator SVE2_INT_CADD [UNSPEC_CADD90
@@ -3168,6 +3405,16 @@ (define_int_iterator SVE2_PMULL [UNSPEC_PMULLB UNSPEC_PMULLT])
 
 (define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR])
 
+(define_int_iterator SVE_QCVTxN [UNSPEC_SQCVT UNSPEC_SQCVTN
+				 UNSPEC_SQCVTU UNSPEC_SQCVTUN
+				 UNSPEC_UQCVT UNSPEC_UQCVTN])
+
+(define_int_iterator SVE2_SFx24_UNARY [UNSPEC_FRINTA UNSPEC_FRINTM
+				       UNSPEC_FRINTN UNSPEC_FRINTP])
+
+(define_int_iterator SVE2_x24_PERMUTE [UNSPEC_ZIP UNSPEC_UZP])
+(define_int_iterator SVE2_x24_PERMUTEQ [UNSPEC_ZIPQ UNSPEC_UZPQ])
+
 (define_int_iterator FCADD [UNSPEC_FCADD90
 			    UNSPEC_FCADD270])
 
@@ -3203,6 +3450,8 @@ (define_int_iterator FCMLA_OP [UNSPEC_FCMLA
 (define_int_iterator FCMUL_OP [UNSPEC_FCMUL
 			       UNSPEC_FCMUL_CONJ])
 
+(define_int_iterator UNSPEC_REVD_ONLY [UNSPEC_REVD])
+
 (define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER])
 (define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER])
 (define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER])
@@ -3215,8 +3464,37 @@ (define_int_iterator SME_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS
 				  UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS
 				  UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS])
 
+(define_int_iterator SME2_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS
+				   UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS])
+
 (define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS])
 
+(define_int_iterator SME2_BMOP [UNSPEC_SME_BMOPA UNSPEC_SME_BMOPS])
+
+(define_int_iterator SME_BINARY_SLICE_SDI [UNSPEC_SME_ADD UNSPEC_SME_SUB])
+
+(define_int_iterator SME_BINARY_SLICE_SDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB])
+
+(define_int_iterator SME_BINARY_WRITE_SLICE_SDI [UNSPEC_SME_ADD_WRITE
+						 UNSPEC_SME_SUB_WRITE])
+
+(define_int_iterator SME_INT_DOTPROD [UNSPEC_SME_SDOT UNSPEC_SME_UDOT
+				      UNSPEC_SME_USDOT])
+
+(define_int_iterator SME_INT_DOTPROD_LANE [UNSPEC_SME_SDOT UNSPEC_SME_SVDOT
+					   UNSPEC_SME_UDOT UNSPEC_SME_UVDOT
+					   UNSPEC_SME_SUDOT UNSPEC_SME_SUVDOT
+					   UNSPEC_SME_USDOT UNSPEC_SME_USVDOT])
+
+(define_int_iterator SME_FP_DOTPROD [UNSPEC_SME_FDOT])
+
+(define_int_iterator SME_FP_DOTPROD_LANE [UNSPEC_SME_FDOT UNSPEC_SME_FVDOT])
+
+(define_int_iterator SME_INT_TERNARY_SLICE [UNSPEC_SME_SMLA UNSPEC_SME_SMLS
+					    UNSPEC_SME_UMLA UNSPEC_SME_UMLS])
+
+(define_int_iterator SME_FP_TERNARY_SLICE [UNSPEC_SME_FMLA UNSPEC_SME_FMLS])
+
 ;; Iterators for atomic operations.
 
 (define_int_iterator ATOMIC_LDOP
@@ -3233,6 +3511,10 @@ (define_int_attr atomic_ldoptab
 
 (define_int_iterator SUBDI_BITS [8 16 32])
 
+(define_int_iterator BHSD_BITS [8 16 32 64])
+
+(define_int_iterator LUTI_BITS [2 4])
+
 ;; -------------------------------------------------------------------
 ;; Int Iterators Attributes.
 ;; -------------------------------------------------------------------
@@ -3254,6 +3536,7 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
 			(UNSPEC_RSQRTS "frsqrts")
 			(UNSPEC_RBIT "rbit")
 			(UNSPEC_REVB "revb")
+			(UNSPEC_REVD "revd")
 			(UNSPEC_REVH "revh")
 			(UNSPEC_REVW "revw")
 			(UNSPEC_UMAXV "umax")
@@ -3291,28 +3574,60 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
 			(UNSPEC_PMULLT "pmullt")
 			(UNSPEC_PMULLT_PAIR "pmullt_pair")
 			(UNSPEC_SMATMUL "smatmul")
+			(UNSPEC_UZP "uzp")
+			(UNSPEC_UZPQ "uzpq")
+			(UNSPEC_ZIP "zip")
+			(UNSPEC_ZIPQ "zipq")
+			(UNSPEC_SME_ADD "add")
+			(UNSPEC_SME_ADD_WRITE "add_write")
 			(UNSPEC_SME_ADDHA "addha")
 			(UNSPEC_SME_ADDVA "addva")
+			(UNSPEC_SME_BMOPA "bmopa")
+			(UNSPEC_SME_BMOPS "bmops")
+			(UNSPEC_SME_FADD "fadd")
+			(UNSPEC_SME_FDOT "fdot")
+			(UNSPEC_SME_FVDOT "fvdot")
+			(UNSPEC_SME_FMLA "fmla")
+			(UNSPEC_SME_FMLS "fmls")
 			(UNSPEC_SME_FMOPA "fmopa")
 			(UNSPEC_SME_FMOPS "fmops")
+			(UNSPEC_SME_FSUB "fsub")
 			(UNSPEC_SME_LD1_HOR "ld1_hor")
 			(UNSPEC_SME_LD1_VER "ld1_ver")
 			(UNSPEC_SME_READ_HOR "read_hor")
 			(UNSPEC_SME_READ_VER "read_ver")
+			(UNSPEC_SME_SDOT "sdot")
+			(UNSPEC_SME_SVDOT "svdot")
+			(UNSPEC_SME_SMLA "smla")
+			(UNSPEC_SME_SMLS "smls")
 			(UNSPEC_SME_SMOPA "smopa")
 			(UNSPEC_SME_SMOPS "smops")
 			(UNSPEC_SME_ST1_HOR "st1_hor")
 			(UNSPEC_SME_ST1_VER "st1_ver")
+			(UNSPEC_SME_SUB "sub")
+			(UNSPEC_SME_SUB_WRITE "sub_write")
+			(UNSPEC_SME_SUDOT "sudot")
+			(UNSPEC_SME_SUVDOT "suvdot")
 			(UNSPEC_SME_SUMOPA "sumopa")
 			(UNSPEC_SME_SUMOPS "sumops")
+			(UNSPEC_SME_UDOT "udot")
+			(UNSPEC_SME_UVDOT "uvdot")
+			(UNSPEC_SME_UMLA "umla")
+			(UNSPEC_SME_UMLS "umls")
 			(UNSPEC_SME_UMOPA "umopa")
 			(UNSPEC_SME_UMOPS "umops")
+			(UNSPEC_SME_USDOT "usdot")
+			(UNSPEC_SME_USVDOT "usvdot")
 			(UNSPEC_SME_USMOPA "usmopa")
 			(UNSPEC_SME_USMOPS "usmops")
 			(UNSPEC_SME_WRITE_HOR "write_hor")
 			(UNSPEC_SME_WRITE_VER "write_ver")
 			(UNSPEC_SQCADD90 "sqcadd90")
 			(UNSPEC_SQCADD270 "sqcadd270")
+			(UNSPEC_SQCVT "sqcvt")
+			(UNSPEC_SQCVTN "sqcvtn")
+			(UNSPEC_SQCVTU "sqcvtu")
+			(UNSPEC_SQCVTUN "sqcvtun")
 			(UNSPEC_SQRDCMLAH "sqrdcmlah")
 			(UNSPEC_SQRDCMLAH90 "sqrdcmlah90")
 			(UNSPEC_SQRDCMLAH180 "sqrdcmlah180")
@@ -3320,6 +3635,8 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
 			(UNSPEC_TRN1Q "trn1q")
 			(UNSPEC_TRN2Q "trn2q")
 			(UNSPEC_UMATMUL "umatmul")
+			(UNSPEC_UQCVT "uqcvt")
+			(UNSPEC_UQCVTN "uqcvtn")
 			(UNSPEC_USMATMUL "usmatmul")
 			(UNSPEC_UZP1Q "uzp1q")
 			(UNSPEC_UZP2Q "uzp2q")
@@ -3549,7 +3866,9 @@ (define_int_attr perm_insn [(UNSPEC_ZIP1 "zip1") (UNSPEC_ZIP2 "zip2")
 			    (UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2")
 			    (UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2")
 			    (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2")
-			    (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")])
+			    (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")
+			    (UNSPEC_UZP "uzp") (UNSPEC_UZPQ "uzp")
+			    (UNSPEC_ZIP "zip") (UNSPEC_ZIPQ "zip")])
 
 ; op code for REV instructions (size within which elements are reversed).
 (define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32")
@@ -3727,8 +4046,12 @@ (define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb")
 			     (UNSPEC_SQRDMLSH "sqrdmlsh")
 			     (UNSPEC_SQRDMULH "sqrdmulh")
 			     (UNSPEC_SQRSHL "sqrshl")
+			     (UNSPEC_SQRSHR "sqrshr")
+			     (UNSPEC_SQRSHRN "sqrshrn")
 			     (UNSPEC_SQRSHRNB "sqrshrnb")
 			     (UNSPEC_SQRSHRNT "sqrshrnt")
+			     (UNSPEC_SQRSHRU "sqrshru")
+			     (UNSPEC_SQRSHRUN "sqrshrun")
 			     (UNSPEC_SQRSHRUNB "sqrshrunb")
 			     (UNSPEC_SQRSHRUNT "sqrshrunt")
 			     (UNSPEC_SQSHL "sqshl")
@@ -3773,6 +4096,8 @@ (define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb")
 			     (UNSPEC_UMULLB "umullb")
 			     (UNSPEC_UMULLT "umullt")
 			     (UNSPEC_UQRSHL "uqrshl")
+			     (UNSPEC_UQRSHR "uqrshr")
+			     (UNSPEC_UQRSHRN "uqrshrn")
 			     (UNSPEC_UQRSHRNB "uqrshrnb")
 			     (UNSPEC_UQRSHRNT "uqrshrnt")
 			     (UNSPEC_UQSHL "uqshl")
@@ -3829,6 +4154,8 @@ (define_int_attr sve_int_qsub_op [(UNSPEC_SQDMULLB "sqdmlslb")
 (define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot")
 			    (UNSPEC_BFMLALB "bfmlalb")
 			    (UNSPEC_BFMLALT "bfmlalt")
+			    (UNSPEC_BFMLSLB "bfmlslb")
+			    (UNSPEC_BFMLSLT "bfmlslt")
 			    (UNSPEC_BFMMLA "bfmmla")
 			    (UNSPEC_FRECPE "frecpe")
 			    (UNSPEC_FRECPS "frecps")
@@ -3889,6 +4216,9 @@ (define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd")
 				(UNSPEC_COND_FMULX "fmulx")
 				(UNSPEC_COND_FSUB "fsubr")])
 
+(define_int_attr sme_int_op [(UNSPEC_SME_ADD_WRITE "add")
+			     (UNSPEC_SME_SUB_WRITE "sub")])
+
 (define_int_attr rot [(UNSPEC_CADD90 "90")
 		      (UNSPEC_CADD270 "270")
 		      (UNSPEC_CDOT "0")
@@ -4065,6 +4395,15 @@ (define_int_attr hv [(UNSPEC_SME_LD1_HOR "h")
 		     (UNSPEC_SME_WRITE_HOR "h")
 		     (UNSPEC_SME_WRITE_VER "v")])
 
+(define_int_attr has_16bit_form [(UNSPEC_SME_SDOT "true")
+				 (UNSPEC_SME_SVDOT "true")
+				 (UNSPEC_SME_UDOT "true")
+				 (UNSPEC_SME_UVDOT "true")
+				 (UNSPEC_SME_SUDOT "false")
+				 (UNSPEC_SME_SUVDOT "false")
+				 (UNSPEC_SME_USDOT "false")
+				 (UNSPEC_SME_USVDOT "false")])
+
 ;; Iterators and attributes for fpcr fpsr getter setters
 
 (define_int_iterator GET_FPSCR
@@ -4079,4 +4418,4 @@ (define_int_attr fpscr_name
    (UNSPECV_GET_FPCR "fpcr")
    (UNSPECV_SET_FPCR "fpcr")])
 
-(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s")])
+(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 5f304898a8c..c60a9e19c70 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -42,6 +42,30 @@ (define_predicate "const0_operand"
   (and (match_code "const_int")
        (match_test "op == CONST0_RTX (mode)")))
 
+(define_predicate "const_0_to_7_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+
+(define_predicate "const_0_to_4_step_4_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 4)")
+       (match_test "(INTVAL (op) & 3) == 0")))
+
+(define_predicate "const_0_to_6_step_2_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 6)")
+       (match_test "(INTVAL (op) & 1) == 0")))
+
+(define_predicate "const_0_to_12_step_4_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 12)")
+       (match_test "(INTVAL (op) & 3) == 0")))
+
+(define_predicate "const_0_to_14_step_2_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 14)")
+       (match_test "(INTVAL (op) & 1) == 0")))
+
 (define_predicate "const_1_to_3_operand"
   (match_code "const_int,const_vector")
 {
@@ -564,8 +588,7 @@ (define_predicate "aarch64_simd_nonimmediate_operand"
 ;;   Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
 ;;   Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)
 (define_predicate "aarch64_simd_shift_imm_qi"
-  (and (match_code "const_int")
-       (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+  (match_operand 0 "const_0_to_7_operand"))
 
 (define_predicate "aarch64_simd_shift_imm_hi"
   (and (match_code "const_int")
[...many tests snipped because they haven't changed...]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c
index e9158ed8adf..3b9245e199f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c
@@ -121,3 +121,21 @@ TEST_CREATE (create2_u64, svuint64x2_t, svuint64_t,
 TEST_CREATE (create2_f64, svfloat64x2_t, svfloat64_t,
 	     z0 = svcreate2_f64 (z5, z4),
 	     z0 = svcreate2 (z5, z4))
+
+/*
+** create2_b_0:
+**	ret
+*/
+TEST_CREATE_B (create2_b_0, svboolx2_t,
+	       p0_res = svcreate2_b (p0, p1),
+	       p0_res = svcreate2 (p0, p1))
+
+/*
+** create2_b_1:
+**	mov	p0\.b, p2\.b
+**	mov	p1\.b, p3\.b
+**	ret
+*/
+TEST_CREATE_B (create2_b_1, svboolx2_t,
+	       p0_res = svcreate2_b (p2, p3),
+	       p0_res = svcreate2 (p2, p3))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c
new file mode 100644
index 00000000000..f54feeae6ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c
@@ -0,0 +1,55 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get2_b_p0_0:
+**	mov	p0\.b, p4\.b
+**	ret
+*/
+TEST_GET_B (get2_b_p0_0, svboolx2_t,
+	    p0 = svget2_b (p4, 0),
+	    p0 = svget2 (p4, 0))
+
+/*
+** get2_b_p0_1:
+**	mov	p0\.b, p5\.b
+**	ret
+*/
+TEST_GET_B (get2_b_p0_1, svboolx2_t,
+	    p0 = svget2_b (p4, 1),
+	    p0 = svget2 (p4, 1))
+
+/*
+** get2_b_p4_0:
+**	ret
+*/
+TEST_GET_B (get2_b_p4_0, svboolx2_t,
+	    p4_res = svget2_b (p4, 0),
+	    p4_res = svget2 (p4, 0))
+
+/*
+** get2_b_p4_1:
+**	mov	p4\.b, p5\.b
+**	ret
+*/
+TEST_GET_B (get2_b_p4_1, svboolx2_t,
+	    p4_res = svget2_b (p4, 1),
+	    p4_res = svget2 (p4, 1))
+
+/*
+** get2_b_p5_0:
+**	mov	p5\.b, p4\.b
+**	ret
+*/
+TEST_GET_B (get2_b_p5_0, svboolx2_t,
+	    p5_res = svget2_b (p4, 0),
+	    p5_res = svget2 (p4, 0))
+
+/*
+** get2_b_p5_1:
+**	ret
+*/
+TEST_GET_B (get2_b_p5_1, svboolx2_t,
+	    p5_res = svget2_b (p4, 1),
+	    p5_res = svget2 (p4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c
new file mode 100644
index 00000000000..30afb6abc24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c
@@ -0,0 +1,41 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set2_b_p8_0:
+**	mov	p9\.b, p5\.b
+**	mov	p8\.b, p0\.b
+**	ret
+*/
+TEST_SET_B (set2_b_p8_0, svboolx2_t,
+	    p8 = svset2_b (p4, 0, p0),
+	    p8 = svset2 (p4, 0, p0))
+
+/*
+** set2_b_p8_1:
+**	mov	p8\.b, p4\.b
+**	mov	p9\.b, p0\.b
+**	ret
+*/
+TEST_SET_B (set2_b_p8_1, svboolx2_t,
+	    p8 = svset2_b (p4, 1, p0),
+	    p8 = svset2 (p4, 1, p0))
+
+/*
+** set2_b_p4_0:
+**	mov	p4\.b, p12\.b
+**	ret
+*/
+TEST_SET_B (set2_b_p4_0, svboolx2_t,
+	    p4 = svset2_b (p4, 0, p12),
+	    p4 = svset2 (p4, 0, p12))
+
+/*
+** set2_b_p4_1:
+**	mov	p5\.b, p13\.b
+**	ret
+*/
+TEST_SET_B (set2_b_p4_1, svboolx2_t,
+	    p4 = svset2_b (p4, 1, p13),
+	    p4 = svset2 (p4, 1, p13))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 5ce0be5947b..756fe4db385 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -25,7 +25,13 @@
 #define ZA_ATTR
 #endif
 
-#define ATTR SM_ATTR ZA_ATTR
+#ifdef SHARED_ZT0
+#define ZT0_ATTR __arm_inout("zt0")
+#else
+#define ZT0_ATTR
+#endif
+
+#define ATTR SM_ATTR ZA_ATTR ZT0_ATTR
 
 #ifdef __cplusplus
 #define PROTO(NAME, RET, ARGS) \
@@ -232,6 +238,24 @@
     return z0;						\
   }
 
+#define TEST_LOAD_COUNT(NAME, TTYPE, STYPE, CODE1, CODE2) \
+  PROTO (NAME, void, (const STYPE *x0, intptr_t x1))	\
+  {							\
+    register svcount_t pn0 __asm ("pn0");		\
+    register svcount_t pn7 __asm ("pn7");		\
+    register svcount_t pn8 __asm ("pn8");		\
+    register svcount_t pn15 __asm ("pn15");		\
+    register TTYPE z0 __asm ("z0");			\
+    register TTYPE z17 __asm ("z17");			\
+    register TTYPE z22 __asm ("z22");			\
+    register TTYPE z28 __asm ("z28");			\
+    __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7),	\
+		    "=Upa" (pn8), "=Upa" (pn15));	\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "w" (z0), "w" (z17),		\
+		    "w" (z22), "w" (z28));		\
+  }
+
 #define TEST_LOAD_GATHER_SZ(NAME, RES_TYPE, STYPE, ZTYPE, CODE1, CODE2) \
   PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0,	\
 			  const STYPE *x0))			\
@@ -278,6 +302,24 @@
     INVOKE (CODE1, CODE2);				\
   }
 
+#define TEST_STORE_COUNT(NAME, TTYPE, STYPE, CODE1, CODE2) \
+  PROTO (NAME, void, (STYPE *x0, intptr_t x1))		\
+  {							\
+    register svcount_t pn0 __asm ("pn0");		\
+    register svcount_t pn7 __asm ("pn7");		\
+    register svcount_t pn8 __asm ("pn8");		\
+    register svcount_t pn15 __asm ("pn15");		\
+    register TTYPE z0 __asm ("z0");			\
+    register TTYPE z17 __asm ("z17");			\
+    register TTYPE z22 __asm ("z22");			\
+    register TTYPE z28 __asm ("z28");			\
+    __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7),	\
+		    "=Upa" (pn8), "=Upa" (pn15),	\
+		    "=w" (z0), "=w" (z17), "=w" (z22),	\
+		    "=w" (z28));			\
+    INVOKE (CODE1, CODE2);				\
+  }
+
 #define TEST_STORE_SCATTER_SZ(NAME, DATA_TYPE, STYPE, ZTYPE, CODE1, CODE2) \
   PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0,	\
 		      STYPE *x0))				\
@@ -308,6 +350,79 @@
     return x0;							\
   }
 
+#define TEST_PN(NAME, CODE1, CODE2)			\
+  PROTO (NAME, void, (void))				\
+  {							\
+    register svcount_t pn0 __asm("pn0");		\
+    register svcount_t pn7 __asm("pn7");		\
+    register svcount_t pn8 __asm("pn8");		\
+    register svcount_t pn15 __asm("pn15");		\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "Upa" (pn0), "Upa" (pn7),	\
+		    "Upa" (pn8), "Upa" (pn15));		\
+  }
+
+#define TEST_COUNT_PN(NAME, CODE1, CODE2) 		\
+  PROTO (NAME, void, (void))				\
+  {							\
+    register svcount_t pn0 __asm ("pn0");		\
+    register svcount_t pn7 __asm ("pn7");		\
+    register svcount_t pn8 __asm ("pn8");		\
+    register svcount_t pn15 __asm ("pn15");		\
+    register uint64_t x0 __asm ("x0");			\
+    register uint64_t x15 __asm ("x15");		\
+    register uint64_t x17 __asm ("x17");		\
+    __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7),	\
+		    "=Upa" (pn8), "=Upa" (pn15));	\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "r" (x0), "r" (x15),		\
+		    "r" (x17));				\
+  }
+
+#define TEST_EXTRACT_PN(NAME, TYPE, CODE1, CODE2) 	\
+  PROTO (NAME, void, (void))				\
+  {							\
+    register svcount_t pn0 __asm ("pn0");		\
+    register TYPE p2 __asm ("p2");			\
+    register TYPE p5 __asm ("p5");			\
+    register svcount_t pn7 __asm ("pn7");		\
+    register svcount_t pn8 __asm ("pn8");		\
+    register TYPE p9 __asm ("p9");			\
+    register svcount_t pn11 __asm ("pn11");		\
+    register TYPE p12 __asm ("p12");			\
+    register svcount_t pn15 __asm ("pn15");		\
+    __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7),	\
+		    "=Upa" (pn8), "=Upa" (pn11),	\
+		    "=Upa" (pn15));			\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "Upa" (p2), "Upa" (p5),	\
+		    "Upa" (p9), "Upa" (p12));		\
+  }
+
+#define TEST_SELECT_P(NAME, TYPE, CODE1, CODE2) 	\
+  PROTO (NAME, void, (void))				\
+  {							\
+    register TYPE p0 __asm ("p0");			\
+    register TYPE p2 __asm ("p2");			\
+    register svbool_t p7 __asm ("p7");			\
+    register svbool_t p8 __asm ("p8");			\
+    register TYPE p13 __asm ("p13");			\
+    register svbool_t p15 __asm ("p15");		\
+    register int32_t w11 __asm ("w11");			\
+    register int32_t w12 __asm ("w12");			\
+    register int32_t w15 __asm ("w15");			\
+    register int32_t w16 __asm ("w16");			\
+    __asm volatile ("" : "=Upa" (p0), "=Upa" (p2),	\
+		    "=Upa" (p7), "=Upa" (p8),		\
+		    "=Upa" (p13), "=Upa" (p15),		\
+		    "=r" (w11), "=r" (w12),		\
+		    "=r" (w15), "=r" (w16));		\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "Upa" (p0), "Upa" (p2),	\
+		    "Upa" (p7), "Upa" (p8),		\
+		    "Upa" (p13), "Upa" (p15));		\
+  }
+
 #define TEST_COMPARE_S(NAME, TYPE, CODE1, CODE2)	\
   PROTO (NAME, svbool_t, (TYPE x0, TYPE x1))		\
   {							\
@@ -316,6 +431,30 @@
     return p0;						\
   }
 
+#define TEST_COMPARE_S_X2(NAME, TYPE, CODE1, CODE2)	\
+  PROTO (NAME, void, (TYPE x0, TYPE x1))		\
+  {							\
+    register svboolx2_t p1 __asm("p1");			\
+    register svboolx2_t p4 __asm("p4");			\
+    register svboolx2_t p9 __asm("p9");			\
+    register svboolx2_t p14 __asm("p14");		\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "Upa" (p1), "Upa" (p4),	\
+		    "Upa" (p9), "Upa" (p14));		\
+  }
+
+#define TEST_COMPARE_S_C(NAME, TYPE, CODE1, CODE2)	\
+  PROTO (NAME, void, (TYPE x0, TYPE x1))		\
+  {							\
+    register svcount_t pn0 __asm("pn0");		\
+    register svcount_t pn7 __asm("pn7");		\
+    register svcount_t pn8 __asm("pn8");		\
+    register svcount_t pn15 __asm("pn15");		\
+    INVOKE (CODE1, CODE2);				\
+    __asm volatile ("" :: "Upa" (pn0), "Upa" (pn7),	\
+		    "Upa" (pn8), "Upa" (pn15));		\
+  }
+
 #define TEST_COMPARE_Z(NAME, TYPE, CODE1, CODE2)		\
   PROTO (NAME, svbool_t, (TYPE z0, TYPE z1,			\
 			  svbool_t p0, svbool_t p1))		\
@@ -414,6 +553,15 @@
     return z0;							\
   }
 
+#define TEST_CREATE_B(NAME, TTYPE, CODE1, CODE2)		\
+  PROTO (NAME, TTYPE, (svbool_t p0, svbool_t p1,		\
+		       svbool_t p2, svbool_t p3))		\
+  {								\
+    TTYPE p0_res;						\
+    INVOKE (CODE1, CODE2);					\
+    return p0_res;						\
+  }
+
 #define TEST_GET(NAME, TTYPE, ZTYPE, CODE1, CODE2)		\
   PROTO (NAME, void, (ZTYPE unused0, ZTYPE unused1,		\
 		      ZTYPE unused2, ZTYPE unused3, TTYPE z4))	\
@@ -428,6 +576,22 @@
 		    "w" (z6_res), "w" (z7_res));		\
   }
 
+#define TEST_GET_B(NAME, TTYPE, CODE1, CODE2)			\
+  PROTO (NAME, void, (void))					\
+  {								\
+    register svbool_t p0 __asm ("p0");				\
+    register TTYPE p4 __asm ("p4");				\
+    register svbool_t p4_res __asm ("p4");			\
+    register svbool_t p5_res __asm ("p5");			\
+    register svbool_t p6_res __asm ("p6");			\
+    register svbool_t p7_res __asm ("p7");			\
+    __asm volatile ("" : "=Upa" (p0), "=Upa" (p4));		\
+    INVOKE (CODE1, CODE2);					\
+    __asm volatile ("" :: "Upa" (p0), "Upa" (p4_res),		\
+		    "Upa" (p5_res), "Upa" (p6_res),		\
+		    "Upa" (p7_res));				\
+  }
+
 #define TEST_SET(NAME, TTYPE, ZTYPE, CODE1, CODE2)		\
   PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3,	\
 		      TTYPE z4))				\
@@ -437,6 +601,20 @@
     __asm volatile ("" :: "w" (z4), "w" (z24));			\
   }
 
+#define TEST_SET_B(NAME, TTYPE, CODE1, CODE2)			\
+  PROTO (NAME, void, (void))					\
+  {								\
+    register svbool_t p0 __asm ("p0");				\
+    register TTYPE p4 __asm ("p4");				\
+    register TTYPE p8 __asm ("p8");				\
+    register svbool_t p12 __asm ("p12");			\
+    register svbool_t p13 __asm ("p13");			\
+    __asm volatile ("" : "=Upa" (p0), "=Upa" (p4),		\
+		    "=Upa" (p12), "=Upa" (p13));		\
+    INVOKE (CODE1, CODE2);					\
+    __asm volatile ("" :: "Upa" (p4), "Upa" (p8));		\
+  }
+
 #define TEST_TBL2(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2)	\
   PROTO (NAME, ZTYPE, (TTYPE z0, TTYPE z2, UTYPE z4))		\
   {								\
@@ -453,6 +631,25 @@
     return z0_res;						\
   }
 
+#define TEST_XN(NAME, TTYPE, RES, CODE1, CODE2)			\
+  PROTO (NAME, void, ())					\
+  {								\
+    register TTYPE z0 __asm ("z0");				\
+    register TTYPE z4 __asm ("z4");				\
+    register TTYPE z18 __asm ("z18");				\
+    register TTYPE z23 __asm ("z23");				\
+    register TTYPE z28 __asm ("z28");				\
+    register svcount_t pn0 __asm ("pn0");			\
+    register svcount_t pn7 __asm ("pn7");			\
+    register svcount_t pn8 __asm ("pn8");			\
+    register svcount_t pn15 __asm ("pn15");			\
+    __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z18),	\
+		    "=w" (z23), "=w" (z28), "=Upa" (pn0),	\
+		    "=Upa" (pn7), "=Upa" (pn8), "=Upa" (pn15));	\
+    INVOKE (RES = CODE1, RES = CODE2);				\
+    __asm volatile ("" :: "w" (RES));				\
+  }
+
 #define TEST_DUAL_XN(NAME, TTYPE1, TTYPE2, RES, CODE1, CODE2)	\
   PROTO (NAME, void, ())					\
   {								\
@@ -467,4 +664,74 @@
     __asm volatile ("" :: "w" (RES));				\
   }
 
+#define TEST_XN_SINGLE(NAME, TTYPE, ZTYPE, RES, CODE1, CODE2)	\
+  PROTO (NAME, void, ())					\
+  {								\
+    register ZTYPE z0 __asm ("z0");				\
+    register TTYPE z1 __asm ("z1");				\
+    register ZTYPE z5 __asm ("z5");				\
+    register ZTYPE z7 __asm ("z7");				\
+    register ZTYPE z16 __asm ("z16");				\
+    register TTYPE z18 __asm ("z18");				\
+    register ZTYPE z23 __asm ("z23");				\
+    register TTYPE z24 __asm ("z24");				\
+    register TTYPE z28 __asm ("z28");				\
+    __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (z5),	\
+		    "=w" (z7), "=w" (z16), "=w" (z18),		\
+		    "=w" (z23), "=w" (z24), "=w" (z28));	\
+    INVOKE (RES = CODE1, RES = CODE2);				\
+    __asm volatile ("" :: "w" (RES));				\
+  }
+
+#define TEST_XN_SINGLE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2)	\
+  PROTO (NAME, TTYPE, (TTYPE z0))				\
+  {								\
+    register ZTYPE z15 __asm ("z15");				\
+    __asm volatile ("" : "=w" (z15));				\
+    INVOKE (CODE1, CODE2);					\
+    return z0;							\
+  }
+
+#define TEST_XN_SINGLE_AWKWARD(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
+  PROTO (NAME, TTYPE, (ZTYPE z0, TTYPE z1, ZTYPE zn))		\
+  {								\
+    TTYPE z0_res;						\
+    INVOKE (CODE1, CODE2);					\
+    return z0_res;						\
+  }
+
+#define TEST_X2_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2)	\
+  PROTO (NAME, void, ())					\
+  {								\
+    register TTYPE z0 __asm ("z0");				\
+    register ZTYPE z5 __asm ("z5");				\
+    register TTYPE z6 __asm ("z6");				\
+    register TTYPE z16 __asm ("z16");				\
+    register ZTYPE z22 __asm ("z22");				\
+    register TTYPE z29 __asm ("z29");				\
+    register ZTYPE z0_res __asm ("z0");				\
+    __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6),	\
+		    "=w" (z16), "=w" (z22), "=w" (z29));	\
+    INVOKE (CODE1, CODE2);					\
+    __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22));	\
+  }
+
+#define TEST_X4_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2)	\
+  PROTO (NAME, void, ())					\
+  {								\
+    register TTYPE z0 __asm ("z0");				\
+    register TTYPE z4 __asm ("z4");				\
+    register TTYPE z16 __asm ("z16");				\
+    register TTYPE z21 __asm ("z21");				\
+    register ZTYPE z25 __asm ("z25");				\
+    register TTYPE z26 __asm ("z26");				\
+    register ZTYPE z0_res __asm ("z0");				\
+    register ZTYPE z22_res __asm ("z22");			\
+    __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z16),	\
+		    "=w" (z21), "=w" (z26));			\
+    INVOKE (CODE1, CODE2);					\
+    __asm volatile ("" :: "w" (z0_res), "w" (z22_res),		\
+		    "w" (z25));					\
+  }
+
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c
new file mode 100644
index 00000000000..f0b2dbb41be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sve2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16,
+    svfloat32_t f32, svint32_t s32, svuint32_t u32, svint32x2_t s32x2,
+    svuint32x2_t u32x2)
+{
+  svrshl_x (pg, s16); /* { dg-error {too few arguments to function 'svrshl_x'} } */
+  svrshl_x (pg, s16, s16, s16); /* { dg-error {too many arguments to function 'svrshl_x'} } */
+  svrshl_x (s32, s16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svrshl_x', which expects 'svbool_t'} } */
+  svrshl_x (1, s16, s32); /* { dg-error {passing 'int' to argument 1 of 'svrshl_x', which expects 'svbool_t'} } */
+  svrshl_x (pg, pg, s16); /* { dg-error {'svrshl_x' has no form that takes 'svbool_t' arguments} } */
+  svrshl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svrshl_x', which expects an SVE type rather than a scalar} } */
+  svrshl_x (pg, s16, s16);
+  svrshl_x (pg, s16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, s16, s32); /* { dg-error {arguments 2 and 3 of 'svrshl_x' must have the same element size, but the values passed here have type 'svint16_t' and 'svint32_t' respectively} } */
+  svrshl_x (pg, s16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, s16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, s16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, s16, 0);
+  svrshl_x (pg, f16, s16); /* { dg-error {'svrshl_x' has no form that takes 'svfloat16_t' arguments} } */
+  svrshl_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, f16, s32); /* { dg-error {'svrshl_x' has no form that takes 'svfloat16_t' arguments} } */
+  svrshl_x (pg, f16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */
+  svrshl_x (pg, u16, s16);
+
+  svrshl_x (pg, s32x2, s32x2); /* { dg-error {'svrshl_x' has no form that takes 'svint32x2_t' arguments} } */
+  svrshl_x (pg, s32x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 3 of 'svrshl_x', which expects vectors of signed integers} } */
+  svrshl_x (pg, s32x2, s32); /* { dg-error {'svrshl_x' has no form that takes 'svint32x2_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c
new file mode 100644
index 00000000000..976d5af7f23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+    svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svfloat32_t f32)
+  __arm_streaming
+{
+  svrshl (s16x2); /* { dg-error {too few arguments to function 'svrshl'} } */
+  svrshl (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svrshl'} } */
+  svrshl (pg, s16x2); /* { dg-error {'svrshl' has no form that takes 'svbool_t' arguments} } */
+  svrshl (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svrshl', which expects an SVE type rather than a scalar} } */
+  svrshl (s16, s16); /* { dg-error {'svrshl' has no form that takes 'svint16_t' arguments} } */
+  svrshl (s16x2, s16x2);
+  svrshl (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+  svrshl (s16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+  svrshl (s16x2, s32x2); /* { dg-error {arguments 1 and 2 of 'svrshl' must have the same element size, but the values passed here have type 'svint16x2_t' and 'svint32x2_t' respectively} } */
+  svrshl (s32x2, s16); /* { dg-error {arguments 1 and 2 of 'svrshl' must have the same element size, but the values passed here have type 'svint32x2_t' and 'svint16_t' respectively} } */
+  svrshl (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+  svrshl (s32x2, s32);
+  svrshl (s32x2, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+  svrshl (s32x2, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+  svrshl (s16x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+  svrshl (s16x2, f32x2); /* { dg-error {passing 'svfloat32x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+  svrshl (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */
+  svrshl (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svrshl', which expects an SVE type rather than a scalar type} } */
+  svrshl (f16x2, s16x2); /* { dg-error {'svrshl' has no form that takes 'svfloat16x2_t' arguments} } */
+  svrshl (f16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */
+  svrshl (f16x2, s32x2); /* { dg-error {'svrshl' has no form that takes 'svfloat16x2_t' arguments} } */
+  svrshl (u16x2, s16x2);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c
new file mode 100644
index 00000000000..9676de711f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sve2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16,
+    svfloat32_t f32, svint32_t s32, svuint32_t u32, svint32x2_t s32x2,
+    svuint32x2_t u32x2)
+{
+  svqdmulh (s16); /* { dg-error {too few arguments to function 'svqdmulh'} } */
+  svqdmulh (s16, s16, s16); /* { dg-error {too many arguments to function 'svqdmulh'} } */
+  svqdmulh (pg, pg); /* { dg-error {'svqdmulh' has no form that takes 'svbool_t' arguments} } */
+  svqdmulh (1, s16); /* { dg-error {passing 'int' to argument 1 of 'svqdmulh', which expects an SVE type rather than a scalar} } */
+  svqdmulh (s16, s16);
+  svqdmulh (s16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */
+  svqdmulh (s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */
+  svqdmulh (s16, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */
+  svqdmulh (s32, s32x2); /* { dg-error {passing tuple 'svint32x2_t' to argument 2 of 'svqdmulh' after passing single vector 'svint32_t' to argument 1} } */
+  svqdmulh (s16, 0);
+  svqdmulh (f16, f16); /* { dg-error {'svqdmulh' has no form that takes 'svfloat16_t' arguments} } */
+  svqdmulh (u16, u16); /* { dg-error {'svqdmulh' has no form that takes 'svuint16_t' arguments} } */
+
+  svqdmulh (s32x2, s32x2); /* { dg-error {ACLE function 'svqdmulh_s32_x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c
new file mode 100644
index 00000000000..5cc8a4c5c50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+    svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svint32x4_t s32x4,
+    svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svfloat32_t f32)
+  __arm_streaming
+{
+  svqdmulh (s16x2); /* { dg-error {too few arguments to function 'svqdmulh'} } */
+  svqdmulh (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svqdmulh'} } */
+  svqdmulh (pg, s16x2); /* { dg-error {'svqdmulh' has no form that takes 'svbool_t' arguments} } */
+  svqdmulh (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svqdmulh', which expects an SVE type rather than a scalar} } */
+  svqdmulh (s16, s16);
+  svqdmulh (s16x2, s16x2);
+  svqdmulh (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16x2_t'} } */
+  svqdmulh (s16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16x2_t'} } */
+  svqdmulh (s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint32_t'} } */
+  svqdmulh (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint32_t'} } */
+  svqdmulh (s32x2, s32);
+  svqdmulh (s32x2, s32x3); /* { dg-error {passing mismatched tuple types 'svint32x2_t' and 'svint32x3_t' to arguments 1 and 2 of 'svqdmulh'} } */
+  svqdmulh (s32x2, s32x4); /* { dg-error {passing mismatched tuple types 'svint32x2_t' and 'svint32x4_t' to arguments 1 and 2 of 'svqdmulh'} } */
+  svqdmulh (s32x3, s32x2); /* { dg-error {'svqdmulh' has no form that takes 'svint32x3_t' arguments} } */
+  svqdmulh (s32x3, s32x3); /* { dg-error {'svqdmulh' has no form that takes 'svint32x3_t' arguments} } */
+  svqdmulh (s32x4, s32x2); /* { dg-error {passing mismatched tuple types 'svint32x4_t' and 'svint32x2_t' to arguments 1 and 2 of 'svqdmulh'} } */
+  svqdmulh (s32x4, s32x3); /* { dg-error {passing mismatched tuple types 'svint32x4_t' and 'svint32x3_t' to arguments 1 and 2 of 'svqdmulh'} } */
+  svqdmulh (s32x4, s32x4);
+  svqdmulh (u32x2, u32x2); /* { dg-error {'svqdmulh' has no form that takes 'svuint32x2_t' arguments} } */
+  svqdmulh (u32x2, u32); /* { dg-error {'svqdmulh' has no form that takes 'svuint32x2_t' arguments} } */
+
+  svqdmulh (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint16_t'} } */
+  svqdmulh (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svqdmulh', which expects an SVE type rather than a scalar type} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c
new file mode 100644
index 00000000000..aa7633bb322
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+    svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svint32x4_t s32x4,
+    svint16_t s16, svuint16_t u16, svfloat16_t f16, svint32_t s32,
+    svuint32_t u32, svfloat32_t f32)
+  __arm_streaming
+{
+  svadd (s16x2); /* { dg-error {too few arguments to function 'svadd'} } */
+  svadd (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svadd'} } */
+  svadd (pg, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svadd', which expects an SVE type rather than a scalar} } */
+  svadd (s16, s16); /* { dg-error {'svadd' has no form that takes 'svint16_t' arguments} } */
+  svadd (s16x2, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (s16x2, s16);
+  svadd (s16x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+  svadd (s16x2, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+  svadd (s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint32_t'} } */
+  svadd (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint32_t'} } */
+  svadd (s32x2, s32);
+  svadd (s32x3, s32); /* { dg-error {'svadd' has no form that takes 'svint32x3_t' arguments} } */
+  svadd (s32x4, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (f32x2, f32); /* { dg-error {'svadd' has no form that takes 'svfloat32x2_t' arguments} } */
+
+  svadd (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+  svadd (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svadd', which expects an SVE type rather than a scalar type} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
index 7e91a41cc42..44c3e48e916 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
@@ -20,7 +20,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svint32_t s32,
   svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svuint8_t'} } */
   svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svint8_t'} } */
   svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */
-  svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */
   svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */
   svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c
new file mode 100644
index 00000000000..01cd88f180b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint8_t s8, svuint8_t u8,
+    svint16x2_t s16x2, svuint16x2_t u16x2, svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svusdot_za32_vg1x2 (1, u8x2); /* { dg-error {too few arguments to function 'svusdot_za32_vg1x2'} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x2, s8x2); /* { dg-error {too many arguments to function 'svusdot_za32_vg1x2'} } */
+
+  svusdot_za32_vg1x2 (s8x2, u8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svusdot_za32_vg1x2', which expects 'uint32_t'} } */
+  svusdot_za32_vg1x2 (f, u8x2, s8x2);
+  svusdot_za32_vg1x2 (d, u8x2, s8x2);
+  svusdot_za32_vg1x2 (pg, u8x2, s8x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svusdot_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svusdot_za32_vg1x2 (1, 1, s8x2); /* { dg-error {passing 'int' to argument 2 of 'svusdot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_za32_vg1x2 (1, pg, s8x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, s8, s8x2); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x3, s8x3); /* { dg-error {passing 'svuint8x3_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x4, s8x4); /* { dg-error {passing 'svuint8x4_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svusdot_za32_vg1x2 (1, u8x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svusdot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_za32_vg1x2 (1, u8x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_za32_vg1x2 (1, u8x2, s16); /* { dg-error {arguments 2 and 3 of 'svusdot_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint16_t' respectively} } */
+  svusdot_za32_vg1x2 (1, u8x2, s16x2); /* { dg-error {arguments 2 and 3 of 'svusdot_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint16x2_t' respectively} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8);
+  svusdot_za32_vg1x2 (1, u8x2, s8x2);
+  svusdot_za32_vg1x2 (1, u8x2, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x2, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_za32_vg1x2 (1, u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svusdot_za32_vg1x2', which expects vectors of signed integers} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, s8x2, s8); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, s8x2, s8x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+
+  svusdot_za32_vg1x2 (1, u16x2, s16); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, u16x2, s16x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, s64x2, s64x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, u64x2, s64x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_streaming
+{
+  svusdot_za32_vg1x2 (0, u8x2, s8x2); /* { dg-error {ACLE function 'svusdot_za32_u8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_inout("za")
+{
+  svusdot_za32_vg1x2 (0, u8x2, s8x2); /* { dg-error {ACLE function 'svusdot_za32_u8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c
new file mode 100644
index 00000000000..937d992b054
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg2x1 (0, s16, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x1'} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x1'} } */
+
+  svmla_lane_za32_vg2x1 (s16, s16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 1 of 'svmla_lane_za32_vg2x1', which expects 'uint32_t'} } */
+  svmla_lane_za32_vg2x1 (f, s16, s16, 0);
+  svmla_lane_za32_vg2x1 (d, s16, s16, 0);
+  svmla_lane_za32_vg2x1 (pg, s16, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x1', which expects 'uint32_t'} } */
+
+  svmla_lane_za32_vg2x1 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x1', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x1 (0, pg, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svbool_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x1 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+
+  svmla_lane_za32_vg2x1 (0, s16, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x1', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x1 (0, s16, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x1 (0, u16, u16, 0);
+  svmla_lane_za32_vg2x1 (0, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svuint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s32, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x1' has no form that takes 'svint32_t' arguments} } */
+  svmla_lane_za32_vg2x1 (0, u32, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x1' has no form that takes 'svuint32_t' arguments} } */
+
+  svmla_lane_za32_vg2x1 (0, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16, 7);
+  svmla_lane_za32_vg2x1 (0, s16, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x1' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+  svmla_lane_za32_vg2x1 (0, s16, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x1' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+  svmla_lane_za32_vg2x1 (0, s16, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x1' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint64_t s64, svuint64_t u64)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za64_vg4x1 (0, s16, s16, 0);
+  svmla_lane_za64_vg4x1 (0, u16, u16, 0);
+  svmla_lane_za64_vg4x1 (0, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x1 (0, s16, s16, 7);
+  svmla_lane_za64_vg4x1 (0, u16, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x1 (0, s32, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint32_t' arguments} } */
+  svmla_lane_za64_vg4x1 (0, u32, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint32_t' arguments} } */
+  svmla_lane_za64_vg4x1 (0, s64, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint64_t' arguments} } */
+  svmla_lane_za64_vg4x1 (0, u64, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint64_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c
new file mode 100644
index 00000000000..126a764c998
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg2x2 (0, s16x2, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x2'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x2'} } */
+
+  svmla_lane_za32_vg2x2 (s16x2, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 1 of 'svmla_lane_za32_vg2x2', which expects 'uint32_t'} } */
+  svmla_lane_za32_vg2x2 (f, s16x2, s16, 0);
+  svmla_lane_za32_vg2x2 (d, s16x2, s16, 0);
+  svmla_lane_za32_vg2x2 (pg, s16x2, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x2', which expects 'uint32_t'} } */
+
+  svmla_lane_za32_vg2x2 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x2', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x2 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+  svmla_lane_za32_vg2x2 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+  svmla_lane_za32_vg2x2 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+  svmla_lane_za32_vg2x2 (0, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+
+  svmla_lane_za32_vg2x2 (0, s16x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x2', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svmla_lane_za32_vg2x2', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x2 (0, u16x2, u16, 0);
+  svmla_lane_za32_vg2x2 (0, u16x2, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svuint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x2' has no form that takes 'svint32x2_t' arguments} } */
+  svmla_lane_za32_vg2x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x2' has no form that takes 'svuint32x2_t' arguments} } */
+
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 7);
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32_t s32, svuint32_t u32,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64_t s64, svuint64_t u64,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za64_vg4x2 (0, s16x2, s16, 0);
+  svmla_lane_za64_vg4x2 (0, u16x2, u16, 0);
+  svmla_lane_za64_vg4x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x2 (0, s16x2, s16, 7);
+  svmla_lane_za64_vg4x2 (0, u16x2, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svint32x2_t' arguments} } */
+  svmla_lane_za64_vg4x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svmla_lane_za64_vg4x2 (0, s64x2, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svint64x2_t' arguments} } */
+  svmla_lane_za64_vg4x2 (0, u64x2, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c
new file mode 100644
index 00000000000..17bed0c72dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg2x4 (0, s16x4, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x4'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x4'} } */
+
+  svmla_lane_za32_vg2x4 (s16x4, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 1 of 'svmla_lane_za32_vg2x4', which expects 'uint32_t'} } */
+  svmla_lane_za32_vg2x4 (f, s16x4, s16, 0);
+  svmla_lane_za32_vg2x4 (d, s16x4, s16, 0);
+  svmla_lane_za32_vg2x4 (pg, s16x4, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x4', which expects 'uint32_t'} } */
+
+  svmla_lane_za32_vg2x4 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x4', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x4 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+  svmla_lane_za32_vg2x4 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+  svmla_lane_za32_vg2x4 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+  svmla_lane_za32_vg2x4 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+
+  svmla_lane_za32_vg2x4 (0, s16x4, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x4', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16x4, 0); /* { dg-error {passing 'svint16x4_t' to argument 3 of 'svmla_lane_za32_vg2x4', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x4 (0, u16x4, u16, 0);
+  svmla_lane_za32_vg2x4 (0, u16x4, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svuint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s32x4, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x4' has no form that takes 'svint32x4_t' arguments} } */
+  svmla_lane_za32_vg2x4 (0, u32x4, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x4' has no form that takes 'svuint32x4_t' arguments} } */
+
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 7);
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x4' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x4_t s16x4, svint16_t s16) __arm_streaming
+{
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x4' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x4_t s16x4, svint16_t s16) __arm_inout("za")
+{
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x4' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32_t s32, svuint32_t u32,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64_t s64, svuint64_t u64,
+    svint64x4_t s64x4, svuint64x4_t u64x4)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za64_vg4x4 (0, s16x4, s16, 0);
+  svmla_lane_za64_vg4x4 (0, u16x4, u16, 0);
+  svmla_lane_za64_vg4x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x4 (0, s16x4, s16, 7);
+  svmla_lane_za64_vg4x4 (0, u16x4, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x4 (0, s32x4, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svint32x4_t' arguments} } */
+  svmla_lane_za64_vg4x4 (0, u32x4, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svuint32x4_t' arguments} } */
+  svmla_lane_za64_vg4x4 (0, s64x4, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svint64x4_t' arguments} } */
+  svmla_lane_za64_vg4x4 (0, u64x4, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svuint64x4_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c
new file mode 100644
index 00000000000..d2a67c678df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32, svfloat32_t f32,
+    svint32x2_t s32x2, svuint32x2_t u32x2, svfloat32x2_t f32x2, int i)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg4x1 (0, s8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg4x1', which expects a value in the range \[0, 15\]} } */
+  svmla_lane_za32_vg4x1 (0, u8, u8, 0);
+  svmla_lane_za32_vg4x1 (0, s8, s8, 15);
+  svmla_lane_za32_vg4x1 (0, u8, u8, 16); /* { dg-error {passing 16 to argument 4 of 'svmla_lane_za32_vg4x1', which expects a value in the range \[0, 15\]} } */
+  svmla_lane_za32_vg4x1 (0, s16, s16, 0); /* { dg-error {'svmla_lane_za32_vg4x1' has no form that takes 'svint16_t' arguments} } */
+  svmla_lane_za32_vg4x1 (0, u16, u16, 0); /* { dg-error {'svmla_lane_za32_vg4x1' has no form that takes 'svuint16_t' arguments} } */
+
+  svmla_lane_za32_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za32_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svmla_lane_za32_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za32_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, 0);
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, 4); /* { dg-error {passing 4 to argument 4 of 'svmla_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, i); /* { dg-error {argument 4 of 'svmla_lane_za32_vg1x2' must be an integer constant expression} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c
new file mode 100644
index 00000000000..8307a2813dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svuint32x3_t u32x3,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za32_vg1x2 (1, s32x2); /* { dg-error {too few arguments to function 'svadd_write_za32_vg1x2'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s32x2, s32x2); /* { dg-error {too many arguments to function 'svadd_write_za32_vg1x2'} } */
+
+  svadd_write_za32_vg1x2 (s32x2, s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_write_za32_vg1x2', which expects 'uint32_t'} } */
+  svadd_write_za32_vg1x2 (f, s32x2, s32x2);
+  svadd_write_za32_vg1x2 (d, s32x2, s32x2);
+  svadd_write_za32_vg1x2 (pg, s32x2, s32x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_write_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svadd_write_za32_vg1x2 (1, 1, s32x2); /* { dg-error {passing 'int' to argument 2 of 'svadd_write_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svadd_write_za32_vg1x2 (1, pg, s32x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32, s32x2); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x3, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svadd_write_za32_vg1x2 (1, s32x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svadd_write_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svadd_write_za32_vg1x2 (1, s32x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svint32x2_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s32);
+  svadd_write_za32_vg1x2 (1, s32x2, s32x2);
+  svadd_write_za32_vg1x2 (1, s32x2, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svint32x2_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32x3); /* { dg-error {passing 'svuint32x3_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32x4); /* { dg-error {passing 'svuint32x4_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, u32x2, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svuint32_t'} } */
+  svadd_write_za32_vg1x2 (1, u32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svuint32x2_t'} } */
+  svadd_write_za32_vg1x2 (1, u32x2, u32);
+  svadd_write_za32_vg1x2 (1, u32x2, u32x2);
+
+  svadd_write_za32_vg1x2 (1, s16x2, s16); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svadd_write_za32_vg1x2 (1, s16x2, s16x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svadd_write_za32_vg1x2 (1, s64x2, s64x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svadd_write_za32_vg1x2 (1, u64x2, u64x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint32x2_t s32x2) __arm_streaming
+{
+  svadd_write_za32_vg1x2 (0, s32x2, s32x2); /* { dg-error {ACLE function 'svadd_write_za32_s32_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint32x2_t s32x2) __arm_inout("za")
+{
+  svadd_write_za32_vg1x2 (0, s32x2, s32x2); /* { dg-error {ACLE function 'svadd_write_za32_s32_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za64_vg1x2 (1, s32x2, s32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, u32x2, u32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, s64x2, s64x2);
+  svadd_write_za64_vg1x2 (1, u64x2, u64x2);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c
new file mode 100644
index 00000000000..181f509eee1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svuint32x3_t u32x3,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za32_vg1x4 (1, s32x4); /* { dg-error {too few arguments to function 'svadd_write_za32_vg1x4'} } */
+  svadd_write_za32_vg1x4 (1, s32x4, s32x4, s32x4); /* { dg-error {too many arguments to function 'svadd_write_za32_vg1x4'} } */
+
+  svadd_write_za32_vg1x4 (s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svadd_write_za32_vg1x4', which expects 'uint32_t'} } */
+  svadd_write_za32_vg1x4 (f, s32x4, s32x4);
+  svadd_write_za32_vg1x4 (d, s32x4, s32x4);
+  svadd_write_za32_vg1x4 (pg, s32x4, s32x4); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_write_za32_vg1x4', which expects 'uint32_t'} } */
+
+  svadd_write_za32_vg1x4 (1, 1, s32x4); /* { dg-error {passing 'int' to argument 2 of 'svadd_write_za32_vg1x4', which expects an SVE type rather than a scalar} } */
+  svadd_write_za32_vg1x4 (1, pg, s32x4); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_write_za32_vg1x4 (1, s32, s32x4); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_write_za32_vg1x4 (1, s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_write_za32_vg1x4 (1, s32x3, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c
new file mode 100644
index 00000000000..8c8414ec55c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2+nosme-i16i64")
+
+void
+f1 (svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za64_vg1x2 (1, s32x2, s32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, u32x2, u32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, s64x2, s64x2); /* { dg-error {ACLE function 'svadd_write_za64_s64_vg1x2' requires ISA extension 'sme-i16i64'} } */
+  svadd_write_za64_vg1x2 (1, u64x2, u64x2);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c
new file mode 100644
index 00000000000..b00c04320bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svuint16_t u16, svint8_t s8, svuint8_t u8,
+    svint16x2_t s16x2, svuint16x2_t u16x2, svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svsudot_za32_vg1x2 (1, s8x2); /* { dg-error {too few arguments to function 'svsudot_za32_vg1x2'} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x2, u8x2); /* { dg-error {too many arguments to function 'svsudot_za32_vg1x2'} } */
+
+  svsudot_za32_vg1x2 (s8x2, s8x2, u8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svsudot_za32_vg1x2', which expects 'uint32_t'} } */
+  svsudot_za32_vg1x2 (f, s8x2, u8x2);
+  svsudot_za32_vg1x2 (d, s8x2, u8x2);
+  svsudot_za32_vg1x2 (pg, s8x2, u8x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svsudot_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svsudot_za32_vg1x2 (1, 1, u8x2); /* { dg-error {passing 'int' to argument 2 of 'svsudot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svsudot_za32_vg1x2 (1, pg, u8x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8, u8x2); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x3, u8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x4, u8x4); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svsudot_za32_vg1x2 (1, s8x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svsudot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svsudot_za32_vg1x2 (1, s8x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a vector of unsigned integers} } */
+  svsudot_za32_vg1x2 (1, s8x2, u16); /* { dg-error {arguments 2 and 3 of 'svsudot_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint16_t' respectively} } */
+  svsudot_za32_vg1x2 (1, s8x2, u16x2); /* { dg-error {arguments 2 and 3 of 'svsudot_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint16x2_t' respectively} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8);
+  svsudot_za32_vg1x2 (1, s8x2, u8x2);
+  svsudot_za32_vg1x2 (1, s8x2, u8x3); /* { dg-error {passing 'svuint8x3_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x4); /* { dg-error {passing 'svuint8x4_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x2, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a vector of unsigned integers} } */
+  svsudot_za32_vg1x2 (1, s8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svsudot_za32_vg1x2', which expects vectors of unsigned integers} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x3); /* { dg-error {passing 'svuint8x3_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x4); /* { dg-error {passing 'svuint8x4_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, u8x2, u8); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, u8x2, u8x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+
+  svsudot_za32_vg1x2 (1, s16x2, u16); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, s16x2, u16x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, s64x2, u64x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, u64x2, u64x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_streaming
+{
+  svsudot_za32_vg1x2 (0, s8x2, u8x2); /* { dg-error {ACLE function 'svsudot_za32_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_inout("za")
+{
+  svsudot_za32_vg1x2 (0, s8x2, u8x2); /* { dg-error {ACLE function 'svsudot_za32_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c
new file mode 100644
index 00000000000..98b2433ce72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svcount_t pn, svuint8_t u8, svint16_t s16,
+    svuint8x2_t u8x2, svuint8x3_t u8x3, svuint8x4_t u8x4)
+{
+  svsel (pg, u8); /* { dg-error {too few arguments to function 'svsel'} } */
+  svsel (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svsel'} } */
+  svsel (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (pn, u8, u8); /* { dg-error {operations on single vectors must be predicated by 'svbool_t' rather than 'svcount_t'} } */
+  svsel (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svsel', but argument 2 had type 'svbool_t'} } */
+  svsel (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8_t'} } */
+  svsel (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8_t'} } */
+  svsel (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svsel', which expects an SVE type rather than a scalar} } */
+  svsel (pg, pg, pg);
+  svsel (pg, u8, u8);
+  svsel (pg, u8, u8x2); /* { dg-error {passing tuple 'svuint8x2_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+  svsel (pg, u8, u8x3); /* { dg-error {passing tuple 'svuint8x3_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+  svsel (pg, u8, u8x4); /* { dg-error {passing tuple 'svuint8x4_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c
new file mode 100644
index 00000000000..600b7fc7959
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svbool_t pg, svcount_t pn, svuint8_t u8, svint16_t s16,
+    svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+    svuint8x2_t u8x2, svuint8x3_t u8x3, svuint8x4_t u8x4,
+    svuint16x2_t u16x2) __arm_streaming
+{
+  svsel (pn, u8x2); /* { dg-error {too few arguments to function 'svsel'} } */
+  svsel (pn, u8x2, u8x2, u8x2); /* { dg-error {too many arguments to function 'svsel'} } */
+  svsel (0, u8x2, u8x2); /* { dg-error {passing 'int' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (u8x2, u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (pg, u8x2, u8x2); /* { dg-error {operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'} } */
+  svsel (pn, u8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8x2_t'} } */
+  svsel (pn, u8x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8x2_t'} } */
+  svsel (pn, u8x2, 0); /* { dg-error {passing 'int' to argument 3 of 'svsel', which expects an SVE type rather than a scalar} } */
+  svsel (pn, u8x2, u8); /* { dg-error {passing single vector 'svuint8_t' to argument 3 of 'svsel' after passing tuple 'svuint8x2_t' to argument 2} } */
+  svsel (pn, u8x2, u8x2);
+  svsel (pn, u8x2, u8x3); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svuint8x3_t' to arguments 2 and 3 of 'svsel'} } */
+  svsel (pn, u8x2, s8x3); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svint8x3_t' to arguments 2 and 3 of 'svsel'} } */
+  svsel (pn, u8x2, u8x4); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svuint8x4_t' to arguments 2 and 3 of 'svsel'} } */
+  svsel (pn, s8x4, s8x2); /* { dg-error {passing mismatched tuple types 'svint8x4_t' and 'svint8x2_t' to arguments 2 and 3 of 'svsel'} } */
+}
+
+void
+f2 (svcount_t pn, svuint8x2_t u8x2)
+{
+  svsel (pn, u8x2, u8x2); /* { dg-error {ACLE function 'svsel_u8_x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c
new file mode 100644
index 00000000000..342bebc07d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svcount_t pn, svfloat16_t f16, svint16_t s16, svfloat32_t f32,
+    svfloat16x2_t f16x2, svfloat16x3_t f16x3, svfloat16x4_t f16x4)
+  __arm_streaming
+{
+  svclamp (f16, f16); /* { dg-error {too few arguments to function 'svclamp'} } */
+  svclamp (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svclamp'} } */
+  svclamp (0, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svclamp', which expects an SVE type rather than a scalar type} } */
+  svclamp (f16, f16, f16);
+  svclamp (s16, s16, s16); /* { dg-error {'svclamp' has no form that takes 'svint16_t' arguments} } */
+  svclamp (pn, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svclamp', but argument 1 had type 'svcount_t'} } */
+  svclamp (f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+  svclamp (f16, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+  svclamp (f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+  svclamp (f16, f16, 0); /* { dg-error {passing 'int' to argument 3 of 'svclamp', which expects an SVE type rather than a scalar} } */
+  svclamp (f16, f16x2, f16); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16, f16x4, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16, f16, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 3 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16, f16, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+
+  svclamp (f16x2, f16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16x2, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 was a tuple of 'svfloat16_t'} } */
+  svclamp (f16x2, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svclamp', but argument 1 was a tuple of 'svfloat16_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c
new file mode 100644
index 00000000000..47077f7a4e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+#include <stdbool.h>
+
+#pragma GCC target "+sme2"
+
+enum signed_enum { SA = -1, SB };
+enum unsigned_enum { UA, UB };
+
+void
+test (int32_t s32, int64_t s64, uint16_t u16, uint32_t u32, uint64_t u64,
+      bool b, int *ptr, float f32, svbool_t pg, svint32_t vec)
+  __arm_streaming
+{
+  svwhilele_c8 (s64, 2); /* { dg-error {too few arguments to function 'svwhilele_c8'} } */
+  svwhilele_c8 (s64, s64, 2, 2); /* { dg-error {too many arguments to function 'svwhilele_c8'} } */
+
+  svwhilele_c8 (b, b, 2); /* { dg-error {passing '_Bool' and '_Bool' to arguments 1 and 2 of 'svwhilele_c8', which expects a pair of 64-bit integers} } */
+  svwhilele_c8 (u16, u16, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (ptr, ptr, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (f32, f32, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (pg, pg, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (vec, vec, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (0, 0, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (s32, s32, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+
+  svwhilele_c8 (0, s64, 2);
+  svwhilele_c8 (0U, s64, 2);
+  svwhilele_c8 (0, u64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (0U, u64, 2);
+
+  svwhilele_c8 (s32, s64, 2);
+  svwhilele_c8 (u32, s64, 2);
+  svwhilele_c8 (s32, u64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u32, u64, 2);
+
+  svwhilele_c8 (s64, s64, 2);
+  svwhilele_c8 (u64, s64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (s64, u64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u64, u64, 2);
+
+  svwhilele_c8 (s64, 0, 2);
+  svwhilele_c8 (s64, 0U, 2);
+  svwhilele_c8 (u64, 0, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u64, 0U, 2);
+
+  svwhilele_c8 (s64, s32, 2);
+  svwhilele_c8 (s64, u32, 2);
+  svwhilele_c8 (u64, s32, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u64, u32, 2);
+
+  svwhilele_c8 (u64, u64, u64); /* { dg-error {argument 3 of 'svwhilele_c8' must be an integer constant expression} } */
+  svwhilele_c8 (u64, u64, 1); /* { dg-error {passing 1 to argument 3 of 'svwhilele_c8', which expects either 2 or 4} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
index 7a617aa1563..22b031ad4db 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
@@ -15,7 +15,7 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64,
   *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */
   *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */
   *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */
-  *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */
+  *ptr = svcreate2 (pg, pg); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svboolx2_t'} } */
   *ptr = svcreate2 (u8, u8);
   *ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c
new file mode 100644
index 00000000000..ca2a039b390
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8); /* { dg-error {too few arguments to function 'svusdot_lane_za32_vg1x2'} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane_za32_vg1x2'} } */
+
+  svusdot_lane_za32_vg1x2 (u8x2, u8x2, s8, 0); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svusdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+  svusdot_lane_za32_vg1x2 (f, u8x2, s8, 0);
+  svusdot_lane_za32_vg1x2 (d, u8x2, s8, 0);
+  svusdot_lane_za32_vg1x2 (pg, u8x2, s8, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svusdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svusdot_lane_za32_vg1x2 (0, 1, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_lane_za32_vg1x2 (0, pg, s8, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_lane_za32_vg1x2 (0, u8, s8, 0); /* { dg-error {passing single vector 'svuint8_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_lane_za32_vg1x2 (0, u8x3, s8, 0); /* { dg-error {passing 'svuint8x3_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_lane_za32_vg1x2 (0, u8x4, s8, 0); /* { dg-error {passing 'svuint8x4_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svusdot_lane_za32_vg1x2 (0, u8x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s32, 0); /* { dg-error {arguments 2 and 3 of 'svusdot_lane_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint32_t' respectively} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8x2, 0); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0);
+  svusdot_lane_za32_vg1x2 (0, s8x2, s8, 0); /* { dg-error {'svusdot_lane_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+  svusdot_lane_za32_vg1x2 (0, u16x2, s16, 0); /* { dg-error {'svusdot_lane_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 3);
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 3);
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, f); /* { dg-error {argument 4 of 'svusdot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svuint8x2_t u8x2, svint8_t s8) __arm_streaming
+{
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* { dg-error {ACLE function 'svusdot_lane_za32_u8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svuint8x2_t u8x2, svint8_t s8) __arm_inout("za")
+{
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* { dg-error {ACLE function 'svusdot_lane_za32_u8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c
new file mode 100644
index 00000000000..e37d24ab608
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za32_vg1x2 (0, s16x2, s16); /* { dg-error {too few arguments to function 'svdot_lane_za32_vg1x2'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane_za32_vg1x2'} } */
+
+  svdot_lane_za32_vg1x2 (s16x2, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 1 of 'svdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+  svdot_lane_za32_vg1x2 (f, s16x2, s16, 0);
+  svdot_lane_za32_vg1x2 (d, s16x2, s16, 0);
+  svdot_lane_za32_vg1x2 (pg, s16x2, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svdot_lane_za32_vg1x2 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x2 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svdot_lane_za32_vg1x2 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svdot_lane_za32_vg1x2 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svdot_lane_za32_vg1x2 (0, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svdot_lane_za32_vg1x2 (0, s16x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svdot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+  svdot_lane_za32_vg1x2 (0, u16x2, u16, 0);
+  svdot_lane_za32_vg1x2 (0, u16x2, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svuint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svdot_lane_za32_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svdot_lane_za32_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svdot_lane_za32_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+
+  svdot_lane_za32_vg1x2 (0, s8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s8x2, s8, 3);
+  svdot_lane_za32_vg1x2 (0, s8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 3);
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, f); /* { dg-error {argument 4 of 'svdot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32_t s32, svuint32_t u32,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64_t s64, svuint64_t u64,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za64_vg1x2 (0, s16x2, s16, 0);
+  svdot_lane_za64_vg1x2 (0, u16x2, u16, 0);
+  svdot_lane_za64_vg1x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za64_vg1x2', which expects a value in the range \[0, 1\]} } */
+  svdot_lane_za64_vg1x2 (0, s16x2, s16, 1);
+  svdot_lane_za64_vg1x2 (0, u16x2, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane_za64_vg1x2', which expects a value in the range \[0, 1\]} } */
+  svdot_lane_za64_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svdot_lane_za64_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svdot_lane_za64_vg1x2 (0, s64x2, s64, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svdot_lane_za64_vg1x2 (0, u64x2, u64, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c
new file mode 100644
index 00000000000..7af3c6f9eef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za32_vg1x4 (0, s16x4, s16); /* { dg-error {too few arguments to function 'svdot_lane_za32_vg1x4'} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane_za32_vg1x4'} } */
+
+  svdot_lane_za32_vg1x4 (s16x4, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 1 of 'svdot_lane_za32_vg1x4', which expects 'uint32_t'} } */
+  svdot_lane_za32_vg1x4 (f, s16x4, s16, 0);
+  svdot_lane_za32_vg1x4 (d, s16x4, s16, 0);
+  svdot_lane_za32_vg1x4 (pg, s16x4, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svdot_lane_za32_vg1x4', which expects 'uint32_t'} } */
+
+  svdot_lane_za32_vg1x4 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x4 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svdot_lane_za32_vg1x4 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svdot_lane_za32_vg1x4 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svdot_lane_za32_vg1x4 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+
+  svdot_lane_za32_vg1x4 (0, s16x4, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s16x4, 0); /* { dg-error {passing 'svint16x4_t' to argument 3 of 'svdot_lane_za32_vg1x4', which expects a single SVE vector rather than a tuple} } */
+  svdot_lane_za32_vg1x4 (0, u16x4, u16, 0);
+  svdot_lane_za32_vg1x4 (0, u16x4, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svuint16_t'} } */
+  svdot_lane_za32_vg1x4 (0, s32x4, s32, 0); /* { dg-error {'svdot_lane_za32_vg1x4' has no form that takes 'svint32x4_t' arguments} } */
+  svdot_lane_za32_vg1x4 (0, u32x4, u32, 0); /* { dg-error {'svdot_lane_za32_vg1x4' has no form that takes 'svuint32x4_t' arguments} } */
+
+  svdot_lane_za32_vg1x4 (0, s8x4, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x4 (0, s8x4, s8, 3);
+  svdot_lane_za32_vg1x4 (0, s8x4, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, 3);
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, f); /* { dg-error {argument 4 of 'svdot_lane_za32_vg1x4' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x4_t s16x4, svint16_t s16) __arm_streaming
+{
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x4' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x4_t s16x4, svint16_t s16) __arm_inout("za")
+{
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x4' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32_t s32, svuint32_t u32,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64_t s64, svuint64_t u64,
+    svint64x4_t s64x4, svuint64x4_t u64x4)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za64_vg1x4 (0, s16x4, s16, 0);
+  svdot_lane_za64_vg1x4 (0, u16x4, u16, 0);
+  svdot_lane_za64_vg1x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za64_vg1x4', which expects a value in the range \[0, 1\]} } */
+  svdot_lane_za64_vg1x4 (0, s16x4, s16, 1);
+  svdot_lane_za64_vg1x4 (0, u16x4, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane_za64_vg1x4', which expects a value in the range \[0, 1\]} } */
+  svdot_lane_za64_vg1x4 (0, s32x4, s32, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svint32x4_t' arguments} } */
+  svdot_lane_za64_vg1x4 (0, u32x4, u32, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svuint32x4_t' arguments} } */
+  svdot_lane_za64_vg1x4 (0, s64x4, s64, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svint64x4_t' arguments} } */
+  svdot_lane_za64_vg1x4 (0, u64x4, u64, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svuint64x4_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c
new file mode 100644
index 00000000000..2efa2eb15ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8); /* { dg-error {too few arguments to function 'svsudot_lane_za32_vg1x2'} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane_za32_vg1x2'} } */
+
+  svsudot_lane_za32_vg1x2 (u8x2, s8x2, u8, 0); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svsudot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+  svsudot_lane_za32_vg1x2 (f, s8x2, u8, 0);
+  svsudot_lane_za32_vg1x2 (d, s8x2, u8, 0);
+  svsudot_lane_za32_vg1x2 (pg, s8x2, u8, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svsudot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svsudot_lane_za32_vg1x2 (0, 1, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svsudot_lane_za32_vg1x2 (0, pg, u8, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_lane_za32_vg1x2 (0, s8, u8, 0); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_lane_za32_vg1x2 (0, s8x3, u8, 0); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_lane_za32_vg1x2 (0, s8x4, u8, 0); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svsudot_lane_za32_vg1x2 (0, s8x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a vector of unsigned integers} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a vector of unsigned integers} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u32, 0); /* { dg-error {arguments 2 and 3 of 'svsudot_lane_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint32_t' respectively} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8x2, 0); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0);
+  svsudot_lane_za32_vg1x2 (0, u8x2, u8, 0); /* { dg-error {'svsudot_lane_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+  svsudot_lane_za32_vg1x2 (0, s16x2, u16, 0); /* { dg-error {'svsudot_lane_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 3);
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 3);
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, f); /* { dg-error {argument 4 of 'svsudot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8_t u8) __arm_streaming
+{
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); /* { dg-error {ACLE function 'svsudot_lane_za32_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8_t u8) __arm_inout("za")
+{
+  svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); /* { dg-error {ACLE function 'svsudot_lane_za32_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c
new file mode 100644
index 00000000000..ab5602f0aa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c
@@ -0,0 +1,89 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svboolx2_t pgx2,
+    svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    svint64x4_t s64x4, svuint64x4_t u64x4,
+    svfloat32x2_t f32x2, int x) __arm_streaming
+{
+  const int one = 1;
+  svqrshr_u8 (u32x4); /* { dg-error {too few arguments to function 'svqrshr_u8'} } */
+  svqrshr_u8 (u32x4, 1, 1); /* { dg-error {too many arguments to function 'svqrshr_u8'} } */
+
+  svqrshr_u8 (u32x4, x); /* { dg-error {argument 2 of 'svqrshr_u8' must be an integer constant expression} } */
+  svqrshr_u8 (u32x4, one); /* { dg-error {argument 2 of 'svqrshr_u8' must be an integer constant expression} } */
+  svqrshr_u8 (u32x4, 0.4); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+  svqrshr_u8 (u32x4, 1.0);
+
+  svqrshr_u8 (pgx2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svboolx2_t' arguments} } */
+  svqrshr_u8 (u8x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint8x2_t' arguments} } */
+  svqrshr_u8 (u8x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint8x4_t' arguments} } */
+  svqrshr_u8 (u16x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint16x2_t' arguments} } */
+  svqrshr_u8 (u16x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint16x4_t' arguments} } */
+  svqrshr_u8 (u32x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint32x2_t' arguments} } */
+  svqrshr_u8 (u32x4, 1);
+  svqrshr_u8 (u64x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint64x2_t' arguments} } */
+  svqrshr_u8 (u64x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint64x4_t' arguments} } */
+  svqrshr_u8 (s32x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svint32x4_t' arguments} } */
+
+  svqrshr_s8 (s8x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint8x2_t' arguments} } */
+  svqrshr_s8 (s8x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint8x4_t' arguments} } */
+  svqrshr_s8 (s16x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint16x2_t' arguments} } */
+  svqrshr_s8 (s16x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint16x4_t' arguments} } */
+  svqrshr_s8 (s32x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint32x2_t' arguments} } */
+  svqrshr_s8 (s32x4, 1);
+  svqrshr_s8 (s64x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint64x2_t' arguments} } */
+  svqrshr_s8 (s64x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint64x4_t' arguments} } */
+  svqrshr_s8 (u32x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svuint32x4_t' arguments} } */
+
+  svqrshr_u16 (pgx2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svboolx2_t' arguments} } */
+  svqrshr_u16 (u8x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint8x2_t' arguments} } */
+  svqrshr_u16 (u8x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint8x4_t' arguments} } */
+  svqrshr_u16 (u16x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint16x2_t' arguments} } */
+  svqrshr_u16 (u16x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint16x4_t' arguments} } */
+  svqrshr_u16 (u32x2, 1);
+  svqrshr_u16 (u32x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint32x4_t' arguments} } */
+  svqrshr_u16 (u64x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint64x2_t' arguments} } */
+  svqrshr_u16 (u64x4, 1);
+  svqrshr_u16 (s32x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svint32x2_t' arguments} } */
+
+  svqrshr_s16 (s8x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint8x2_t' arguments} } */
+  svqrshr_s16 (s8x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint8x4_t' arguments} } */
+  svqrshr_s16 (s16x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint16x2_t' arguments} } */
+  svqrshr_s16 (s16x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint16x4_t' arguments} } */
+  svqrshr_s16 (s32x2, 1);
+  svqrshr_s16 (s32x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint32x4_t' arguments} } */
+  svqrshr_s16 (s64x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint64x2_t' arguments} } */
+  svqrshr_s16 (s64x4, 1);
+  svqrshr_s16 (u32x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svuint32x2_t' arguments} } */
+
+  svqrshr_u8 (u32x4, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+  svqrshr_u8 (u32x4, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+  svqrshr_u8 (u32x4, 1);
+  svqrshr_u8 (u32x4, 32);
+  svqrshr_u8 (u32x4, 33); /* { dg-error {passing 33 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */
+
+  svqrshr_u16 (u32x2, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */
+  svqrshr_u16 (u32x2, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */
+  svqrshr_u16 (u32x2, 1);
+  svqrshr_u16 (u32x2, 16);
+  svqrshr_u16 (u32x2, 17); /* { dg-error {passing 17 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */
+
+  svqrshr_u16 (u64x4, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */
+  svqrshr_u16 (u64x4, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */
+  svqrshr_u16 (u64x4, 1);
+  svqrshr_u16 (u64x4, 64);
+  svqrshr_u16 (u64x4, 65); /* { dg-error {passing 65 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */
+
+  svqrshr_u8 (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqrshr_u8', which expects an SVE type rather than a scalar} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c
index 6011ab05414..cfe686929f7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c
@@ -12,7 +12,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
 {
   svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */
   svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */
-  svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */
+  svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects an 'svbool_t' or 'svcount_t'} } */
   svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */
   svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */
   svst1 (pg, 0, s8);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c
index 552540bf7ff..eb12cbb8af8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c
@@ -11,7 +11,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
 {
   svst1_vnum (pg, s8_ptr, 0); /* { dg-error {too few arguments to function 'svst1_vnum'} } */
   svst1_vnum (pg, s8_ptr, 0, s8, 0); /* { dg-error {too many arguments to function 'svst1_vnum'} } */
-  svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects 'svbool_t'} } */
+  svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects an 'svbool_t' or 'svcount_t'} } */
   svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */
   svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */
   svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-error "passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c
new file mode 100644
index 00000000000..7ad4ca8a580
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+struct s { signed char x; };
+
+svuint8_t
+f1 (svbool_t pg, svcount_t pn, svboolx2_t pgx2,
+    signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
+    float *f32_ptr, _Complex float *cf32_ptr,
+    svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3,
+    svfloat32x4_t f32x4, struct s s) __arm_streaming
+{
+  svst1 (pn, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */
+  svst1 (pn, s8_ptr, s8x2, 0); /* { dg-error {too many arguments to function 'svst1'} } */
+  svst1 (0, s8_ptr, s8x2); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects an 'svbool_t' or 'svcount_t'} } */
+  svst1 (pn, void_ptr, 0x2); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */
+  svst1 (pn, void_ptr, pgx2); /* { dg-error {'svst1' has no form that takes 'svboolx2_t' arguments} } */
+  svst1 (pn, 0, s8); /* { dg-error {operations on single vectors must be predicated by 'svbool_t' rather than 'svcount_t'} } */
+  svst1 (pn, 0, s8x2);
+  svst1 (pg, 0, s8x2); /* { dg-error {operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'} } */
+  svst1 (pn, 0, s8x3); /* { dg-error {'svst1' has no form that takes 'svint8x3_t' arguments} } */
+  svst1 (pn, (int32_t *) 0, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */
+  svst1 (pn, void_ptr, s8x2);
+  svst1 (pn, s_ptr, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */
+  svst1 (pn, f32_ptr, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */
+  svst1 (pn, f32_ptr, f32x4);
+  svst1 (pn, cf32_ptr, f32x4); /* { dg-error "passing argument 2 of 'svst1_f32_x4' from incompatible pointer type" } */
+  svst1 (pn, s, s8x2); /* { dg-error {passing 'struct s' to argument 2 of 'svst1', which expects a scalar pointer} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c
index c69b2d57503..ed38b78d3cd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c
@@ -10,32 +10,32 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
   svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */
   svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */
   svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
-  svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */
-  svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */
-  svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */
+  svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' and 'svuint8_t' arguments} } */
+  svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' and 'svuint8_t' arguments} } */
+  svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' and 'svuint8_t' arguments} } */
   svdot_lane (u32, u8, u8, 0);
   svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
   svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
 
   svdot_lane (s32, s8, s8, 0);
-  svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
-  svdot_lane (s32, s8, u8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
-  svdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svdot_lane', after passing 'svint32_t' to argument 1} } */
+  svdot_lane (s32, u8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */
+  svdot_lane (s32, s8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint8_t'} } */
+  svdot_lane (s32, s32, s32, 0); /* { dg-error {'svdot_lane' has no form that takes 'svint32_t' and 'svint32_t' arguments} } */
 
   svdot_lane (u32, u8, u8, 0);
-  svdot_lane (u32, s8, u8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
-  svdot_lane (u32, u8, s8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
-  svdot_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot_lane', after passing 'svuint32_t' to argument 1} } */
+  svdot_lane (u32, s8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint8_t'} } */
+  svdot_lane (u32, u8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */
+  svdot_lane (u32, u32, u32, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint32_t' and 'svuint32_t' arguments} } */
 
   svdot_lane (s64, s16, s16, 0);
-  svdot_lane (s64, u16, s16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */
-  svdot_lane (s64, s16, u16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */
-  svdot_lane (s64, s64, s64, 0); /* { dg-error {passing 'svint64_t' instead of the expected 'svint16_t' to argument 2 of 'svdot_lane', after passing 'svint64_t' to argument 1} } */
+  svdot_lane (s64, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */
+  svdot_lane (s64, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint16_t'} } */
+  svdot_lane (s64, s64, s64, 0); /* { dg-error {'svdot_lane' has no form that takes 'svint64_t' and 'svint64_t' arguments} } */
 
   svdot_lane (u64, u16, u16, 0);
-  svdot_lane (u64, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */
-  svdot_lane (u64, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */
-  svdot_lane (u64, u64, u64, 0); /* { dg-error {passing 'svuint64_t' instead of the expected 'svuint16_t' to argument 2 of 'svdot_lane', after passing 'svuint64_t' to argument 1} } */
+  svdot_lane (u64, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint16_t'} } */
+  svdot_lane (u64, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */
+  svdot_lane (u64, u64, u64, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint64_t' and 'svuint64_t' arguments} } */
 
   svdot_lane (s32, s8, s8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
   svdot_lane (s32, s8, s8, 0);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c
index 85d4b2dd8d5..fc92dcedcb2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c
@@ -9,13 +9,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
   svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */
   svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */
   svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE type rather than a scalar} } */
-  svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */
-  svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */
-  svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' arguments} } */
+  svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' and 'svuint8_t' arguments} } */
+  svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' and 'svuint8_t' arguments} } */
+  svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' and 'svuint8_t' arguments} } */
   svdot (u32, u8, u8);
   svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE type rather than a scalar} } */
-  svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+  svdot (u32, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot', but argument 2 had type 'svint8_t'} } */
   svdot (u32, u8, 0);
-  svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
-  svdot (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot', after passing 'svuint32_t' to argument 1} } */
+  svdot (u32, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot', but argument 2 had type 'svuint8_t'} } */
+  svdot (u32, u32, u32); /* { dg-error {'svdot' has no form that takes 'svuint32_t' and 'svuint32_t' arguments} } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c
new file mode 100644
index 00000000000..b8968c878e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64,
+    svfloat16_t f16, svfloat32_t f32, int i) __arm_streaming
+{
+  svdot_lane (u32, u16, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */
+  svdot_lane (u32, u8, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */
+  svdot_lane (u32, s16, s16, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint32_t' and 'svint16_t' arguments} } */
+
+  svdot_lane (u32, u16, u16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
+  svdot_lane (u32, u16, u16, 0);
+  svdot_lane (u32, u16, u16, 3);
+  svdot_lane (u32, u16, u16, 4);  /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+  svdot_lane (u32, u16, u16, -1);  /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+
+  svdot_lane (s32, s16, s16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
+  svdot_lane (s32, s16, s16, 0);
+  svdot_lane (s32, s16, s16, 3);
+  svdot_lane (s32, s16, s16, 4);  /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+  svdot_lane (s32, s16, s16, -1);  /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+
+  svdot_lane (f32, f16, f16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */
+  svdot_lane (f32, f16, f16, 0);
+  svdot_lane (f32, f16, f16, 3);
+  svdot_lane (f32, f16, f16, 4);  /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+  svdot_lane (f32, f16, f16, -1);  /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c
new file mode 100644
index 00000000000..85f8b45032d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c
@@ -0,0 +1,28 @@
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+test (svbool_t pg, float f, svint8_t s8, svfloat32_t f32,
+      svint32x2_t s32x2, svint32x3_t s32x3, svint32x4_t s32x4,
+      svfloat32x2_t f32x2, svfloat32x3_t f32x3, svfloat32x4_t f32x4)
+  __arm_streaming
+{
+  svcvt_bf16 (); /* { dg-error {too few arguments to function 'svcvt_bf16'} } */
+  svcvt_bf16 (f32x2, f32x2); /* { dg-error {too many arguments to function 'svcvt_bf16'} } */
+  svcvt_bf16 (0); /* { dg-error {passing 'int' to argument 1 of 'svcvt_bf16', which expects an SVE type rather than a scalar} } */
+  svcvt_bf16 (f); /* { dg-error {passing 'float' to argument 1 of 'svcvt_bf16', which expects an SVE type rather than a scalar} } */
+  svcvt_bf16 (pg); /* { dg-error {'svcvt_bf16' has no form that takes 'svbool_t' arguments} } */
+  svcvt_bf16 (s8); /* { dg-error {'svcvt_bf16' has no form that takes 'svint8_t' arguments} } */
+  svcvt_bf16 (f32); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32_t' arguments} } */
+  svcvt_bf16 (f32x2);
+  svcvt_bf16 (f32x3); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32x3_t' arguments} } */
+  svcvt_bf16 (f32x4); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32x4_t' arguments} } */
+  svcvt_bf16 (s32x2); /* { dg-error {'svcvt_bf16' has no form that takes 'svint32x2_t' arguments} } */
+  svcvt_s32 (f32x2);
+  svcvt_s32 (f32x3); /* { dg-error {'svcvt_s32' has no form that takes 'svfloat32x3_t' arguments} } */
+  svcvt_s32 (f32x4);
+  svcvt_f32 (s32x2);
+  svcvt_f32 (s32x3); /* { dg-error {'svcvt_f32' has no form that takes 'svint32x3_t' arguments} } */
+  svcvt_f32 (s32x4);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c
new file mode 100644
index 00000000000..e02fe5405b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint32_t s32, svint16x2_t s16x2, svint32x2_t s32x2,
+    svint32x3_t s32x3, svint32x4_t s32x4, svint64x2_t s64x2, float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_za32_vg1x2 (1); /* { dg-error {too few arguments to function 'svadd_za32_vg1x2'} } */
+  svadd_za32_vg1x2 (1, s32x2, s32x2); /* { dg-error {too many arguments to function 'svadd_za32_vg1x2'} } */
+
+  svadd_za32_vg1x2 (s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_za32_vg1x2', which expects 'uint32_t'} } */
+  svadd_za32_vg1x2 (f, s32x2);
+  svadd_za32_vg1x2 (d, s32x2);
+  svadd_za32_vg1x2 (pg, s32x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svadd_za32_vg1x2 (1, 1); /* { dg-error {passing 'int' to argument 2 of 'svadd_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svadd_za32_vg1x2 (1, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_za32_vg1x2 (1, s32); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_za32_vg1x2 (1, s32x2);
+  svadd_za32_vg1x2 (1, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_za32_vg1x2 (1, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svadd_za32_vg1x2 (1, s16x2); /* { dg-error {'svadd_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svadd_za32_vg1x2 (1, s64x2); /* { dg-error {'svadd_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+}
+
+void
+f2 (svint32x2_t s32x2) __arm_streaming
+{
+  svadd_za32_vg1x2 (0, s32x2); /* { dg-error {ACLE function 'svadd_za32_s32_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint32x2_t s32x2) __arm_inout("za")
+{
+  svadd_za32_vg1x2 (0, s32x2); /* { dg-error {ACLE function 'svadd_za32_s32_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_za64_vg1x2 (1, s32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svadd_za64_vg1x2 (1, u32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svadd_za64_vg1x2 (1, s64x2);
+  svadd_za64_vg1x2 (1, u64x2);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c
new file mode 100644
index 00000000000..b28b03e3815
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv9-a+sme2")
+
+void
+f1 (svbool_t pg, svint32_t s32, svint16x4_t s16x4, svint32x2_t s32x2,
+    svint32x3_t s32x3, svint32x4_t s32x4, svint64x4_t s64x4, float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_za32_vg1x4 (1); /* { dg-error {too few arguments to function 'svadd_za32_vg1x4'} } */
+  svadd_za32_vg1x4 (1, s32x4, s32x4); /* { dg-error {too many arguments to function 'svadd_za32_vg1x4'} } */
+
+  svadd_za32_vg1x4 (s32x2, s32x4); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_za32_vg1x4', which expects 'uint32_t'} } */
+  svadd_za32_vg1x4 (f, s32x4);
+  svadd_za32_vg1x4 (d, s32x4);
+  svadd_za32_vg1x4 (pg, s32x4); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_za32_vg1x4', which expects 'uint32_t'} } */
+
+  svadd_za32_vg1x4 (1, s32); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_za32_vg1x4 (1, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_za32_vg1x4 (1, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_za32_vg1x4 (1, s32x4);
+
+  svadd_za32_vg1x4 (1, s16x4); /* { dg-error {'svadd_za32_vg1x4' has no form that takes 'svint16x4_t' arguments} } */
+  svadd_za32_vg1x4 (1, s64x4); /* { dg-error {'svadd_za32_vg1x4' has no form that takes 'svint64x4_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c
new file mode 100644
index 00000000000..22d91b1858b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv9-a+sme2+nosme-i16i64")
+
+void
+f1 (svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_za64_vg1x2 (1, s32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svadd_za64_vg1x2 (1, u32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svadd_za64_vg1x2 (1, s64x2); /* { dg-error {ACLE function 'svadd_za64_s64_vg1x2' requires ISA extension 'sme-i16i64'} } */
+  svadd_za64_vg1x2 (1, u64x2);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c
new file mode 100644
index 00000000000..f478945562c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c
@@ -0,0 +1,15 @@
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+test (svfloat32_t f32, svfloat32x2_t f32x2, svfloat32x3_t f32x3,
+      svfloat32x4_t f32x4) __arm_streaming
+{
+  svuzp (); /* { dg-error {too few arguments to function 'svuzp'} } */
+  svuzp (f32x2, f32x2); /* { dg-error {too many arguments to function 'svuzp'} } */
+  svuzp (f32); /* { dg-error {'svuzp' has no form that takes 'svfloat32_t' arguments} } */
+  svuzp (f32x2);
+  svuzp (f32x3); /* { dg-error {'svuzp' has no form that takes 'svfloat32x3_t' arguments} } */
+  svuzp (f32x4);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c
new file mode 100644
index 00000000000..3a45b58b023
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+    svuint8_t u8, svuint16x2_t u16x2, svfloat32x2_t f32x2, svint64x2_t s64x2,
+    uint32_t tile)
+  __arm_streaming __arm_inout("za")
+{
+  svwrite_ver_za8_vg2 (0, 0); /* { dg-error {too few arguments to function 'svwrite_ver_za8_vg2'} } */
+  svwrite_ver_za8_vg2 (0, 0, s8x2, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_vg2'} } */
+  svwrite_ver_za8_vg2 (tile, 0, s8x2); /* { dg-error {argument 1 of 'svwrite_ver_za8_vg2' must be an integer constant expression} } */
+  svwrite_ver_za8_vg2 (-1, 0, s8x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_vg2', which expects the value 0} } */
+  svwrite_ver_za8_vg2 (1, 0, s8x2); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_vg2', which expects the value 0} } */
+  svwrite_ver_za8_vg2 (0, u8, s8x2); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_vg2', which expects 'uint32_t'} } */
+  svwrite_ver_za8_vg2 (0, 0, tile); /* { dg-error {passing 'uint32_t'.* to argument 3 of 'svwrite_ver_za8_vg2', which expects an SVE type} } */
+  svwrite_ver_za8_vg2 (0, 0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */
+  svwrite_ver_za8_vg2 (0, 0, s8x2);
+  svwrite_ver_za8_vg2 (0, 0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */
+  svwrite_ver_za8_vg2 (0, 0, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */
+
+  svwrite_ver_za16_vg2 (-1, 0, u16x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_vg2', which expects a value in the range \[0, 1\]} } */
+  svwrite_ver_za16_vg2 (2, 0, u16x2); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_vg2', which expects a value in the range \[0, 1\]} } */
+
+  svwrite_ver_za32_vg2 (-1, 0, f32x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za32_vg2', which expects a value in the range \[0, 3\]} } */
+  svwrite_ver_za32_vg2 (4, 0, f32x2); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_vg2', which expects a value in the range \[0, 3\]} } */
+
+  svwrite_ver_za64_vg2 (-1, 0, s64x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_vg2', which expects a value in the range \[0, 7\]} } */
+  svwrite_ver_za64_vg2 (8, 0, s64x2); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_vg2', which expects a value in the range \[0, 7\]} } */
+
+  svwrite_ver_za8_vg4 (0, 0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */
+  svwrite_ver_za8_vg4 (0, 0, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */
+  svwrite_ver_za8_vg4 (0, 0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */
+  svwrite_ver_za8_vg4 (0, 0, s8x4);
+}
+
+void
+f2 (svint8x2_t s8x2) __arm_streaming
+{
+  svwrite_ver_za8_vg2 (0, 0, s8x2); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_vg2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2) __arm_inout("za")
+{
+  svwrite_ver_za8_vg2 (0, 0, s8x2); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_vg2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c
new file mode 100644
index 00000000000..dedd4b16ea2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+    svuint8_t u8, svuint16x2_t u16x2, svfloat32x2_t f32x2, svint64x2_t s64x2,
+    uint32_t tile)
+  __arm_streaming __arm_inout("za")
+{
+  svwrite_za8_vg1x2 (0); /* { dg-error {too few arguments to function 'svwrite_za8_vg1x2'} } */
+  svwrite_za8_vg1x2 (0, s8x2, 0); /* { dg-error {too many arguments to function 'svwrite_za8_vg1x2'} } */
+  svwrite_za8_vg1x2 (u8, s8x2); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svwrite_za8_vg1x2', which expects 'uint32_t'} } */
+  svwrite_za8_vg1x2 (0, tile); /* { dg-error {passing 'uint32_t'.* to argument 2 of 'svwrite_za8_vg1x2', which expects an SVE type} } */
+  svwrite_za8_vg1x2 (0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */
+  svwrite_za8_vg1x2 (0, s8x2);
+  svwrite_za8_vg1x2 (0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */
+  svwrite_za8_vg1x2 (0, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svwrite_za8_vg1x4 (0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */
+  svwrite_za8_vg1x4 (0, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */
+  svwrite_za8_vg1x4 (0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */
+  svwrite_za8_vg1x4 (0, s8x4);
+}
+
+void
+f2 (svint8x2_t s8x2) __arm_streaming
+{
+  svwrite_za8_vg1x2 (0, s8x2); /* { dg-error {ACLE function 'svwrite_za8_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2) __arm_inout("za")
+{
+  svwrite_za8_vg1x2 (0, s8x2); /* { dg-error {ACLE function 'svwrite_za8_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-12-05 10:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-05 10:24 [pushed v2 0/5] aarch64: Add support for SME2 Richard Sandiford
2023-12-05 10:24 ` [pushed v2 1/5] aarch64: Add +sme2 Richard Sandiford
2023-12-05 10:25 ` [pushed v2 2/5] aarch64: Add svcount_t Richard Sandiford
2023-12-05 10:25 ` [pushed v2 3/5] aarch64: Add svboolx2_t Richard Sandiford
2023-12-05 10:25 ` [pushed v2 4/5] aarch64: Add ZT0 Richard Sandiford
2023-12-05 10:25 ` [pushed v2 5/5] aarch64: Add support for SME2 intrinsics Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).