public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "juzhe.zhong at rivai dot ai" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug c/108271] New: Missed RVV cost model Date: Tue, 03 Jan 2023 02:03:10 +0000 [thread overview] Message-ID: <bug-108271-4@http.gcc.gnu.org/bugzilla/> (raw) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108271 Bug ID: 108271 Summary: Missed RVV cost model Product: gcc Version: 13.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: juzhe.zhong at rivai dot ai Target Milestone: --- #include "riscv_vector.h" void f3 (int * restrict in, int * restrict out, void * restrict mask_in, int n) { vfloat32mf2_t v = __riscv_vle32_v_f32mf2 ((float *)(in + 10000), 19); __riscv_vse32_v_f32mf2 ((float *)(out + 10000), v, 19); vbool64_t mask = *(vbool64_t*)mask_in; for (int i = 0; i < n; i++) { vint16mf2_t v1 = __riscv_vle16_v_i16mf2 ((int16_t *)(in + i + 1), 19); __riscv_vse16_v_i16mf2 ((int16_t *)(out + i + 1), v1, 19); vint32mf2_t v2 = __riscv_vle32_v_i32mf2 ((int32_t *)(in + i + 2), 19); __riscv_vse32_v_i32mf2 ((int32_t *)(out + i + 2), v2, 19); vint32mf2_t v3 = __riscv_vle32_v_i32mf2_tumu (mask, v2, (int32_t *)(in + i + 200), 13); *(vint32mf2_t*)(out + i + 200) = v3; vfloat64m1_t v4 = __riscv_vle64_v_f64m1_m (mask, (double *)(in + i + 300), 11); __riscv_vse64_v_f64m1 ((double *)(out + i + 300), v4, 11); vfloat64m1_t v5 = __riscv_vle64_v_f64m1_tum (mask, v4, (double *)(in + i + 500), 11); __riscv_vse64_v_f64m1 ((double *)(out + i + 500), v5, 11); vfloat64m1_t v6 = __riscv_vle64_v_f64m1_mu (mask, v5, (double *)(in + i + 600), 11); __riscv_vse64_v_f64m1_m (mask, (double *)(out + i + 600), v6, 11); vuint8mf4_t v7 = __riscv_vle8_v_u8mf4 ((uint8_t *)(in + i + 700), 11); __riscv_vse8_v_u8mf4 ((uint8_t *)(out + i + 700), v7, 11); vuint8mf4_t v8 = __riscv_vle8_v_u8mf4 ((uint8_t *)(in + i + 800), 11); __riscv_vse8_v_u8mf4 ((uint8_t *)(out + i + 800), v7, 11); vuint8mf4_t v9 = __riscv_vle8_v_u8mf4 ((uint8_t *)(in + i + 900), 11); 
__riscv_vse8_v_u8mf4 ((uint8_t *)(out + i + 900), v7, 11); vuint8mf4_t v10 = __riscv_vle8_v_u8mf4 ((uint8_t *)(in + i + 1000), 11); __riscv_vse8_v_u8mf4 ((uint8_t *)(out + i + 1000), v7, 11); } } -O3 -S ASM: f3: li a5,40960 addi a5,a5,-960 addi sp,sp,-64 sd s4,24(sp) add a4,a0,a5 add a5,a1,a5 vsetivli zero,19,e32,mf2,ta,ma vle32.v v24,0(a4) vse32.v v24,0(a5) vsetvli s4,zero,e8,mf8,ta,ma vlm.v v0,0(a2) ble a3,zero,.L1 addi a3,a3,1 sd s3,32(sp) slli a3,a3,2 li s3,4096 sd s2,40(sp) sd s5,16(sp) sd s6,8(sp) addi t6,s3,-1700 addi t5,s3,-1300 addi s6,s3,-900 addi s5,s3,-500 sd s0,56(sp) sd s1,48(sp) addi a0,a0,4 addi a4,a1,4 add s2,a1,a3 addi s3,s3,-100 .L3: vsetivli zero,19,e16,mf2,ta,ma mv a5,a4 vle16.v v24,0(a0) mv a3,a0 vse16.v v24,0(a4) addi a0,a0,4 vsetivli zero,19,e32,mf2,ta,ma addi a4,a4,4 vle32.v v24,0(a0) addi s1,a3,796 vse32.v v24,0(a4) vsetivli zero,13,e32,mf2,tu,mu addi s0,a5,796 vle32.v v24,0(s1),v0.t addi a1,a3,1196 addi t4,a5,1196 addi t2,a3,1996 addi t3,a5,1996 add t0,a3,t6 vsetvli s4,zero,e32,mf2,ta,ma add t1,a5,t6 vse32.v v24,0(s0) add a7,a5,t5 vsetivli zero,11,e64,m1,tu,mu add a6,a5,s6 vle64.v v24,0(a1),v0.t add a2,a5,s5 vse64.v v24,0(t4) add a3,a3,t5 vle64.v v24,0(t2),v0.t add a5,a5,s3 vse64.v v24,0(t3) vle64.v v24,0(t0),v0.t vse64.v v24,0(t1),v0.t vsetivli zero,11,e8,mf4,ta,ma vle8.v v24,0(a3) vse8.v v24,0(a7) vse8.v v24,0(a6) vse8.v v24,0(a2) vse8.v v24,0(a5) bne s2,a4,.L3 ld s0,56(sp) ld s1,48(sp) ld s2,40(sp) ld s3,32(sp) ld s5,16(sp) ld s6,8(sp) .L1: ld s4,24(sp) addi sp,sp,64 jr ra GCC allocates redundant stack space and generates a lot of redundant ld and sd instructions.
However, if we use -O3 -fno-schedule-insns ASM: f3: li a5,40960 addi a5,a5,-960 add a4,a0,a5 add a5,a1,a5 vsetivli zero,19,e32,mf2,ta,ma vle32.v v24,0(a4) vse32.v v24,0(a5) vsetvli t3,zero,e8,mf8,ta,ma vlm.v v0,0(a2) ble a3,zero,.L1 addi a3,a3,1 li t1,4096 slli a3,a3,2 addi a4,a1,4 addi a7,t1,-1700 addi a6,t1,-1300 addi t5,t1,-900 addi t4,t1,-500 addi a2,a0,4 add a1,a1,a3 addi t1,t1,-100 .L3: mv a3,a2 vsetivli zero,19,e16,mf2,ta,ma mv a5,a4 vle16.v v24,0(a2) addi a0,a3,796 vse16.v v24,0(a4) addi a2,a2,4 vsetivli zero,19,e32,mf2,ta,ma addi a4,a4,4 vle32.v v24,0(a2) vse32.v v24,0(a4) vsetivli zero,13,e32,mf2,tu,mu vle32.v v24,0(a0),v0.t addi a0,a5,796 vsetvli t3,zero,e32,mf2,ta,ma vse32.v v24,0(a0) addi a0,a3,1196 vsetivli zero,11,e64,m1,tu,mu vle64.v v24,0(a0),v0.t addi a0,a5,1196 vse64.v v24,0(a0) addi a0,a3,1996 vle64.v v24,0(a0),v0.t addi a0,a5,1996 vse64.v v24,0(a0) add a0,a3,a7 vle64.v v24,0(a0),v0.t add a3,a3,a6 add a0,a5,a7 vse64.v v24,0(a0),v0.t vsetivli zero,11,e8,mf4,ta,ma vle8.v v24,0(a3) add a3,a5,a6 vse8.v v24,0(a3) add a3,a5,t5 vse8.v v24,0(a3) add a3,a5,t4 add a5,a5,t1 vse8.v v24,0(a3) vse8.v v24,0(a5) bne a1,a4,.L3 .L1: ret This issue is gone. We should correctly adjust the RVV instruction cost model so that the codegen is the same with and without -fno-schedule-insns.
next reply other threads:[~2023-01-03 2:03 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-01-03 2:03 juzhe.zhong at rivai dot ai [this message] 2023-01-03 2:08 ` [Bug target/108271] " pinskia at gcc dot gnu.org 2023-01-03 2:09 ` juzhe.zhong at rivai dot ai 2023-08-25 7:43 ` rdapp at gcc dot gnu.org 2023-12-21 8:25 ` juzhe.zhong at rivai dot ai
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-108271-4@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).