public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
* [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS @ 2023-01-16 2:04 juzhe.zhong at rivai dot ai 2023-01-16 2:23 ` [Bug rtl-optimization/108412] " juzhe.zhong at rivai dot ai ` (4 more replies) 0 siblings, 5 replies; 6+ messages in thread From: juzhe.zhong at rivai dot ai @ 2023-01-16 2:04 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108412 Bug ID: 108412 Summary: Negative optimization of GCSE && LOOP INVARIANTS Product: gcc Version: 13.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: juzhe.zhong at rivai dot ai Target Milestone: --- Consider such case: void f (void * restrict in, void * restrict out, int l, int n, int m) { int vl = 32; for (int i = 0; i < n; i++) { vint8mf8_t v1 = __riscv_vle8_v_i8mf8 (in + i + 1, vl); __riscv_vse8_v_i8mf8 (out + i + 1, v1, vl); vint8mf8_t v2 = __riscv_vle8_v_i8mf8 (in + i + 2, vl); __riscv_vse8_v_i8mf8 (out + i + 2, v2, vl); vint8mf8_t v3 = __riscv_vle8_v_i8mf8 (in + i + 3, vl); __riscv_vse8_v_i8mf8 (out + i + 3, v3, vl); vint8mf8_t v4 = __riscv_vle8_v_i8mf8 (in + i + 4, vl); __riscv_vse8_v_i8mf8 (out + i + 4, v4, vl); vint8mf8_t v5 = __riscv_vle8_v_i8mf8 (in + i + 5, vl); __riscv_vse8_v_i8mf8 (out + i + 5, v5, vl); vint8mf8_t v6 = __riscv_vle8_v_i8mf8 (in + i + 6, vl); __riscv_vse8_v_i8mf8 (out + i + 6, v6, vl); vint8mf8_t v7 = __riscv_vle8_v_i8mf8 (in + i + 7, vl); __riscv_vse8_v_i8mf8 (out + i + 7, v7, vl); vint8mf8_t v8 = __riscv_vle8_v_i8mf8 (in + i + 8, vl); __riscv_vse8_v_i8mf8 (out + i + 8, v8, vl); vint8mf8_t v9 = __riscv_vle8_v_i8mf8 (in + i + 9, vl); __riscv_vse8_v_i8mf8 (out + i + 9, v9, vl); vint8mf8_t v10 = __riscv_vle8_v_i8mf8 (in + i + 10, vl); __riscv_vse8_v_i8mf8 (out + i + 10, v10, vl); vint8mf8_t v11 = __riscv_vle8_v_i8mf8 (in + i + 11, vl); __riscv_vse8_v_i8mf8 (out + i + 11, v11, vl); vint8mf8_t v12 = __riscv_vle8_v_i8mf8 (in + i + 12, vl); __riscv_vse8_v_i8mf8 (out + i + 12, v12, vl); 
vint8mf8_t v13 = __riscv_vle8_v_i8mf8 (in + i + 13, vl); __riscv_vse8_v_i8mf8 (out + i + 13, v13, vl); vint8mf8_t v14 = __riscv_vle8_v_i8mf8 (in + i + 14, vl); __riscv_vse8_v_i8mf8 (out + i + 14, v14, vl); } } -O3 ASM: f: addi sp,sp,-192 sw a3,28(sp) ble a3,zero,.L1 addi a5,a0,1 sw a5,76(sp) addi a5,a1,1 sw a5,80(sp) addi a5,a0,2 sw a5,84(sp) addi a5,a1,2 sw a5,88(sp) addi a5,a0,3 sw a5,92(sp) addi a5,a1,3 sw a5,96(sp) addi a5,a0,4 sw a5,100(sp) addi a5,a1,4 sw a5,104(sp) addi a5,a0,5 sw a5,108(sp) addi a5,a1,5 sw a5,112(sp) addi a5,a0,6 sw a5,116(sp) addi a5,a1,6 sw a5,120(sp) addi a5,a0,7 sw a5,124(sp) addi a5,a1,7 sw a5,128(sp) addi a5,a0,8 sw a5,132(sp) addi a5,a1,8 sw a5,136(sp) addi a5,a0,9 sw a5,140(sp) addi a5,a1,9 sw a5,32(sp) addi a5,a0,10 sw a5,36(sp) addi a5,a1,10 sw a5,40(sp) sw s0,188(sp) addi a5,a0,11 sw s1,184(sp) sw s2,180(sp) sw s3,176(sp) sw s4,172(sp) sw s5,168(sp) sw s6,164(sp) sw s7,160(sp) sw s8,156(sp) sw s9,152(sp) sw s10,148(sp) sw s11,144(sp) sw a5,44(sp) addi a5,a1,11 sw a5,48(sp) addi a5,a0,12 sw a5,52(sp) addi a5,a1,12 sw a5,56(sp) addi a5,a0,13 sw a5,60(sp) addi a5,a1,13 sw a5,64(sp) addi a5,a0,14 sw a5,68(sp) addi a5,a1,14 sw a5,72(sp) li a4,0 li a5,32 vsetvli zero,a5,e8,mf8,ta,ma .L3: lw a3,76(sp) lw t2,60(sp) add s11,a3,a4 lw a3,80(sp) add t2,t2,a4 add t0,a3,a4 lw a3,84(sp) add s10,a3,a4 lw a3,88(sp) add t6,a3,a4 lw a3,92(sp) add s9,a3,a4 lw a3,96(sp) add t5,a3,a4 lw a3,100(sp) add s8,a3,a4 lw a3,104(sp) add t4,a3,a4 lw a3,108(sp) add s7,a3,a4 lw a3,112(sp) add t3,a3,a4 lw a3,116(sp) add s6,a3,a4 lw a3,120(sp) add t1,a3,a4 lw a3,124(sp) add s5,a3,a4 lw a3,128(sp) add a7,a3,a4 lw a3,132(sp) add s4,a3,a4 lw a3,136(sp) add a6,a3,a4 lw a3,140(sp) add s3,a3,a4 lw a3,32(sp) add a0,a3,a4 lw a3,36(sp) add s2,a3,a4 lw a3,40(sp) add a1,a3,a4 lw a3,44(sp) add s1,a3,a4 lw a3,48(sp) add a2,a3,a4 lw a3,52(sp) add s0,a3,a4 lw a3,56(sp) add a3,a3,a4 sw a3,12(sp) lw a3,64(sp) add a3,a3,a4 sw a3,16(sp) lw a3,68(sp) add a3,a3,a4 sw a3,20(sp) lw 
a3,72(sp) add a3,a3,a4 sw a3,24(sp) vle8.v v24,0(s11) vse8.v v24,0(t0) vle8.v v24,0(s10) vse8.v v24,0(t6) vle8.v v24,0(s9) vse8.v v24,0(t5) vle8.v v24,0(s8) vse8.v v24,0(t4) vle8.v v24,0(s7) vse8.v v24,0(t3) vle8.v v24,0(s6) vse8.v v24,0(t1) vle8.v v24,0(s5) vse8.v v24,0(a7) vle8.v v24,0(s4) vse8.v v24,0(a6) vle8.v v24,0(s3) vse8.v v24,0(a0) vle8.v v24,0(s2) vse8.v v24,0(a1) vle8.v v24,0(s1) vse8.v v24,0(a2) lw a3,12(sp) vle8.v v24,0(s0) addi a4,a4,1 vse8.v v24,0(a3) lw a3,16(sp) vle8.v v24,0(t2) vse8.v v24,0(a3) lw a3,20(sp) vle8.v v24,0(a3) lw a3,24(sp) vse8.v v24,0(a3) lw a3,28(sp) bne a3,a4,.L3 lw s0,188(sp) lw s1,184(sp) lw s2,180(sp) lw s3,176(sp) lw s4,172(sp) lw s5,168(sp) lw s6,164(sp) lw s7,160(sp) lw s8,156(sp) lw s9,152(sp) lw s10,148(sp) lw s11,144(sp) .L1: addi sp,sp,192 jr ra Codegen is quite ugly. However, if we try -O3 -fno-gcse -fno-schedule-insns -fno-move-loop-invariants ASM is much better, same as LLVM: f: ble a3,zero,.L1 li a5,0 vsetvli zero,a4,e8,mf8,ta,ma .L3: addi a2,a0,1 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,1 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,2 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,2 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,3 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,3 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,4 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,4 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,5 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,5 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,6 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,6 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,7 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,7 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,8 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,8 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,9 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,9 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,10 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,10 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,11 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,11 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,12 add a2,a2,a5 vle8.v v24,0(a2) addi 
a2,a1,12 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,13 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,13 add a2,a2,a5 vse8.v v24,0(a2) addi a2,a0,14 add a2,a2,a5 vle8.v v24,0(a2) addi a2,a1,14 add a2,a2,a5 vse8.v v24,0(a2) addi a5,a5,1 bne a3,a5,.L3 .L1: ret Currently, RVV support doesn't have any Cost model. I am not sure whether it's related to it. May need someone help me to fix it. Thanks. ^ permalink raw reply [flat|nested] 6+ messages in thread
* [Bug rtl-optimization/108412] Negative optimization of GCSE && LOOP INVARIANTS 2023-01-16 2:04 [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS juzhe.zhong at rivai dot ai @ 2023-01-16 2:23 ` juzhe.zhong at rivai dot ai 2023-01-16 2:49 ` [Bug rtl-optimization/108412] RISC-V: " pinskia at gcc dot gnu.org ` (3 subsequent siblings) 4 siblings, 0 replies; 6+ messages in thread From: juzhe.zhong at rivai dot ai @ 2023-01-16 2:23 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108412 --- Comment #1 from JuzheZhong <juzhe.zhong at rivai dot ai> --- (In reply to JuzheZhong from comment #0) > Consider such case: > void f (void * restrict in, void * restrict out, int l, int n, int m) > { > int vl = 32; > for (int i = 0; i < n; i++) > { > vint8mf8_t v1 = __riscv_vle8_v_i8mf8 (in + i + 1, vl); > __riscv_vse8_v_i8mf8 (out + i + 1, v1, vl); > > vint8mf8_t v2 = __riscv_vle8_v_i8mf8 (in + i + 2, vl); > __riscv_vse8_v_i8mf8 (out + i + 2, v2, vl); > > vint8mf8_t v3 = __riscv_vle8_v_i8mf8 (in + i + 3, vl); > __riscv_vse8_v_i8mf8 (out + i + 3, v3, vl); > > vint8mf8_t v4 = __riscv_vle8_v_i8mf8 (in + i + 4, vl); > __riscv_vse8_v_i8mf8 (out + i + 4, v4, vl); > > vint8mf8_t v5 = __riscv_vle8_v_i8mf8 (in + i + 5, vl); > __riscv_vse8_v_i8mf8 (out + i + 5, v5, vl); > > vint8mf8_t v6 = __riscv_vle8_v_i8mf8 (in + i + 6, vl); > __riscv_vse8_v_i8mf8 (out + i + 6, v6, vl); > > vint8mf8_t v7 = __riscv_vle8_v_i8mf8 (in + i + 7, vl); > __riscv_vse8_v_i8mf8 (out + i + 7, v7, vl); > > vint8mf8_t v8 = __riscv_vle8_v_i8mf8 (in + i + 8, vl); > __riscv_vse8_v_i8mf8 (out + i + 8, v8, vl); > > vint8mf8_t v9 = __riscv_vle8_v_i8mf8 (in + i + 9, vl); > __riscv_vse8_v_i8mf8 (out + i + 9, v9, vl); > > vint8mf8_t v10 = __riscv_vle8_v_i8mf8 (in + i + 10, vl); > __riscv_vse8_v_i8mf8 (out + i + 10, v10, vl); > > vint8mf8_t v11 = __riscv_vle8_v_i8mf8 (in + i + 11, vl); > __riscv_vse8_v_i8mf8 (out + i + 11, v11, vl); > > vint8mf8_t v12 = __riscv_vle8_v_i8mf8 (in + 
i + 12, vl); > __riscv_vse8_v_i8mf8 (out + i + 12, v12, vl); > > vint8mf8_t v13 = __riscv_vle8_v_i8mf8 (in + i + 13, vl); > __riscv_vse8_v_i8mf8 (out + i + 13, v13, vl); > > vint8mf8_t v14 = __riscv_vle8_v_i8mf8 (in + i + 14, vl); > __riscv_vse8_v_i8mf8 (out + i + 14, v14, vl); > } > } > > -O3 ASM: > f: > addi sp,sp,-192 > sw a3,28(sp) > ble a3,zero,.L1 > addi a5,a0,1 > sw a5,76(sp) > addi a5,a1,1 > sw a5,80(sp) > addi a5,a0,2 > sw a5,84(sp) > addi a5,a1,2 > sw a5,88(sp) > addi a5,a0,3 > sw a5,92(sp) > addi a5,a1,3 > sw a5,96(sp) > addi a5,a0,4 > sw a5,100(sp) > addi a5,a1,4 > sw a5,104(sp) > addi a5,a0,5 > sw a5,108(sp) > addi a5,a1,5 > sw a5,112(sp) > addi a5,a0,6 > sw a5,116(sp) > addi a5,a1,6 > sw a5,120(sp) > addi a5,a0,7 > sw a5,124(sp) > addi a5,a1,7 > sw a5,128(sp) > addi a5,a0,8 > sw a5,132(sp) > addi a5,a1,8 > sw a5,136(sp) > addi a5,a0,9 > sw a5,140(sp) > addi a5,a1,9 > sw a5,32(sp) > addi a5,a0,10 > sw a5,36(sp) > addi a5,a1,10 > sw a5,40(sp) > sw s0,188(sp) > addi a5,a0,11 > sw s1,184(sp) > sw s2,180(sp) > sw s3,176(sp) > sw s4,172(sp) > sw s5,168(sp) > sw s6,164(sp) > sw s7,160(sp) > sw s8,156(sp) > sw s9,152(sp) > sw s10,148(sp) > sw s11,144(sp) > sw a5,44(sp) > addi a5,a1,11 > sw a5,48(sp) > addi a5,a0,12 > sw a5,52(sp) > addi a5,a1,12 > sw a5,56(sp) > addi a5,a0,13 > sw a5,60(sp) > addi a5,a1,13 > sw a5,64(sp) > addi a5,a0,14 > sw a5,68(sp) > addi a5,a1,14 > sw a5,72(sp) > li a4,0 > li a5,32 > vsetvli zero,a5,e8,mf8,ta,ma > .L3: > lw a3,76(sp) > lw t2,60(sp) > add s11,a3,a4 > lw a3,80(sp) > add t2,t2,a4 > add t0,a3,a4 > lw a3,84(sp) > add s10,a3,a4 > lw a3,88(sp) > add t6,a3,a4 > lw a3,92(sp) > add s9,a3,a4 > lw a3,96(sp) > add t5,a3,a4 > lw a3,100(sp) > add s8,a3,a4 > lw a3,104(sp) > add t4,a3,a4 > lw a3,108(sp) > add s7,a3,a4 > lw a3,112(sp) > add t3,a3,a4 > lw a3,116(sp) > add s6,a3,a4 > lw a3,120(sp) > add t1,a3,a4 > lw a3,124(sp) > add s5,a3,a4 > lw a3,128(sp) > add a7,a3,a4 > lw a3,132(sp) > add s4,a3,a4 > lw a3,136(sp) > add a6,a3,a4 > lw 
a3,140(sp) > add s3,a3,a4 > lw a3,32(sp) > add a0,a3,a4 > lw a3,36(sp) > add s2,a3,a4 > lw a3,40(sp) > add a1,a3,a4 > lw a3,44(sp) > add s1,a3,a4 > lw a3,48(sp) > add a2,a3,a4 > lw a3,52(sp) > add s0,a3,a4 > lw a3,56(sp) > add a3,a3,a4 > sw a3,12(sp) > lw a3,64(sp) > add a3,a3,a4 > sw a3,16(sp) > lw a3,68(sp) > add a3,a3,a4 > sw a3,20(sp) > lw a3,72(sp) > add a3,a3,a4 > sw a3,24(sp) > vle8.v v24,0(s11) > vse8.v v24,0(t0) > vle8.v v24,0(s10) > vse8.v v24,0(t6) > vle8.v v24,0(s9) > vse8.v v24,0(t5) > vle8.v v24,0(s8) > vse8.v v24,0(t4) > vle8.v v24,0(s7) > vse8.v v24,0(t3) > vle8.v v24,0(s6) > vse8.v v24,0(t1) > vle8.v v24,0(s5) > vse8.v v24,0(a7) > vle8.v v24,0(s4) > vse8.v v24,0(a6) > vle8.v v24,0(s3) > vse8.v v24,0(a0) > vle8.v v24,0(s2) > vse8.v v24,0(a1) > vle8.v v24,0(s1) > vse8.v v24,0(a2) > lw a3,12(sp) > vle8.v v24,0(s0) > addi a4,a4,1 > vse8.v v24,0(a3) > lw a3,16(sp) > vle8.v v24,0(t2) > vse8.v v24,0(a3) > lw a3,20(sp) > vle8.v v24,0(a3) > lw a3,24(sp) > vse8.v v24,0(a3) > lw a3,28(sp) > bne a3,a4,.L3 > lw s0,188(sp) > lw s1,184(sp) > lw s2,180(sp) > lw s3,176(sp) > lw s4,172(sp) > lw s5,168(sp) > lw s6,164(sp) > lw s7,160(sp) > lw s8,156(sp) > lw s9,152(sp) > lw s10,148(sp) > lw s11,144(sp) > .L1: > addi sp,sp,192 > jr ra > > Codegen is quite ugly. 
> > However, if we try -O3 -fno-gcse -fno-schedule-insns > -fno-move-loop-invariants > > ASM is much better, same as LLVM: > f: > ble a3,zero,.L1 > li a5,0 > .L3: > li a4,32 > vsetvli zero,a4,e8,mf8,ta,ma > addi a2,a0,1 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,1 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,2 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,2 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,3 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,3 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,4 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,4 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,5 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,5 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,6 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,6 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,7 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,7 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,8 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,8 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,9 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,9 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,10 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,10 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,11 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,11 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,12 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,12 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,13 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,13 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a2,a0,14 > add a2,a2,a5 > vle8.v v24,0(a2) > addi a2,a1,14 > add a2,a2,a5 > vse8.v v24,0(a2) > addi a5,a5,1 > bne a3,a5,.L3 > .L1: > ret > > Currently, RVV support doesn't have any Cost model. I am not sure whether > it's related to it. May need someone help me to fix it. Thanks. If we disable hoisting, the code is better overall. However, it also means that li a4,32 is no longer hoisted out of the loop, so the VSETVL pass cannot hoist the vsetvl instruction out of the loop either.
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Bug rtl-optimization/108412] RISC-V: Negative optimization of GCSE && LOOP INVARIANTS 2023-01-16 2:04 [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS juzhe.zhong at rivai dot ai 2023-01-16 2:23 ` [Bug rtl-optimization/108412] " juzhe.zhong at rivai dot ai @ 2023-01-16 2:49 ` pinskia at gcc dot gnu.org 2023-08-24 9:28 ` rdapp at gcc dot gnu.org ` (2 subsequent siblings) 4 siblings, 0 replies; 6+ messages in thread From: pinskia at gcc dot gnu.org @ 2023-01-16 2:49 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108412 --- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> --- -fsched-pressure is definitely something to look into for RISC-V to improve pre-RA scheduling. ^ permalink raw reply [flat|nested] 6+ messages in thread
* [Bug rtl-optimization/108412] RISC-V: Negative optimization of GCSE && LOOP INVARIANTS 2023-01-16 2:04 [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS juzhe.zhong at rivai dot ai 2023-01-16 2:23 ` [Bug rtl-optimization/108412] " juzhe.zhong at rivai dot ai 2023-01-16 2:49 ` [Bug rtl-optimization/108412] RISC-V: " pinskia at gcc dot gnu.org @ 2023-08-24 9:28 ` rdapp at gcc dot gnu.org 2023-12-21 8:24 ` juzhe.zhong at rivai dot ai 2023-12-21 8:25 ` juzhe.zhong at rivai dot ai 4 siblings, 0 replies; 6+ messages in thread From: rdapp at gcc dot gnu.org @ 2023-08-24 9:28 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108412 Robin Dapp <rdapp at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |rdapp at gcc dot gnu.org --- Comment #3 from Robin Dapp <rdapp at gcc dot gnu.org> --- I played around a bit with the scheduling model and the pressure-aware scheduling. -fsched-pressure alone does not seem to help because the problem is indeed the latency of vector loads and stores. The scheduler will try to keep dependent loads and stores apart (for the number of cycles specified), and then, after realizing there is nothing to put in between, lump everything together at the end of the sequence. That's a well-known but unfortunate property of scheduling. I will need to think of something, but this is not resolved for now. ^ permalink raw reply [flat|nested] 6+ messages in thread
* [Bug rtl-optimization/108412] RISC-V: Negative optimization of GCSE && LOOP INVARIANTS 2023-01-16 2:04 [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS juzhe.zhong at rivai dot ai ` (2 preceding siblings ...) 2023-08-24 9:28 ` rdapp at gcc dot gnu.org @ 2023-12-21 8:24 ` juzhe.zhong at rivai dot ai 2023-12-21 8:25 ` juzhe.zhong at rivai dot ai 4 siblings, 0 replies; 6+ messages in thread From: juzhe.zhong at rivai dot ai @ 2023-12-21 8:24 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108412 --- Comment #4 from JuzheZhong <juzhe.zhong at rivai dot ai> --- This issue is fixed when we use -mtune=sifive-u74 so it won't be a problem. ^ permalink raw reply [flat|nested] 6+ messages in thread
* [Bug rtl-optimization/108412] RISC-V: Negative optimization of GCSE && LOOP INVARIANTS 2023-01-16 2:04 [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS juzhe.zhong at rivai dot ai ` (3 preceding siblings ...) 2023-12-21 8:24 ` juzhe.zhong at rivai dot ai @ 2023-12-21 8:25 ` juzhe.zhong at rivai dot ai 4 siblings, 0 replies; 6+ messages in thread From: juzhe.zhong at rivai dot ai @ 2023-12-21 8:25 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108412 JuzheZhong <juzhe.zhong at rivai dot ai> changed: What |Removed |Added ---------------------------------------------------------------------------- Resolution|--- |FIXED Status|UNCONFIRMED |RESOLVED --- Comment #5 from JuzheZhong <juzhe.zhong at rivai dot ai> --- This issue is fixed when we use -mtune=sifive-u74 so it won't be a problem. ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-12-21 8:25 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-01-16 2:04 [Bug c/108412] New: Negative optimization of GCSE && LOOP INVARIANTS juzhe.zhong at rivai dot ai 2023-01-16 2:23 ` [Bug rtl-optimization/108412] " juzhe.zhong at rivai dot ai 2023-01-16 2:49 ` [Bug rtl-optimization/108412] RISC-V: " pinskia at gcc dot gnu.org 2023-08-24 9:28 ` rdapp at gcc dot gnu.org 2023-12-21 8:24 ` juzhe.zhong at rivai dot ai 2023-12-21 8:25 ` juzhe.zhong at rivai dot ai
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).