> You indeed need to use CONVERT_EXPR here, maybe you can elaborate > on the optimization issues. > >> 2. for inline asm (a reduced test case that might not make much as a >> stand alone test-case, but I ran into similar cases with valid programmes) >> >> ;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0, >> symbol_order=0) >> >> fn1 (short int p1) >> { >> : >> __asm__("" : "=r" p1_2 : "0" p1_1(D)); >> return; >> >> } >> >> >> I am generating something like the following which ICEs. What is the >> expected out? >> >> ;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0, >> symbol_order=0) >> >> fn1 (short int p1) >> { >> int _1; >> int _2; >> short int _5; >> >> : >> _1 = (int) p1_4(D); >> _5 = (short int) _1; >> __asm__("" : "=r" p1_6 : "0" _5); >> _2 = (int) p1_6; >> return; >> >> } > > Parameters are indeed "interesting" to handle ;) As we now see on ARM > the incoming parameter (the default def) and later assignments to it > can require different promotions (well, different extensions for ARM). > > The only sensible way to deal with promoting parameters is to > promote them by changing the function signature. Thus reflect the > targets ABI for parameters in the GIMPLE representation (which > includes TYPE_ARG_TYPES and DECL_ARGUMENTS). > IMHO we should do this during gimplification of parameters / call > arguments already. > > So for your example you'd end up with > > fn1 (int p1) > { > __asm__("" : "=r" p1_6 : "0" p1_4(D)); > return; > } > > that is, promotions also apply to asm inputs/outputs (no?) Thanks for the review and answers. For the time being, I am handling gimple_asm as one that has to be handled in original type. I Will look into improving it after getting the basic framework right. As it is, attached patch bootstraps on x86_64-linux-gnu, arm-linux-gnu and aarch64-linux-gnu. There are few regressions to look into (Please see below). There are cases it is working well. There are cases where it can be improved. I am attaching couple test cases (and their results). I am seeing some BIT_AND_EXPR which are inserted by promotion are not being optimized when they are redundant. This is especially the case when I invalidate the VRP range into from VRP1 during the type promotion. I am looking into it. Please note that attached patch still needs to address: * Adding gimple_debug stmts. * Address review comment for expr.c handling SEXT_EXPR. * Address regression failures Based on the feedback, I will address the above and split the patch into logical patch set for easy detailed review. Here are the outputs for the testcases. --- c5.c.142t.veclower21 2015-08-05 08:50:11.367135339 +1000 +++ c5.c.143t.promotion 2015-08-05 08:50:11.367135339 +1000 @@ -1,34 +1,45 @@ ;; Function unPack (unPack, funcdef_no=0, decl_uid=4145, cgraph_uid=0, symbol_order=0) unPack (unsigned char c) { - short int _1; - unsigned short _4; - unsigned short _5; - short int _6; - short int _7; + int _1; + unsigned int _2; + unsigned int _3; + unsigned int _4; + unsigned int _5; + int _6; + int _7; + unsigned int _9; + int _11; + int _12; + short int _13; : - c_3 = c_2(D) & 15; - if (c_3 > 7) + _2 = (unsigned int) c_10(D); + _3 = _2 & 15; + _9 = _3 & 255; + if (_9 > 7) goto ; else goto ; : - _4 = (unsigned short) c_3; - _5 = _4 + 65531; - _6 = (short int) _5; + _4 = _3 & 65535; + _5 = _4 + 4294967291; + _11 = (int) _5; + _6 = (_11) sext from bit (16); goto ; : - _7 = (short int) c_3; + _12 = (int) _3; + _7 = (_12) sext from bit (16); : # _1 = PHI <_6(3), _7(4)> - return _1; + _13 = (short int) _1; + return _13; } --- c5.org.s 2015-08-05 08:51:44.619133892 +1000 +++ c5.new.s 2015-08-05 08:51:29.643134124 +1000 @@ -16,16 +16,14 @@ .syntax divided .arm .type unPack, %function unPack: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 @ link register save eliminated. and r0, r0, #15 cmp r0, #7 subhi r0, r0, #5 - uxth r0, r0 - sxth r0, r0 bx lr .size unPack, .-unPack .ident "GCC: (GNU) 6.0.0 20150724 (experimental)" .section .note.GNU-stack,"",%progbits --- crc.c.142t.veclower21 2015-08-05 08:52:43.811132974 +1000 +++ crc.c.143t.promotion 2015-08-05 08:52:43.811132974 +1000 @@ -1,52 +1,78 @@ ;; Function crc2 (crc2, funcdef_no=0, decl_uid=4146, cgraph_uid=0, symbol_order=0) crc2 (short unsigned int crc, unsigned char data) { unsigned char carry; unsigned char x16; unsigned char i; - unsigned char ivtmp_5; - unsigned char _9; - unsigned char _10; - unsigned char ivtmp_18; + unsigned int _2; + unsigned int _3; + unsigned int _5; + unsigned int _7; + unsigned int _8; + unsigned int _9; + unsigned int _10; + unsigned int _11; + unsigned int _12; + unsigned int _13; + unsigned int _15; + unsigned int _16; + unsigned int _18; + unsigned int _19; + unsigned int _21; + unsigned int _22; + unsigned int _24; + short unsigned int _25; + unsigned int _26; + unsigned int _27; + unsigned int _28; + unsigned int _29; : + _8 = (unsigned int) data_4(D); + _7 = (unsigned int) crc_30(D); : - # crc_28 = PHI - # data_29 = PHI - # ivtmp_18 = PHI - _9 = (unsigned char) crc_28; - _10 = _9 ^ data_29; - x16_11 = _10 & 1; - data_12 = data_29 >> 1; - if (x16_11 == 1) + # _28 = PHI <_2(5), _7(2)> + # _29 = PHI <_12(5), _8(2)> + # _18 = PHI <_5(5), 8(2)> + _9 = _28 & 255; + _10 = _9 ^ _29; + _11 = _10 & 1; + _3 = _29 & 255; + _12 = _3 >> 1; + _27 = _11 & 255; + if (_27 == 1) goto ; else goto ; : - crc_13 = crc_28 ^ 16386; - crc_24 = crc_13 >> 1; - crc_15 = crc_24 | 32768; + _13 = _28 ^ 16386; + _26 = _13 & 65535; + _24 = _26 >> 1; + _15 = _24 | 4294934528; : - # crc_2 = PHI - ivtmp_5 = ivtmp_18 - 1; - if (ivtmp_5 != 0) + # _2 = PHI <_15(4), _21(7)> + _5 = _18 - 1; + _22 = _5 & 255; + if (_22 != 0) goto ; else goto ; : - # crc_19 = PHI - return crc_19; + # _19 = PHI <_2(5)> + _25 = (short unsigned int) _19; + return _25; : - crc_21 = crc_28 >> 1; + _16 = _28 & 65535; + _21 = _16 >> 1; goto ; } --- crc.org.s 2015-08-05 08:54:17.491131520 +1000 +++ crc.new.s 2015-08-05 08:53:12.183132534 +1000 @@ -15,27 +15,28 @@ .global crc2 .syntax divided .arm .type crc2, %function crc2: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 mov ip, #32768 movt ip, 65535 str lr, [sp, #-4]! - mov r3, #8 + mov r2, #8 movw lr, #16386 .L3: - eor r2, r1, r0 - sub r3, r3, #1 - tst r2, #1 + uxtb r3, r0 + eor r3, r3, r1 mov r1, r1, lsr #1 + tst r3, #1 eorne r0, r0, lr - moveq r0, r0, lsr #1 - orrne r0, ip, r0, lsr #1 - uxthne r0, r0 - ands r3, r3, #255 + ubfxeq r0, r0, #1, #15 + ubfxne r0, r0, #1, #15 + orrne r0, r0, ip + subs r2, r2, #1 bne .L3 + uxth r0, r0 ldr pc, [sp], #4 .size crc2, .-crc2 .ident "GCC: (GNU) 6.0.0 20150724 (experimental)" .section .note.GNU-stack,"",%progbits Testsuite regression for x86_64-unknown-linux-gnu: Tests that now fail, but worked before: gfortran.dg/graphite/pr42393-1.f90 -O (test for excess errors) Testsuite regression for arm-linux-gnu: Tests that now fail, but worked before: arm-sim: gcc.dg/fixed-point/convert-sat.c execution test arm-sim: gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned int\\)" 0 arm-sim: gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting initializer" 0 arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized "\\(int\\)" arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized "\\(unsigned char\\)" 8 arm-sim: gcc.target/arm/mla-2.c scan-assembler smlalbb arm-sim: gcc.target/arm/unsigned-extend-2.c scan-assembler ands arm-sim: gcc.target/arm/wmul-1.c scan-assembler-times smlabb 2 arm-sim: gcc.target/arm/wmul-2.c scan-assembler-times smulbb 1 arm-sim: gcc.target/arm/wmul-3.c scan-assembler-times smulbb 2 arm-sim: gcc.target/arm/wmul-9.c scan-assembler smlalbb arm-sim: gfortran.dg/graphite/pr42393-1.f90 -O (test for excess errors) Tests that now work, but didn't before: arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile "Read tp_first_run: 0" 2 arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile "Read tp_first_run: 2" 1 arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile "Read tp_first_run: 3" 1 arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times rev16ne\\t 1 arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times revshne\\t 1 arm-sim: gcc.target/arm/smlaltb-1.c scan-assembler smlaltb\\t arm-sim: gcc.target/arm/smlaltt-1.c scan-assembler smlaltt\\t Testsuite regression for aarch64-linux-gnu: Tests that now fail, but worked before: c-c++-common/torture/vector-compare-1.c -O3 -g (test for excess errors) c-c++-common/torture/vector-compare-1.c -O3 -g (test for excess errors) gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned int\\)" 0 gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting initializer" 0 gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized "\\(int\\)" gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized "\\(unsigned char\\)" 8 Thanks, Kugan