public inbox for gcc-prs@sourceware.org
help / color / mirror / Atom feed
* Re: optimization/7625: gcc pessimized 64-bit % operator on hppa2.0
@ 2002-11-21 14:26 danglin
  0 siblings, 0 replies; 2+ messages in thread
From: danglin @ 2002-11-21 14:26 UTC (permalink / raw)
  To: danglin, dtucker, gcc-bugs, gcc-prs, nobody

Synopsis: gcc pessimized 64-bit % operator on hppa2.0

Responsible-Changed-From-To: unassigned->danglin
Responsible-Changed-By: danglin
Responsible-Changed-When: Fri Nov 15 15:25:18 2002
Responsible-Changed-Why:
    Assignment.
State-Changed-From-To: open->analyzed
State-Changed-By: danglin
State-Changed-When: Fri Nov 15 15:25:18 2002
State-Changed-Why:
    Problem confirmed.  GCC currently uses __umoddi3 from
    libgcc2.c for the operation.  We need to add a pattern
    to allow use of $$rem2U when available.  We don't
    currently have this routine in the millicode routines
    used with linux.
    
    I suspect there may be other 64-bit operations that
    are pessimized by using generic libgcc code.

http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7625


^ permalink raw reply	[flat|nested] 2+ messages in thread

* optimization/7625: gcc pessimized 64-bit % operator on hppa2.0
@ 2002-08-18 14:26 Darren Tucker
  0 siblings, 0 replies; 2+ messages in thread
From: Darren Tucker @ 2002-08-18 14:26 UTC (permalink / raw)
  To: gcc-gnats


>Number:         7625
>Category:       optimization
>Synopsis:       gcc pessimized 64-bit % operator on hppa2.0
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    unassigned
>State:          open
>Class:          pessimizes-code
>Submitter-Id:   net
>Arrival-Date:   Sun Aug 18 04:36:00 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     Darren Tucker
>Release:        3.2
>Organization:
none
>Environment:
System: HP-UX c240 B.11.00 A 9000/782 2007058445 two-user license

host: hppa2.0w-hp-hpux11.00
build: hppa2.0w-hp-hpux11.00
target: hppa2.0w-hp-hpux11.00
configured with: ../gcc-3.2/configure --with-as=/usr/local/hppa2.0w-hp-hpux11.00/bin/as --with-gnu-as --with-ld=/usr/ccs/bin/ld --enable-languages=c,c++
>Description:

GCC seems to compile code for the 64-bit "%" operator that is about 6 times
slower than the HP native compiler on HPPA2.0 machines, even with -march=2.0.

This was noticed affecting OpenSSL DSA operations and identified by Deron
Meranda. For background, please see
http://marc.theaimsgroup.com/?l=openssh-unix-dev&m=102646106016694&w=2

$ cat longmodtest.c
#include <stdio.h>

int
main()
{
        unsigned long long i, a=0;

        for(i=2000000; i; --i)
                a += (i+10) % i;

        printf("Result=%llu\n", a);
        exit(0);
}

$ cc +O3 longmodtest.c
$ time ./a.out
Result=19999913

real    0m0.649s
user    0m0.650s
sys     0m0.000s

$ gcc -O3 -march=2.0 longmodtest.c
$ time ./a.out
Result=19999913

real    0m3.712s
user    0m3.700s
sys     0m0.020s

>How-To-Repeat:

$ gcc -O3 -march=2.0 -v -save-temps longmodtest.c
Reading specs from /usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2/specs
Configured with: ../gcc-3.2/configure --with-as=/usr/local/hppa2.0w-hp-hpux11.00/bin/as --with-gnu-as --with-ld=/usr/ccs/bin/ld --enable-languages=c,c++
Thread model: single
gcc version 3.2
 /usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2/cpp0 -lang-c -v -D__GNUC__=3 -D__GNUC_MINOR__=2 -D__GNUC_PATCHLEVEL__=0 -D__GXX_ABI_VERSION=102 -Dhppa -Dhp9000s800 -D__hp9000s800 -Dhp9k8 -DPWB -Dhpux -Dunix -D__hppa__ -D__hp9000s800__ -D__hp9000s800 -D__hp9k8__ -D__PWB__ -D__hpux__ -D__unix__ -D__hppa -D__hp9000s800 -D__hp9k8 -D__PWB -D__hpux -D__unix -Asystem=unix -Asystem=hpux -Acpu=hppa -Amachine=hppa -D__OPTIMIZE__ -D__STDC_HOSTED__=1 -D_PA_RISC1_1 -D__hp9000s700 -D_HPUX_SOURCE -D_HIUX_SOURCE -D__STDC_EXT__ -D_INCLUDE_LONGLONG longmodtest.c longmodtest.i
GNU CPP version 3.2 (cpplib) (hppa)
ignoring nonexistent directory "NONE/include"
ignoring nonexistent directory "/usr/local/hppa2.0w-hp-hpux11.00/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/local/include
 /usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2/include
 /usr/include
End of search list.
 /usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2/cc1 -fpreprocessed longmodtest.i -quiet -dumpbase longmodtest.c -march=2.0 -O3 -version -o longmodtest.s
GNU CPP version 3.2 (cpplib) (hppa)
GNU C version 3.2 (hppa2.0w-hp-hpux11.00)
        compiled by GNU C version 3.2.
 /usr/local/hppa2.0w-hp-hpux11.00/bin/as --traditional-format -o longmodtest.o longmodtest.s
 /usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2/collect2 -L/lib/pa1.1 -L/usr/lib/pa1.1 -z -u main /usr/ccs/lib/crt0.o -L/usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2 -L/usr/ccs/bin -L/usr/ccs/lib -L/opt/langtools/lib -L/usr/local/lib/gcc-lib/hppa2.0w-hp-hpux11.00/3.2/../../.. longmodtest.o -lgcc -lgcc_eh -lc -lgcc -lgcc_eh

begin 644 longmodtest.i.bz2
M0EIH.3%!62936=43M8T`!)W?@'X46G__[P-D$$Z_[__Z8`J_>`Z:6VN[?>JC
ME=ZUMPY`.SW<V`'">W<"22:)C0B83:GJ::,33(TT```&@$IJ9`IE3\BGFJ,F
MGJ8F0``-``]0!S3$R9-&$P3$TP"8!#!&!&`2:2131/0F1,$#1ZC:GJ!H&@:#
M0`(DE3U/5/4/U31LD9/4WI1H,:C33$--,$9&!$D0"",(:AZJ>4_5,8H`'I`!
MIID8AF06"(2$B$0.E%$BQ6$4!`('O3R,(I`4BS_;20Q;1:J-M9Y:%8"C#&5J
M`+`J5A60UACF.#,B7"RX%D,3,ZV88JQE:VE!2L*@T.E9_UT(JL$8:$E%8JBR
M_QDS(52VI14$4=Z<H!(3CJ`I).*2'0G0:Z6B.4K:6NBR8*6R:88FJ62;,V8:
M2%0#$,2;@S')K21S<Z[CADI<L/S173GO65Y24J7O9*JM-Z1>W(;[[9#DZ^Q/
M8'H/%;%6+*A41@HQ2"(*(BHBGW"$]26V8HSRVKS_;YOJ^"/="(R0"#P"A7B%
M0DD)2V5.P9L0_4G'I?PEBJ<X,VD>C5U:&AGXI6"A!8/4&F<"L+"Q)J<VT`H:
M/;+ZW;2ASCE:AD20M!K[/&S;961-5B.,*D@$!4A1-#`D,^+LR0_3#MJ1HBZA
M10@T\O+FM*HC4O#D)I)IFG&3D9I,3E<8X!$HS,UH<<%HK>`F5K1J720&1<S`
M%,F%WP,9(9*S",G[)MPX\!_,])ZX#F0D31@):F6G.M#G)"-+('\&UWZYNAH7
M77J[E_4X_[T>?VW5A#<#?^+.H:2)V<<?(HG9]M#$Q6=>0Z1[W)R^S;N]P^0[
M>ZIQ^(AWDGJMQ,2*5JC6HU(.F)DU==$33!&L*+KS6A1&+INM8D?\U02`2,8)
MCEU03[D$_ECYH)Q%^6<3*@FL@G$@E[GQK18V85))())!(1(U0\>J"4)]P@GD
M:BD4BAH@E()G`;H*Q0]0HT7X<<X\G)MUJ8WB)L()+Y3G\<L4$\C6#C)MEG='
MTS02K_9\?P02XX0.PMZ,Z*U_C2&?J@G!!,K\?&BVU2$WVAL0`;BC;7;E\T$P
MM#@B'E#Z_C7Y('LF/!=B_N@X]=)H#6N.<CIN,-:E9LX]G'U#!DFRRS>8S97V
M*\9QB2FMM!Z)2L!M*[)SHL^\6."`WID?RP/I%<+%:_+=\[LP*(@_7[+V`<X:
M];6^15#ZO<^YJ+L[8([?%=P=!1/$KK\H;!#8ID)>=;"7._]E.X[=^W?T]77(
M0\WH%6,51(L@I6//=(0Q\)"*,5G$QAQ:WPYC9G,V6SAT1YHFOSV<@46!$&"N
M'4PXI-@/FE@>=@:R%T!O9B-HWDK0KH^$"T*BM!5.(P._D)-NPD=J'>:U:MLH
M92:',E@M>;:\6:F]Y=H&VX5,4VXVB8IU45SH_&G:F@DBJ*-/[<CXW3.89T3F
M=5"9]4L40^9+H-3%7:4;#8_I5\>M"O:E*;C#V`2IPXO,P)/-2_D7]$Z\Y;7G
M5>=BO0S)A6VF"";<`M8?M10W:F_U<6_F221A)(T?,03MW^WJ4<NDDEN24X/+
M`DE]_*^V9]\^CBG-!-@+ZS>@F@^2"7X0?T,"0`A(2$4/Z174F>!F&'D\'D)"
M;Z?.>!6EIG'J\6'0(L%1!(B(O&=^3L/'-X`Q@<4A8@;9A4<R\X%U_&'#2>)B
M!J*/B&HBN.`)-NFUC[]^I[0,+9S5YOD1[=L;9BCD=<,FV`8?^,H!"489<+9X
M?#-#+0%)V$5F0BO+N;LYB%<32:EM,00*E\5':S^.+P21DW;].7X)TG>*D@C"
MJ'OR$5W=?1HU)($)+%O$X51V`Q-:"2':8$,@C:^3J\[!A&32QJ9<,1](VC4$
MN"^#V*,OD@DL:8X0K.46?:"FP&/O%'A[E4(K_=!-R"1!-P3W[,[B"<T,6TXM
M+!NRX;=*$B\A2$%''2ZH2%D=E)-P:"]K!TA/=9`/"JP3G%!(C$1=`RK%51C$
M&"OCI5$541\,"U4599,025613?W!OS-171"X-N9XN2F@](B9)D=L3T8G<,&`
M9\`*]$$[()P<M^*CL;3(&*+NR=`BBG5"7*R"MA*LJAU*<GNO;`\2$^7`1E;6
MR=69'+&62EI]>2&15S#18<Q:2`/Z>0)Z"8EO+84<(6PE.`*77#8N%=B-86OL
MY<SGXF(*9H!BJ+@P`C&$$(D0=0,<-WLP!(U*\D4KI".YWN?)38R0"(!IX9<=
MNAWV>AGZJZ%F)L5QU>.*"<\+Y();PU?(17H%4(Z8%C#?6,_-#$OQ.*;T$]:-
M3B=-#89N-%QZ9X,FJ";LK>:"'CRV;.HH[7AA?'I8ND&&UR85'KT0YW;`@:'%
MPWW#>KJ!"!1PM0!(DCT%://#'PXX,Q+;GB^4+<Y?(\7S<9J86E4VL#"U=04J
MI$)E7N,F]M*K8M1F*-^-[FG6`!%NY@%,/:J8+?(KC<5RN;I1"=26L6ZE)85C
M:-[P::LL8B*"&K*UJS2%M*QB*"B"B8T1%B-*-"M:JEIIAG#8YO:WVWL%M@JJ
M)UY+L:)@)1A`H(`Q%*4*82!LD#@A@R%*4U&0#."Y6N`2)RJ;9Y!Q9@J:AR[;
M_50#/1V4\#?>QD;6A9:$V76YN-V*6+>[W[$@2%_G<;QPTE\H.7A(=$)(Z"D%
M)J(FY5YQ(D%$01FJ9O36\VVFH(JPS.&&:J".C:QBM0IDS:@93YE.TZX&==44
M?DMT(HEYYSS\&H!`^$!J#"#WACWTO?B*/!5.UU+^'&C$FVU#1$P/H@F?!FW#
AV()[RZ('+',$0@K]0=Q=D\X2/L[VM_\7<D4X4)#5$[6-
`
end

For comparison, if you split the "%" operation out into a separate source
file:

unsigned long long longmod(unsigned long long a, unsigned long long b)
{
        return(a % b);
}

the HP compiler produces the following assembler output:

        .LEVEL  2.0N

        .SPACE  $TEXT$,SORT=8
        .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
longmod
        .PROC
        .CALLINFO FRAME=0,ARGS_SAVED,ORDERING_AWARE
        .ENTRY
        DEPD    %r25,31,32,%r26 ;offset 0x0
        DEPD    %r23,31,32,%r24 ;offset 0x4
        EXTRD,U %r26,31,32,%r25 ;offset 0x8
        .CALL   ;in=23,24,25,26;out=21,22,28,29; (MILLICALL)
        B,L     $$rem2U,%r31    ;offset 0xc
        EXTRD,U %r24,31,32,%r23 ;offset 0x10
        DEPD    %r28,31,32,%r29 ;offset 0x14
$00000002
$L0
        BVE     (%r2)   ;offset 0x18
        .EXIT
        EXTRD,U %r29,31,32,%r28 ;offset 0x1c
        .PROCEND        ;in=23,25;out=28,29;fpin=105,107;

        .SPACE  $TEXT$
        .SUBSPA $CODE$
        .SPACE  $PRIVATE$,SORT=16
        .SPACE  $TEXT$
        .SUBSPA $CODE$
        .EXPORT longmod,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
        .IMPORT $$rem2U,MILLICODE
        .END

>Fix:
	Be patient :-)

>Release-Note:
>Audit-Trail:
>Unformatted:


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2002-11-15 23:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-21 14:26 optimization/7625: gcc pessimized 64-bit % operator on hppa2.0 danglin
  -- strict thread matches above, loose matches on Subject: below --
2002-08-18 14:26 Darren Tucker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).