Hello longest_match is a function that is used in compression to find out same bytes. I notice GCC 4.5.0(but on GGC series 4 happen too) use 0x64 bytes from stack gcc3.4.0 only 8. gcc4.5.0 is btw faster as 4.4.1. Can somebody tell me, how this optimization can switch off in GCC ?. GCC4.5.0 move data on this 0x64 bytes in this function.gcc3.4 do this not and the asm optimized version of longest_match do this too not.I also dont understand wy there must data copy.this func only compare data as far i see and return length of same values. here is this function optimized in asm and X86 asm http://hermes.ibert.com/source/foreign/gzip/match.s this stack usage happen when i use -O1. So i can see if it run faster then. I think thats not only a backend problem, and happen on all platforms. Here is C source and below is asm Output from gcc 4.5.0 and GCC 3.4.0 in 68k asm. its compile with -g2 there are stabd in.To see what sourceline it is, look in attached file. uInt longest_match(s, cur_match) deflate_state *s; IPos cur_match; /* current match */ { unsigned chain_length = s->max_chain_length;/* max hash chain length */ register Bytef *scan = s->window + s->strstart; /* current string */ register Bytef *match; /* matched string */ register int len; /* length of current match */ int best_len = s->prev_length; /* best match length so far */ int nice_match = s->nice_match; /* stop if match long enough */ IPos limit = s->strstart > (IPos)MAX_DIST(s) ? s->strstart - (IPos)MAX_DIST(s) : NIL; /* Stop when cur_match becomes <= limit. To simplify the code, * we prevent matches with the string of window index 0. */ Posf *prev = s->prev; uInt wmask = s->w_mask; #ifdef UNALIGNED_OK /* Compare two bytes at a time. Note: this is not always beneficial. * Try with and without -DUNALIGNED_OK to check. */ register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; register ush scan_start = *(ushf*)scan; register ush scan_end = *(ushf*)(scan+best_len-1); #else register Bytef *strend = s->window + s->strstart + MAX_MATCH; register Byte scan_end1 = scan[best_len-1]; register Byte scan_end = scan[best_len]; #endif /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. * It is easy to get rid of this optimization if necessary. */ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); /* Do not waste too much time if we already have a good match: */ if (s->prev_length >= s->good_match) { chain_length >>= 2; } /* Do not look for matches beyond the end of the input. This is necessary * to make deflate deterministic. */ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); do { Assert(cur_match < s->strstart, "no future"); match = s->window + cur_match; /* Skip to next match if the match length cannot increase * or if the match length is less than 2. Note that the checks below * for insufficient lookahead only occur occasionally for performance * reasons. Therefore uninitialized memory will be accessed, and * conditional jumps will be made that depend on those values. * However the length of the match is limited to the lookahead, so * the output of deflate is not affected by the uninitialized values. */ #if (defined(UNALIGNED_OK) && MAX_MATCH == 258) /* This code assumes sizeof(unsigned short) == 2. Do not use * UNALIGNED_OK if your compiler uses a different size. */ if (*(ushf*)(match+best_len-1) != scan_end || *(ushf*)match != scan_start) continue; /* It is not necessary to compare scan[2] and match[2] since they are * always equal when the other bytes match, given that the hash keys * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at * strstart+3, +5, ... up to strstart+257. We check for insufficient * lookahead only every 4th comparison; the 128th check will be made * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is * necessary to put more guard bytes at the end of the window, or * to check more often for insufficient lookahead. */ Assert(scan[2] == match[2], "scan[2]?"); scan++, match++; do { } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && *(ushf*)(scan+=2) == *(ushf*)(match+=2) && *(ushf*)(scan+=2) == *(ushf*)(match+=2) && *(ushf*)(scan+=2) == *(ushf*)(match+=2) && scan < strend); /* The funny "do {}" generates better code on most compilers */ /* Here, scan <= window+strstart+257 */ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); if (*scan == *match) scan++; len = (MAX_MATCH - 1) - (int)(strend-scan); scan = strend - (MAX_MATCH-1); #else /* UNALIGNED_OK */ if (match[best_len] != scan_end || match[best_len-1] != scan_end1 || *match != *scan || *++match != scan[1]) continue; /* The check at best_len-1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that * the hash keys are equal and that HASH_BITS >= 8. */ scan += 2, match++; Assert(*scan == *match, "match[2]?"); /* We check for insufficient lookahead only every 8th comparison; * the 256th check will be made at strstart+258. */ do { } while (*++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && scan < strend); Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); len = MAX_MATCH - (int)(strend - scan); scan = strend - MAX_MATCH; #endif /* UNALIGNED_OK */ if (len > best_len) { s->match_start = cur_match; best_len = len; if (len >= nice_match) break; #ifdef UNALIGNED_OK scan_end = *(ushf*)(scan+best_len-1); #else scan_end1 = scan[best_len-1]; scan_end = scan[best_len]; #endif } } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); if ((uInt)best_len <= s->lookahead) return (uInt)best_len; return s->lookahead; } #endif /* ASMV */ #endif /* FASTEST */ GCC 4.5.0 _longest_match: .stabd 68,0,1069 LFBB1: lea (-64,sp),sp movem.l #16190,-(sp) move.l 112(sp),a1 move.l 116(sp),d1 .stabd 68,0,1070 move.l 122(a1),d2 .stabd 68,0,1071 move.l 54(a1),a2 move.l 106(a1),d3 lea (a2,d3.l),a4 .stabd 68,0,1075 move.l 118(a1),d0 move.l d0,d7 .stabd 68,0,1076 move.l 142(a1),a6 .stabd 68,0,1077 move.l 42(a1),d5 .stabd 68,0,1078 move.l d5,d4 add.l #-262,d4 cmp.l d3,d4 jcc L12 move.l d3,d4 add.l #262,d4 sub.l d5,d4 L2: .stabd 68,0,1082 move.l 62(a1),a3 .stabd 68,0,1083 move.l 50(a1),d5 .stabd 68,0,1093 lea 258(a2,d3.l),a5 .stabd 68,0,1094 move.b -1(a4,d0.l),50(sp) .stabd 68,0,1095 move.b (a4,d0.l),d3 .stabd 68,0,1101 cmp.l 138(a1),d0 jcs L3 .stabd 68,0,1106 lsr.l #2,d2 L3: .stabd 68,0,1111 move.l 114(a1),d6 cmp.l a6,d6 jcc L10 move.l d6,a6 L10: .stabd 68,0,1188 move.l #258,d0 sub.l a5,d0 move.l d0,62(sp) .stabd 68,0,1189 lea (-258,a5),a0 move.l a0,58(sp) move.l d7,a0 move.l a1,66(sp) move.l d6,54(sp) move.l a6,46(sp) move.l a5,a6 move.w d3,a5 L25: .stabd 68,0,1117 lea (a2,d1.l),a1 .stabd 68,0,1162 move.l a0,d0 move.b (a1,a0.l),d3 move.w a5,d6 cmp.b d3,d6 jeq L29 L5: .stabd 68,0,1204 and.l d5,d1 .stabd 68,0,1205 move.w (a3,d1.l*2),d1 and.l #65535,d1 cmp.l d4,d1 jls L27 subq.l #1,d2 jne L25 L27: move.l 54(sp),d6 L9: .stabd 68,0,1207 cmp.l d0,d6 jcc L11 move.l d6,d0 L11: .stabd 68,0,1209 movem.l (sp)+,#31996 lea (64,sp),sp rts L29: .stabd 68,0,1163 move.b -1(a1,a0.l),d7 .stabd 68,0,1162 cmp.b 50(sp),d7 jne L5 .stabd 68,0,1164 move.b (a1),d6 cmp.b (a4),d6 jne L5 addq.l #1,a1 .stabd 68,0,1165 move.b (a1),d6 cmp.b 1(a4),d6 jne L5 .stabd 68,0,1173 lea (2,a4),a5 move.l a1,d6 addq.l #1,d6 lea (3,a4),a1 move.l a1,84(sp) lea (9,a4),a1 move.l a1,88(sp) lea (8,a4),a1 move.l a1,92(sp) lea (7,a4),a1 move.l a1,96(sp) lea (6,a4),a1 move.l a1,100(sp) lea (5,a4),a1 move.l a1,104(sp) addq.l #4,a4 move.l d6,a1 move.l 46(sp),d6 move.l d5,50(sp) move.l a0,d5 move.l a0,70(sp) move.l 88(sp),d0 move.b d3,74(sp) move.l d2,88(sp) move.l a6,d2 move.b d7,75(sp) move.l a3,d7 move.l 100(sp),a6 move.l 104(sp),a0 move.l d1,76(sp) move.l 92(sp),d1 move.l a2,80(sp) move.l a4,a2 move.l 84(sp),a4 L7: .stabd 68,0,1180 move.b (a4),d3 cmp.b 1(a1),d3 jne L16 move.b (a2),d3 cmp.b 2(a1),d3 jne L17 .stabd 68,0,1181 move.b (a0),d3 cmp.b 3(a1),d3 jne L18 move.b (a6),d3 cmp.b 4(a1),d3 jne L19 .stabd 68,0,1182 move.l 96(sp),a3 move.b (a3),d3 cmp.b 5(a1),d3 jne L20 move.l d1,a3 move.b (a3),d3 cmp.b 6(a1),d3 jne L21 .stabd 68,0,1183 move.l d0,a3 move.b (a3),d3 cmp.b 7(a1),d3 jne L22 .stabd 68,0,1066 addq.l #8,a5 addq.l #8,a1 .stabd 68,0,1183 move.b (a5),d3 cmp.b (a1),d3 jne L26 addq.l #8,a4 addq.l #8,d0 addq.l #8,d1 addq.l #8,96(sp) addq.l #8,a6 addq.l #8,a0 addq.l #8,a2 cmp.l d2,a5 jcs L7 L26: move.l d5,a0 move.l d6,46(sp) move.l d7,a3 move.l 50(sp),d5 move.l d2,a6 move.l 88(sp),d2 move.l 70(sp),d0 move.b 74(sp),d3 move.b 75(sp),d7 move.l 76(sp),d1 move.l 80(sp),a2 L6: .stabd 68,0,1188 add.l 62(sp),a5 .stabd 68,0,1189 move.l 58(sp),a4 .stabd 68,0,1193 cmp.l a0,a5 jle L23 .stabd 68,0,1194 move.l 66(sp),a0 move.l d1,110(a0) .stabd 68,0,1196 cmp.l 46(sp),a5 jge L30 .stabd 68,0,1200 move.l a5,d0 move.l 58(sp),a1 move.b -1(a1,a5.l),50(sp) .stabd 68,0,1201 move.b (a1,a5.l),d3 move.w d3,a5 move.l d0,a0 jra L5 L12: .stabd 68,0,1078 clr.l d4 jra L2 L23: .stabd 68,0,1193 move.w d3,a5 move.b d7,50(sp) jra L5 L16: move.l d5,a0 move.l a4,84(sp) move.l d6,46(sp) move.l d7,a3 move.l 50(sp),d5 move.l d2,a6 move.l 88(sp),d2 move.l 70(sp),d0 move.b 74(sp),d3 move.b 75(sp),d7 move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1184 move.l a4,a5 jra L6 L17: move.l d5,a0 move.l d6,46(sp) move.l d7,a3 move.l 50(sp),d5 move.l d2,a6 move.l 88(sp),d2 move.l 70(sp),d0 move.b 74(sp),d3 move.b 75(sp),d7 move.l a2,a4 move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1180 move.l a4,a5 jra L6 L19: move.l d5,a0 move.l d6,46(sp) move.l d7,a3 move.l 50(sp),d5 move.l 70(sp),d0 move.b 75(sp),d7 move.l a6,100(sp) move.l d2,a6 move.l 88(sp),d2 move.b 74(sp),d3 move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1181 move.l 100(sp),a5 jra L6 L18: move.l d6,46(sp) move.l d7,a3 move.l d2,a6 move.l 88(sp),d2 move.l 70(sp),d0 move.b 74(sp),d3 move.b 75(sp),d7 move.l a0,104(sp) move.l d5,a0 move.l 50(sp),d5 move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1180 move.l 104(sp),a5 jra L6 L21: move.l d5,a0 move.l d6,46(sp) move.l d7,a3 move.l 50(sp),d5 move.l d2,a6 move.l 88(sp),d2 move.l 70(sp),d0 move.b 74(sp),d3 move.b 75(sp),d7 move.l d1,92(sp) move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1182 move.l 92(sp),a5 jra L6 L20: move.l d5,a0 move.l d6,46(sp) move.l 50(sp),d5 move.l d2,a6 move.l 88(sp),d2 move.l 70(sp),d0 move.b 74(sp),d3 move.l d7,a3 move.b 75(sp),d7 move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1181 move.l 96(sp),a5 jra L6 L22: move.l d5,a0 move.l d6,46(sp) move.l d7,a3 move.l 50(sp),d5 move.l d2,a6 move.l 88(sp),d2 move.l d0,88(sp) move.l 70(sp),d0 move.b 74(sp),d3 move.b 75(sp),d7 move.l 76(sp),d1 move.l 80(sp),a2 .stabd 68,0,1182 move.l 88(sp),a5 jra L6 L30: move.l a5,d0 move.l 54(sp),d6 jra L9 .stabs "longest_match:f17",36,0,1066,_longest_match .stabs "s:p189=*181",160,0,1067,112 .stabs "cur_match:p171",160,0,1068,116 .stabs "chain_length:r4",64,0,1070,2 .stabs "len:r1",64,0,1073,0 .stabs "best_len:r1",64,0,1075,7 .stabs "nice_match:r1",64,0,1076,14 .stabs "limit:r171",64,0,1077,4 .stabs "prev:r172",64,0,1082,11 .stabs "wmask:r17",64,0,1083,5 .stabs "strend:r127",64,0,1093,13 .stabs "s:r189",64,0,1067,9 .stabs "cur_match:r171",64,0,1068,1 .stabn 192,0,0,LFBB1 .stabn 224,0,0,Lscope1 Lscope1: .even ---------------------- GCC 3.4.0 ---------------------------------------- .even _longest_match: .stabd 68,0,1069 subqw #8,sp moveml #0x3f3e,sp@- movel sp@(56),a4 movel sp@(60),d1 .stabd 68,0,1070 movel a4@(122),d2 .stabd 68,0,1071 movel a4@(54),a2 movel a4@(106),a1 lea a2@(0,a1:l),a3 .stabd 68,0,1075 movel a4@(118),d3 .stabd 68,0,1076 movel a4@(142),d7 .stabd 68,0,1077 movel a4@(42),a0 movel a0,d0 addl #-262,d0 lea 0:w,a6 cmpl a1,d0 jcc L378 .stabd 68,0,1077 movel a1,a5 subl a0,a5 lea a5@(262),a6 L378: .stabd 68,0,1082 movel a4@(62),a5 .stabd 68,0,1083 movel a4@(50),d6 .stabd 68,0,1093 lea a1@(258,a2:l),a1 movel a1,sp@(48) .stabd 68,0,1094 moveb a3@(-1,d3:l),sp@(47) .stabd 68,0,1095 moveb a3@(d3:l),d4 .stabd 68,0,1104 cmpl a4@(138),d3 jcs L379 .stabd 68,0,1105 lsrl #2,d2 L379: .stabd 68,0,1110 movel a4@(114),d5 cmpl d7,d5 jcc L392 movel d5,d7 .even L392: .stabd 68,0,1116 lea a2@(0,d1:l),a0 .stabd 68,0,1161 cmpb a0@(d3:l),d4 jeq L396 L383: .stabd 68,0,1203 movel d1,d0 andl d6,d0 clrl d1 movew a5@(d0:l:2),d1 cmpl d1,a6 jcc L382 subql #1,d2 jne L392 L382: .stabd 68,0,1206 movel d3,d0 cmpl d3,d5 jcc L376 .stabd 68,0,1207 movel d5,d0 jra L376 .even L396: .stabd 68,0,1161 moveb sp@(47),d0 cmpb a0@(-1,d3:l),d0 jne L383 moveb a0@,d0 cmpb a3@,d0 jne L383 lea a0@(1),a1 moveb a1@,d0 cmpb a3@(1),d0 jne L383 .stabd 68,0,1172 lea a3@(2),a0 addql #1,a1 L386: .stabd 68,0,1178 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 addql #1,a0 addql #1,a1 moveb a0@,d0 cmpb a1@,d0 jne L387 cmpl sp@(48),a0 jcs L386 L387: .stabd 68,0,1187 movel sp@(48),a1 subl a0,a1 movel #258,d0 subl a1,d0 .stabd 68,0,1188 movel sp@(48),a3 lea a3@(-258),a3 .stabd 68,0,1192 cmpl d0,d3 jge L383 .stabd 68,0,1193 movel d1,a4@(110) .stabd 68,0,1194 movel d0,d3 .stabd 68,0,1195 cmpl d0,d7 jle L382 .stabd 68,0,1199 moveb a3@(-1,d0:l),sp@(47) .stabd 68,0,1200 moveb a3@(d0:l),d4 jra L383 .even L376: moveml sp@+,#0x7cfc addqw #8,sp rts .stabs "longest_match:f23",36,0,1069,_longest_match .stabs "s:p198",160,0,1067,56 .stabs "cur_match:p176",160,0,1068,60 .stabs "chain_length:r4",64,0,1070,2 .stabs "scan:r132",64,0,1071,11 .stabs "match:r132",64,0,1072,8 .stabs "len:r1",64,0,1073,0 .stabs "best_len:r1",64,0,1075,3 .stabs "nice_match:r1",64,0,1076,7 .stabs "limit:r176",64,0,1077,14 .stabs "prev:r177",64,0,1082,13 .stabs "wmask:r23",64,0,1083,6 .stabs "strend:132",128,0,1093,48 .stabs "scan_end1:22",128,0,1094,47 .stabs "scan_end:r22",64,0,1095,4 .stabs "s:r198",64,0,1067,12 .stabs "cur_match:r176",64,0,1068,1 .even _fill_window: > HSMathLibs V.46.00 beta 1 (17.07.2007): > > * the functions "IEEEDPFloor", "IEEEDPCeil" (mathieeedoubbas.library), > "IEEESPFloor", "IEEESPCeil" (mathieeesingbas.library and > mathieeesingbas-Patch), "SPFloor" and "SPCeil" (mathffp.library and > mathffp-Patch) new written > * Bugfixing of the functions "IEEEDPPow" (mathieeedoubtrans.library), > "IEEESPPow" (mathieeesingtrans.library), "SPCos", "SPSincos", "SPCosh", > "SPExp" and "SPPow" (mathtrans.library) > * changed the init-function of the mathffp-Patch and > mathieeesingbas-Patch > * changed the init-function of all libraries > * revised the manual Regards