| author | Shuxin Yang <shuxinyang2006@gmail.com> | 2014-03-17 18:17:23 -0700 |
|---|---|---|
| committer | hansr <hk-git@circlestorm.org> | 2014-10-08 14:15:37 +0200 |
| commit | 34bf824bf755fdf4979a9ed26e86028a12fdedc8 (patch) | |
| tree | 052e976273a8b6949b10e44d267b0eb70d1783a0 | |
| parent | c787ff04c3139fd6cba14a689e3e84b923be96e8 (diff) | |
| download | Project-Tick-34bf824bf755fdf4979a9ed26e86028a12fdedc8.tar.gz Project-Tick-34bf824bf755fdf4979a9ed26e86028a12fdedc8.zip | |
Restructure the loop, which gives about a 3% speedup in run time. I believe the
speedup arises from:
o. Removing the conditional branch inside the loop.
o. Removing some indirect memory accesses:
   The loads of "s->prev_length" and "s->strstart" cannot be promoted to
   registers because the compiler is unable to disambiguate them from the
   store performed by INSERT_STRING().
o. Converting a non-countable loop into a countable one.
   I am not sure how much this change contributes on its own; in general, a
   countable loop is much easier to optimize than a non-countable one.
   (See the simplified before/after sketch below.)
Conflicts:
deflate.h
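For illustration, here is a minimal, self-contained sketch of the before/after loop shapes described in the commit message. The field names follow the patch, but the `state` struct, `insert_hash()`, and `main()` are hypothetical stand-ins for zlib's `deflate_state` and `INSERT_STRING()`, not the actual code:

```c
#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for zlib's deflate_state; only the fields that
 * matter for the loop restructuring are kept. */
typedef struct {
    unsigned  prev_length;   /* length of the previous match          */
    unsigned  strstart;      /* start of the string to insert         */
    uint16_t *head;          /* hash table, reached through the state */
} state;

/* Stand-in for INSERT_STRING(): it stores through s->head, so the compiler
 * cannot easily prove the store does not alias s->prev_length/s->strstart
 * and must re-load those fields on every iteration of the old loop. */
static void insert_hash(state *s, unsigned pos) {
    s->head[pos & 0x7fff] = (uint16_t)pos;
}

/* Old shape: a non-countable do/while whose exit condition re-reads
 * s->prev_length, with a conditional branch inside the loop body. */
static void insert_old(state *s, unsigned max_insert) {
    s->prev_length -= 2;
    do {
        if (++s->strstart <= max_insert)        /* branch inside the loop */
            insert_hash(s, s->strstart);
    } while (--s->prev_length != 0);            /* trip count not known up front */
    s->strstart++;
}

/* New shape: the trip count is computed once into locals, the clamp against
 * max_insert is hoisted out of the loop, and the loop itself is countable. */
static void insert_new(state *s, unsigned max_insert) {
    unsigned mov_fwd    = s->prev_length - 2;
    unsigned insert_cnt = mov_fwd;
    unsigned idx;

    if (insert_cnt > max_insert - s->strstart)  /* hoisted clamp */
        insert_cnt = max_insert - s->strstart;

    for (idx = 0; idx < insert_cnt; idx++)      /* countable, branch-free body */
        insert_hash(s, s->strstart + 1 + idx);

    s->prev_length = 0;
    s->strstart += mov_fwd + 1;
}

int main(void) {
    static uint16_t table[1 << 15];
    state a = { 8, 100, table }, b = { 8, 100, table };

    insert_old(&a, 1000);
    insert_new(&b, 1000);
    printf("strstart: old=%u new=%u\n", a.strstart, b.strstart);  /* both 107 */
    return 0;
}
```

Because `mov_fwd` and `insert_cnt` live in registers for the whole loop, the compiler no longer has to reload `s->prev_length` after every hash store, and the fixed trip count makes the loop a better candidate for unrolling or other loop optimizations.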
| -rw-r--r-- | deflate.c | 27 |
| -rw-r--r-- | deflate.h | 3 |
2 files changed, 30 insertions, 0 deletions
```diff
--- a/deflate.c
+++ b/deflate.c
@@ -236,6 +236,17 @@ local inline Pos insert_string(deflate_state *z_const s, z_const Pos str)
 }
 
+#ifndef NOT_TWEAK_COMPILER
+__attribute__ ((always_inline)) local void
+bulk_insert_str(deflate_state *s, Pos startpos, uInt count) {
+    uInt idx;
+    for (idx = 0; idx < count; idx++) {
+        Posf dummy;
+        INSERT_STRING(s, startpos + idx, dummy);
+    }
+}
+#endif
+
 /* ===========================================================================
  * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
  * prev[] will be initialized on the fly.
@@ -1729,6 +1740,8 @@ local block_state deflate_slow(s, flush)
              * the hash table.
              */
             s->lookahead -= s->prev_length-1;
+
+#ifdef NOT_TWEAK_COMPILER
             s->prev_length -= 2;
             do {
                 if (++s->strstart <= max_insert) {
@@ -1738,6 +1751,20 @@ local block_state deflate_slow(s, flush)
             s->match_available = 0;
             s->match_length = MIN_MATCH-1;
             s->strstart++;
+#else
+        {
+            uInt mov_fwd = s->prev_length - 2;
+            uInt insert_cnt = mov_fwd;
+            if (unlikely(insert_cnt > max_insert - s->strstart))
+                insert_cnt = max_insert - s->strstart;
+
+            bulk_insert_str(s, s->strstart + 1, insert_cnt);
+            s->prev_length = 0;
+            s->match_available = 0;
+            s->match_length = MIN_MATCH-1;
+            s->strstart += mov_fwd + 1;
+        }
+#endif /*NOT_TWEAK_COMPILER*/
 
             if (bflush) FLUSH_BLOCK(s, 0);
--- a/deflate.h
+++ b/deflate.h
@@ -445,4 +445,7 @@ local void send_bits(s, value, length)
 
 #endif
 
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
 #endif /* DEFLATE_H */
```
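A side note on the `likely()`/`unlikely()` macros added to deflate.h: they wrap GCC's `__builtin_expect`, which tells the optimizer which outcome of a condition is expected so it can favor that path in code layout and branch prediction. A minimal, stand-alone illustration of the same idiom (the `clamp()` helper is hypothetical, not part of the patch):

```c
#include <stdio.h>

/* Same macros as the ones added to deflate.h; __builtin_expect is a
 * GCC/Clang builtin, so other compilers would need a fallback such as
 * #define unlikely(x) (x). */
#define likely(x)   __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)

/* Hypothetical helper mirroring the hoisted clamp in the patch:
 * exceeding the limit is hinted as the rare case. */
static unsigned clamp(unsigned value, unsigned limit) {
    if (unlikely(value > limit))
        value = limit;
    return value;
}

int main(void) {
    printf("%u\n", clamp(10, 4));   /* prints 4 */
    printf("%u\n", clamp(3, 4));    /* prints 3 */
    return 0;
}
```

In the patch itself, `unlikely()` marks the clamp against `max_insert`, hinting to the compiler that the bulk insert rarely needs to be truncated.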
