summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShuxin Yang <shuxinyang2006@gmail.com>2014-03-17 18:17:23 -0700
committerhansr <hk-git@circlestorm.org>2014-10-08 14:15:37 +0200
commit34bf824bf755fdf4979a9ed26e86028a12fdedc8 (patch)
tree052e976273a8b6949b10e44d267b0eb70d1783a0
parentc787ff04c3139fd6cba14a689e3e84b923be96e8 (diff)
downloadProject-Tick-34bf824bf755fdf4979a9ed26e86028a12fdedc8.tar.gz
Project-Tick-34bf824bf755fdf4979a9ed26e86028a12fdedc8.zip
Restructure the loop; this yields roughly a 3% speedup in run time. I believe the speedup arises from:
  o Removing the conditional branch inside the loop.
  o Removing some indirect memory accesses: the accesses to "s->prev_length" and "s->strstart" cannot be promoted to registers because the compiler is unable to disambiguate them from the store operation in INSERT_STRING().
  o Converting a non-countable loop into a countable loop. I am not sure whether this change really contributes, but in general a countable loop is much easier to optimize than a non-countable one. Conflicts: deflate.h
-rw-r--r--deflate.c27
-rw-r--r--deflate.h3
2 files changed, 30 insertions, 0 deletions
diff --git a/deflate.c b/deflate.c
index 73f8f83a6f..ba7235b5a4 100644
--- a/deflate.c
+++ b/deflate.c
@@ -236,6 +236,17 @@ local inline Pos insert_string(deflate_state *z_const s, z_const Pos str)
}
+#ifndef NOT_TWEAK_COMPILER
+__attribute__ ((always_inline)) local void
+bulk_insert_str(deflate_state *s, Pos startpos, uInt count) {
+ uInt idx;
+ for (idx = 0; idx < count; idx++) {
+ Posf dummy;
+ INSERT_STRING(s, startpos + idx, dummy);
+ }
+}
+#endif
+
/* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
* prev[] will be initialized on the fly.
@@ -1729,6 +1740,8 @@ local block_state deflate_slow(s, flush)
* the hash table.
*/
s->lookahead -= s->prev_length-1;
+
+#ifdef NOT_TWEAK_COMPILER
s->prev_length -= 2;
do {
if (++s->strstart <= max_insert) {
@@ -1738,6 +1751,20 @@ local block_state deflate_slow(s, flush)
s->match_available = 0;
s->match_length = MIN_MATCH-1;
s->strstart++;
+#else
+ {
+ uInt mov_fwd = s->prev_length - 2;
+ uInt insert_cnt = mov_fwd;
+ if (unlikely(insert_cnt > max_insert - s->strstart))
+ insert_cnt = max_insert - s->strstart;
+
+ bulk_insert_str(s, s->strstart + 1, insert_cnt);
+ s->prev_length = 0;
+ s->match_available = 0;
+ s->match_length = MIN_MATCH-1;
+ s->strstart += mov_fwd + 1;
+ }
+#endif /*NOT_TWEAK_COMPILER*/
if (bflush) FLUSH_BLOCK(s, 0);
diff --git a/deflate.h b/deflate.h
index 1990409558..5cf2ce1268 100644
--- a/deflate.h
+++ b/deflate.h
@@ -445,4 +445,7 @@ local void send_bits(s, value, length)
#endif
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
#endif /* DEFLATE_H */