diff options
| author | Adam Stylinski <kungfujesus06@gmail.com> | 2026-03-07 13:27:27 -0500 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2026-03-09 13:33:29 +0100 |
| commit | 2385c8fed8663798b2cf51648fb8d74be0ba594b (patch) | |
| tree | 60b78c0e75ed5ec1cad19ca32961d95c8497ee29 | |
| parent | db3a6caa9aeb5bc26dec4100f45fa81f3663cd18 (diff) | |
| download | Project-Tick-2385c8fed8663798b2cf51648fb8d74be0ba594b.tar.gz Project-Tick-2385c8fed8663798b2cf51648fb8d74be0ba594b.zip | |
Unroll the slide hash loop similar to other ISAs
We do this to backfill the pipeline a little better, particularly on
the G5. Conveniently, each loop iteration now processes 64 bytes (four
16-byte vectors), i.e. an entire cacheline on CPUs with 64-byte lines.
| -rw-r--r-- | arch/power/slide_ppc_tpl.h | 26 |
1 files changed, 19 insertions, 7 deletions
diff --git a/arch/power/slide_ppc_tpl.h b/arch/power/slide_ppc_tpl.h index 680a7f8e2a..24629b4039 100644 --- a/arch/power/slide_ppc_tpl.h +++ b/arch/power/slide_ppc_tpl.h @@ -12,15 +12,27 @@ static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize Pos *p = table; do { - vector unsigned short value, result; + /* Do the pointer arithmetic early to hopefully overlap the vector unit */ + Pos *q = p; + p += 32; + vector unsigned short value0, value1, value2, value3; + vector unsigned short result0, result1, result2, result3; - value = vec_ld(0, p); - result = vec_subs(value, vmx_wsize); - vec_st(result, 0, p); + value0 = vec_ld(0, q); + value1 = vec_ld(16, q); + value2 = vec_ld(32, q); + value3 = vec_ld(48, q); + result0 = vec_subs(value0, vmx_wsize); + result1 = vec_subs(value1, vmx_wsize); + result2 = vec_subs(value2, vmx_wsize); + result3 = vec_subs(value3, vmx_wsize); + vec_st(result0, 0, q); + vec_st(result1, 16, q); + vec_st(result2, 32, q); + vec_st(result3, 48, q); - p += 8; - entries -= 8; - } while (entries > 0); + entries -= 32; + } while (entries); } void Z_INTERNAL SLIDE_PPC(deflate_state *s) { |
