diff options
Diffstat (limited to 'neozip/arch/power/slide_ppc_tpl.h')
| -rw-r--r-- | neozip/arch/power/slide_ppc_tpl.h | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/neozip/arch/power/slide_ppc_tpl.h b/neozip/arch/power/slide_ppc_tpl.h new file mode 100644 index 0000000000..24629b4039 --- /dev/null +++ b/neozip/arch/power/slide_ppc_tpl.h @@ -0,0 +1,44 @@ +/* Optimized slide_hash for PowerPC processors + * Copyright (C) 2017-2021 Mika T. Lindqvist <postmaster@raasu.org> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include <altivec.h> +#include "zbuild.h" +#include "deflate.h" + +static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { + const vector unsigned short vmx_wsize = vec_splats(wsize); + Pos *p = table; + + do { + /* Do the pointer arithmetic early to hopefully overlap the vector unit */ + Pos *q = p; + p += 32; + vector unsigned short value0, value1, value2, value3; + vector unsigned short result0, result1, result2, result3; + + value0 = vec_ld(0, q); + value1 = vec_ld(16, q); + value2 = vec_ld(32, q); + value3 = vec_ld(48, q); + result0 = vec_subs(value0, vmx_wsize); + result1 = vec_subs(value1, vmx_wsize); + result2 = vec_subs(value2, vmx_wsize); + result3 = vec_subs(value3, vmx_wsize); + vec_st(result0, 0, q); + vec_st(result1, 16, q); + vec_st(result2, 32, q); + vec_st(result3, 48, q); + + entries -= 32; + } while (entries); +} + +void Z_INTERNAL SLIDE_PPC(deflate_state *s) { + Assert(s->w_size <= UINT16_MAX, "w_size should fit in uint16_t"); + uint16_t wsize = (uint16_t)s->w_size; + + slide_hash_chain(s->head, HASH_SIZE, wsize); + slide_hash_chain(s->prev, wsize, wsize); +} |
