summaryrefslogtreecommitdiff
path: root/neozip/arch/power/slide_ppc_tpl.h
blob: 24629b4039318b5aaa10ee0dddc5ee37c189f8fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/* Optimized slide_hash for PowerPC processors
 * Copyright (C) 2017-2021 Mika T. Lindqvist <postmaster@raasu.org>
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include <altivec.h>
#include "zbuild.h"
#include "deflate.h"

/* Subtract wsize from every entry of a hash/chain table, clamping at zero.
 *
 *   table   - first entry to process; it is read and written with
 *             vec_ld/vec_st, so it is expected to be 16-byte aligned
 *             (NOTE(review): confirm against how the tables are allocated).
 *   entries - number of entries to process. The loop body always runs at
 *             least once and consumes 32 entries per pass, so this must be
 *             a non-zero multiple of 32.
 *   wsize   - amount subtracted from each entry.
 *
 * Each pass handles four vectors of unsigned shorts (byte offsets 0, 16, 32
 * and 48), which matches the 32-entry stride as long as Pos is a 16-bit type.
 * vec_subs is the saturating form of subtraction, so entries smaller than
 * wsize end up as 0 instead of wrapping around.
 */
static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
    const vector unsigned short delta = vec_splats(wsize);
    Pos *cursor = table;
    uint32_t remaining = entries;

    do {
        /* Bump the scalar bookkeeping up front so that it can hopefully
         * overlap with the work done in the vector unit. */
        Pos *chunk = cursor;
        cursor += 32;
        remaining -= 32;

        /* Issue all four loads before any dependent arithmetic. */
        const vector unsigned short in0 = vec_ld(0, chunk);
        const vector unsigned short in1 = vec_ld(16, chunk);
        const vector unsigned short in2 = vec_ld(32, chunk);
        const vector unsigned short in3 = vec_ld(48, chunk);

        /* Saturating subtract, then write each vector back in place. */
        vec_st(vec_subs(in0, delta), 0, chunk);
        vec_st(vec_subs(in1, delta), 16, chunk);
        vec_st(vec_subs(in2, delta), 32, chunk);
        vec_st(vec_subs(in3, delta), 48, chunk);
    } while (remaining != 0);
}

/* Slide both tables of a deflate state down by the window size.
 *
 * Every entry of s->head (HASH_SIZE entries) and of s->prev (w_size entries)
 * has w_size subtracted from it by slide_hash_chain().
 *
 * The window size has to be representable as uint16_t; this is checked with
 * Assert before the narrowing cast.
 *
 * NOTE(review): SLIDE_PPC is presumably a macro defined by the file that
 * includes this template, selecting the exported function name - confirm.
 */
void Z_INTERNAL SLIDE_PPC(deflate_state *s) {
    Assert(s->w_size <= UINT16_MAX, "w_size should fit in uint16_t");
    const uint16_t window = (uint16_t)s->w_size;

    /* Hash heads first, then the per-position chain links. */
    slide_hash_chain(s->head, HASH_SIZE, window);
    slide_hash_chain(s->prev, window, window);
}