summaryrefslogtreecommitdiff
path: root/neozip/arch/arm/crc32_armv8.c
blob: 59f2b6500983460cd4ecd0f9fde418b9d4d5f30f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/* crc32_armv8.c -- compute the CRC-32 of a data stream
 * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
 * Copyright (C) 2016 Yang Zhang
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#ifdef ARM_CRC32

#include "zbuild.h"
#include "acle_intrins.h"
#include "crc32_armv8_p.h"

/* Shared CRC-32 kernel built on the __crc32b / __crc32d intrinsics.
 *
 *   crc   running CRC value supplied by the caller; it is bit-inverted on entry.
 *   dst   destination buffer, written only when COPY is non-zero.
 *   src   input bytes to checksum.
 *   len   number of input bytes.
 *   COPY  flag that is constant at every call site in this file; non-zero
 *         additionally mirrors the consumed bytes into dst.
 *
 * Returns the updated CRC. The one-byte path inverts the state itself before
 * returning; every other path returns whatever crc32_armv8_tail() yields
 * (presumably the finalized CRC -- see crc32_armv8_p.h).
 */
Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len,
                                                           const int COPY) {
    uint32_t state = ~crc;

    /* A lone byte is handled right here, skipping the align/tail helpers. */
    if (UNLIKELY(len == 1)) {
        const uint8_t only = *src;
        if (COPY)
            *dst = only;
        return ~__crc32b(state, only);
    }

    /* Hand any leading misalignment (relative to 8 bytes) to the helper, which
     * receives dst/src/len by address so it can update them. */
    const uintptr_t misalign = ALIGN_DIFF(src, 8);
    if (misalign != 0)
        state = crc32_armv8_align(state, &dst, &src, &len, misalign, COPY);

    /* Main loop: one 64-byte block (eight 64-bit words) per iteration. */
    for (; len >= 64; src += 64, len -= 64) {
        const uint64_t *in = (const uint64_t *)src;
        uint64_t blk[8];

        /* Finish all eight loads before any store to dst happens. */
        blk[0] = in[0];
        blk[1] = in[1];
        blk[2] = in[2];
        blk[3] = in[3];
        blk[4] = in[4];
        blk[5] = in[5];
        blk[6] = in[6];
        blk[7] = in[7];

        if (COPY) {
            /* memcpy is used for the store, so dst needs no particular alignment. */
            memcpy(dst, blk, sizeof(blk));
            dst += sizeof(blk);
        }

        /* Fold the words into the state in source order. */
        state = __crc32d(state, blk[0]);
        state = __crc32d(state, blk[1]);
        state = __crc32d(state, blk[2]);
        state = __crc32d(state, blk[3]);
        state = __crc32d(state, blk[4]);
        state = __crc32d(state, blk[5]);
        state = __crc32d(state, blk[6]);
        state = __crc32d(state, blk[7]);
    }

    /* Fewer than 64 bytes remain; the tail helper consumes them. */
    return crc32_armv8_tail(state, dst, src, len, COPY);
}

/* Checksum-only entry point: updates crc over len bytes at buf.
 * Forwards to crc32_copy_impl() with COPY == 0 and a NULL destination.
 */
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
    uint8_t *const no_dst = NULL; /* no output buffer in checksum-only mode */

    return crc32_copy_impl(crc, no_dst, buf, len, 0);
}

/* Checksum-and-copy entry point: updates crc over len bytes at src and
 * copies those bytes to dst. Returns the updated CRC.
 *
 * The strategy is chosen at compile time from OPTIMAL_CMP.
 */
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
#if OPTIMAL_CMP < 32
    /* Without unaligned access, interleaved stores get decomposed into byte
     * ops, so checksum first and then copy in one separate pass. */
    const uint32_t updated = crc32_armv8(crc, src, len);
    memcpy(dst, src, len);
    return updated;
#else
    /* Fused path: the kernel copies each block as it checksums it. */
    return crc32_copy_impl(crc, dst, src, len, 1);
#endif
}
#endif