summaryrefslogtreecommitdiff
path: root/neozip/arch/loongarch/lasxintrin_ext.h
blob: b1e72cff86f649d53e6ec0bf7f0ddcd292e4d84a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/* lasxintrin_ext.h
 * Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
#ifndef LASXINTRIN_EXT_H
#define LASXINTRIN_EXT_H

#include <lsxintrin.h>
#include <lasxintrin.h>


static inline __m256i lasx_zext_128(__m128i src) {
#ifdef __loongarch_asx_sx_conv
    return __lasx_insert_128_lo(__lasx_xvldi(0), src);
#else
    __m256i dest = __lasx_xvldi(0);
    __asm__ volatile ("xvpermi.q %u0,%u2,0x30\n" : "=f"(dest) : "0"(dest), "f"(src));
    return dest;
#endif
}

#ifndef __loongarch_asx_sx_conv
static inline __m256i __lasx_concat_128(__m128i lo, __m128i hi) {
    __m256i dest;
    __asm__ volatile ("xvpermi.q %u0,%u2,0x02\n" : "=f"(dest) : "0"(lo), "f"(hi));
    return dest;
}
#endif

static inline __m256i lasx_broadcast_128(__m128i in) {
    return __lasx_concat_128(in, in);
}

static inline __m256i lasx_sad_bu(__m256i a, __m256i b) {
    __m256i tmp = __lasx_xvabsd_bu(a, b);
    tmp = __lasx_xvhaddw_hu_bu(tmp, tmp);
    tmp = __lasx_xvhaddw_wu_hu(tmp, tmp);
    return __lasx_xvhaddw_du_wu(tmp, tmp);
}

static inline __m256i lasx_maddubs_w_h(__m256i a, __m256i b) {
    return __lasx_xvsadd_h(__lasx_xvmulwod_h_bu_b(a, b), __lasx_xvmulwev_h_bu_b(a, b));
}

static inline __m256i lasx_madd_w_h(__m256i a, __m256i b) {
    return __lasx_xvmaddwod_w_h(__lasx_xvmulwev_w_h(a, b), a, b);
}

static inline int lasx_movemask_b(__m256i v) {
    v = __lasx_xvmskltz_b(v);
    return __lasx_xvpickve2gr_w(v, 0) | (__lasx_xvpickve2gr_w(v, 4) << 16);
}

/* See: lsx_shuffle_b */
static inline __m256i lasx_shuffle_b(__m256i a, __m256i b) {
    __m256i msb_mask = __lasx_xvslti_b(b, 0);
    __m256i dst = __lasx_xvshuf_b(a, a, __lasx_xvandi_b(b, 0xF));
    return __lasx_xvand_v(dst, __lasx_xvnor_v(msb_mask, msb_mask));
}

#endif // include guard LASXINTRIN_EXT_H