summary | refs | log | tree | commit | diff
path: root/neozip/arch/loongarch/lasxintrin_ext.h
diff options
context:
space:
mode:
Diffstat (limited to 'neozip/arch/loongarch/lasxintrin_ext.h')
-rw-r--r-- neozip/arch/loongarch/lasxintrin_ext.h 61
1 files changed, 61 insertions, 0 deletions
diff --git a/neozip/arch/loongarch/lasxintrin_ext.h b/neozip/arch/loongarch/lasxintrin_ext.h
new file mode 100644
index 0000000000..b1e72cff86
--- /dev/null
+++ b/neozip/arch/loongarch/lasxintrin_ext.h
@@ -0,0 +1,61 @@
+/* lasxintrin_ext.h
+ * Copyright (C) 2025 Vladislav Shchapov <vladislav@shchapov.ru>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#ifndef LASXINTRIN_EXT_H
+#define LASXINTRIN_EXT_H
+
+#include <lsxintrin.h>
+#include <lasxintrin.h>
+
+
+/* Widen a 128-bit vector to 256 bits, starting from a vector initialised
+ * with __lasx_xvldi(0) and merging `src` into it.
+ *
+ * When the toolchain defines __loongarch_asx_sx_conv the merge is done with
+ * the __lasx_insert_128_lo intrinsic; otherwise an xvpermi.q instruction is
+ * issued through inline assembly.
+ * NOTE(review): presumably `src` ends up in the low 128 bits and the high
+ * 128 bits stay zero in both variants -- confirm against the ISA manual.
+ */
+#ifdef __loongarch_asx_sx_conv
+static inline __m256i lasx_zext_128(__m128i src) {
+    return __lasx_insert_128_lo(__lasx_xvldi(0), src);
+}
+#else
+static inline __m256i lasx_zext_128(__m128i src) {
+    __m256i widened = __lasx_xvldi(0);
+    /* The "0" constraint ties the first input to the output register, so
+     * the instruction reads and overwrites `widened` in place. */
+    __asm__ volatile ("xvpermi.q %u0,%u2,0x30\n" : "=f"(widened) : "0"(widened), "f"(src));
+    return widened;
+}
+#endif
+
+/* Local definition of __lasx_concat_128, compiled only when the toolchain
+ * does not define __loongarch_asx_sx_conv.
+ * NOTE(review): presumably the compiler headers provide this intrinsic
+ * themselves when that macro is defined -- confirm.
+ *
+ * Combines two 128-bit vectors into one 256-bit vector with a single
+ * xvpermi.q instruction: `lo` is tied to the output register (constraint
+ * "0") and `hi` is supplied as the second source operand.
+ */
+#if !defined(__loongarch_asx_sx_conv)
+static inline __m256i __lasx_concat_128(__m128i lo, __m128i hi) {
+    __m256i joined;
+    __asm__ volatile ("xvpermi.q %u0,%u2,0x02\n" : "=f"(joined) : "0"(lo), "f"(hi));
+    return joined;
+}
+#endif
+
+/* Build a 256-bit vector from one 128-bit value by passing `in` as both
+ * the `lo` and the `hi` argument of __lasx_concat_128. */
+static inline __m256i lasx_broadcast_128(__m128i in) {
+    __m256i duplicated = __lasx_concat_128(in, in);
+    return duplicated;
+}
+
+/* Sum of absolute differences over unsigned bytes.
+ *
+ * Takes the per-byte absolute difference of `a` and `b`, then folds it with
+ * three widening horizontal adds (byte -> halfword -> word -> doubleword),
+ * each applied to the previous result paired with itself.
+ */
+static inline __m256i lasx_sad_bu(__m256i a, __m256i b) {
+    const __m256i abs_diff = __lasx_xvabsd_bu(a, b);
+    const __m256i sum16 = __lasx_xvhaddw_hu_bu(abs_diff, abs_diff);
+    const __m256i sum32 = __lasx_xvhaddw_wu_hu(sum16, sum16);
+    return __lasx_xvhaddw_du_wu(sum32, sum32);
+}
+
+/* Multiply-add of byte pairs into halfwords.
+ *
+ * Forms the widening products of the odd-indexed and of the even-indexed
+ * byte elements of `a` and `b` (the `_bu_b` variants, i.e. unsigned-by-signed
+ * going by the intrinsic names), then combines the two halfword vectors
+ * with a saturating add.
+ */
+static inline __m256i lasx_maddubs_w_h(__m256i a, __m256i b) {
+    __m256i odd_products = __lasx_xvmulwod_h_bu_b(a, b);
+    __m256i even_products = __lasx_xvmulwev_h_bu_b(a, b);
+    return __lasx_xvsadd_h(odd_products, even_products);
+}
+
+/* Multiply-add of halfword pairs into words.
+ *
+ * Computes the widening products of the even-indexed halfwords of `a` and
+ * `b`, then uses them as the accumulator of a widening multiply-add over
+ * the odd-indexed halfwords.
+ */
+static inline __m256i lasx_madd_w_h(__m256i a, __m256i b) {
+    __m256i even_products = __lasx_xvmulwev_w_h(a, b);
+    return __lasx_xvmaddwod_w_h(even_products, a, b);
+}
+
+/* Collect a per-byte "less than zero" mask of `v` into one int.
+ *
+ * __lasx_xvmskltz_b produces the mask vector; word element 0 supplies the
+ * low bits of the result and word element 4 is shifted up by 16 to supply
+ * the high bits.
+ * NOTE(review): presumably each picked word holds a 16-bit mask for one
+ * 128-bit half of `v` -- confirm against the ISA manual.
+ *
+ * The combination is done in unsigned arithmetic: shifting a signed int
+ * whose bit 15 is set left by 16 is undefined behaviour in C, whereas the
+ * unsigned shift is well defined and yields the same bit pattern.
+ */
+static inline int lasx_movemask_b(__m256i v) {
+    __m256i mask = __lasx_xvmskltz_b(v);
+    unsigned int lo = (unsigned int)__lasx_xvpickve2gr_w(mask, 0);
+    unsigned int hi = (unsigned int)__lasx_xvpickve2gr_w(mask, 4);
+    return (int)(lo | (hi << 16));
+}
+
+/* Byte shuffle of `a` controlled by the index bytes in `b`.
+ * See: lsx_shuffle_b
+ *
+ * Only the low four bits of each index byte are passed to the shuffle.
+ * Result bytes whose index byte in `b` is negative (top bit set) are
+ * cleared to zero afterwards.
+ */
+static inline __m256i lasx_shuffle_b(__m256i a, __m256i b) {
+    __m256i low_nibble_idx = __lasx_xvandi_b(b, 0xF);
+    __m256i picked = __lasx_xvshuf_b(a, a, low_nibble_idx);
+    __m256i negative_idx = __lasx_xvslti_b(b, 0);
+    /* nor(x, x) is the bitwise complement: all-ones where the index was
+     * non-negative. */
+    __m256i keep = __lasx_xvnor_v(negative_idx, negative_idx);
+    return __lasx_xvand_v(picked, keep);
+}
+
+#endif // include guard LASXINTRIN_EXT_H