summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@nathanm.com>2022-05-24 11:44:20 -0700
committerHans Kristian Rosbach <hk-github@circlestorm.org>2022-05-25 12:04:35 +0200
commita6155234a2aa34b4562570dbd359a2a505962a01 (patch)
treefffecc2105016a315c3da7d3d77caaf4d9ff05e4 /tools
parentd79984b5bcaccab15e6cd13d7d1edea32ac36977 (diff)
downloadProject-Tick-a6155234a2aa34b4562570dbd359a2a505962a01.tar.gz
Project-Tick-a6155234a2aa34b4562570dbd359a2a505962a01.zip
Speed up software CRC-32 computation by a factor of 1.5 to 3.
Use the interleaved method of Kadatch and Jenkins in order to make use of pipelined instructions through multiple ALUs in a single core. This also speeds up and simplifies the combination of CRCs, and updates the functions to pre-calculate and use an operator for CRC combination. Co-authored-by: Nathan Moinvaziri <nathan@nathanm.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/makecrct.c321
1 files changed, 197 insertions, 124 deletions
diff --git a/tools/makecrct.c b/tools/makecrct.c
index 3f6b37b13d..bfc2b96551 100644
--- a/tools/makecrct.c
+++ b/tools/makecrct.c
@@ -1,4 +1,4 @@
-/* crc32.c -- output crc32 tables
+/* makecrct.c -- output crc32 tables
* Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@@ -6,172 +6,245 @@
#include <stdio.h>
#include <inttypes.h>
#include "zbuild.h"
-#include "deflate.h"
-#include "crc32_p.h"
+#include "zutil.h"
-static uint32_t crc_table[8][256];
-static uint32_t crc_comb[GF2_DIM][GF2_DIM];
+/*
+ The crc32 table header file contains tables for both 32-bit and 64-bit
+ z_word_t's, and so requires a 64-bit type be available. In that case,
+ z_word_t must be defined to be 64-bits. This code then also generates
+ and writes out the tables for the case that z_word_t is 32 bits.
+*/
+
+#define W 8 /* Need a 64-bit integer type in order to generate crc32 tables. */
+
+#include "crc32_braid_p.h"
+
+static uint32_t crc_table[256];
+static z_word_t crc_big_table[256];
+
+static uint32_t crc_braid_table[W][256];
+static z_word_t crc_braid_big_table[W][256];
+static uint32_t x2n_table[32];
+
+#include "crc32_braid_comb_p.h"
-static void gf2_matrix_square(uint32_t *square, const uint32_t *mat);
static void make_crc_table(void);
-static void make_crc_combine_table(void);
static void print_crc_table(void);
-static void print_crc_combine_table(void);
-static void write_table(const uint32_t *, int);
+static void braid(uint32_t ltl[][256], z_word_t big[][256], int n, int w);
-/* ========================================================================= */
-static void gf2_matrix_square(uint32_t *square, const uint32_t *mat) {
- int n;
-
- for (n = 0; n < GF2_DIM; n++)
- square[n] = gf2_matrix_times(mat, mat[n]);
-}
+static void write_table(const uint32_t *table, int k);
+static void write_table32hi(const z_word_t *table, int k);
+static void write_table64(const z_word_t *table, int k);
-/* =========================================================================
+/* ========================================================================= */
+/*
Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
Polynomials over GF(2) are represented in binary, one bit per coefficient,
- with the lowest powers in the most significant bit. Then adding polynomials
+ with the lowest powers in the most significant bit. Then adding polynomials
is just exclusive-or, and multiplying a polynomial by x is a right shift by
- one. If we call the above polynomial p, and represent a byte as the
+ one. If we call the above polynomial p, and represent a byte as the
polynomial q, also with the lowest power in the most significant bit (so the
byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
where a mod b means the remainder after dividing a by b.
This calculation is done using the shift-register method of multiplying and
- taking the remainder. The register is initialized to zero, and for each
+ taking the remainder. The register is initialized to zero, and for each
incoming bit, x^32 is added mod p to the register if the bit is a one (where
- x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
- x (which is shifting right by one and adding x^32 mod p if the bit shifted
- out is a one). We start with the highest power (least significant bit) of
- q and repeat for all eight bits of q.
-
- The first table is simply the CRC of all possible eight bit values. This is
- all the information needed to generate CRCs on data a byte at a time for all
- combinations of CRC register values and incoming bytes. The remaining tables
- allow for word-at-a-time CRC calculation for both big-endian and little-
- endian machines, where a word is four bytes.
+ x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
+ (which is shifting right by one and adding x^32 mod p if the bit shifted out
+ is a one). We start with the highest power (least significant bit) of q and
+ repeat for all eight bits of q.
+
+ The table is simply the CRC of all possible eight bit values. This is all the
+ information needed to generate CRCs on data a byte at a time for all
+ combinations of CRC register values and incoming bytes.
*/
-static void make_crc_table(void) {
- int n, k;
- uint32_t c;
- uint32_t poly; /* polynomial exclusive-or pattern */
- /* terms of polynomial defining this crc (except x^32): */
- static const unsigned char p[] = {0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26};
-
- /* make exclusive-or pattern from polynomial (0xedb88320) */
- poly = 0;
- for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
- poly |= (uint32_t)1 << (31 - p[n]);
-
- /* generate a crc for every 8-bit value */
- for (n = 0; n < 256; n++) {
- c = (uint32_t)n;
- for (k = 0; k < 8; k++)
- c = c & 1 ? poly ^ (c >> 1) : c >> 1;
- crc_table[0][n] = c;
+static void make_crc_table() {
+ unsigned i, j, n;
+ uint32_t p;
+
+ /* initialize the CRC of bytes tables */
+ for (i = 0; i < 256; i++) {
+ p = i;
+ for (j = 0; j < 8; j++)
+ p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
+ crc_table[i] = p;
+ crc_big_table[i] = ZSWAP64(p);
}
- /* generate crc for each value followed by one, two, and three zeros,
- and then the byte reversal of those as well as the first table */
- for (n = 0; n < 256; n++) {
- c = crc_table[0][n];
- crc_table[4][n] = ZSWAP32(c);
- for (k = 1; k < 4; k++) {
- c = crc_table[0][c & 0xff] ^ (c >> 8);
- crc_table[k][n] = c;
- crc_table[k + 4][n] = ZSWAP32(c);
- }
- }
+ /* initialize the x^2^n mod p(x) table */
+ p = (uint32_t)1 << 30; /* x^1 */
+ x2n_table[0] = p;
+ for (n = 1; n < 32; n++)
+ x2n_table[n] = p = multmodp(p, p);
+
+ /* initialize the braiding tables -- needs x2n_table[] */
+ braid(crc_braid_table, crc_braid_big_table, N, W);
}
-static void make_crc_combine_table(void) {
- int n, k;
- /* generate zero operators table for crc32_combine() */
-
- /* generate the operator to apply a single zero bit to a CRC -- the
- first row adds the polynomial if the low bit is a 1, and the
- remaining rows shift the CRC right one bit */
- k = GF2_DIM - 3;
- crc_comb[k][0] = 0xedb88320UL; /* CRC-32 polynomial */
- uint32_t row = 1;
- for (n = 1; n < GF2_DIM; n++) {
- crc_comb[k][n] = row;
- row <<= 1;
+/*
+ Generate the little and big-endian braid tables for the given n and z_word_t
+ size w. Each array must have room for w blocks of 256 elements.
+ */
+static void braid(uint32_t ltl[][256], z_word_t big[][256], int n, int w) {
+ int k;
+ uint32_t i, p, q;
+ for (k = 0; k < w; k++) {
+ p = x2nmodp((n * w + 3 - k) << 3, 0);
+ ltl[k][0] = 0;
+ big[w - 1 - k][0] = 0;
+ for (i = 1; i < 256; i++) {
+ ltl[k][i] = q = multmodp(i << 24, p);
+ big[w - 1 - k][i] = ZSWAP64(q);
+ }
}
- /* generate operators that apply 2, 4, and 8 zeros to a CRC, putting
- the last one, the operator for one zero byte, at the 0 position */
- gf2_matrix_square(crc_comb[k + 1], crc_comb[k]);
- gf2_matrix_square(crc_comb[k + 2], crc_comb[k + 1]);
- gf2_matrix_square(crc_comb[0], crc_comb[k + 2]);
-
- /* generate operators for applying 2^n zero bytes to a CRC, filling out
- the remainder of the table -- the operators repeat after GF2_DIM
- values of n, so the table only needs GF2_DIM entries, regardless of
- the size of the length being processed */
- for (n = 1; n < k; n++)
- gf2_matrix_square(crc_comb[n], crc_comb[n - 1]);
}
+/*
+ Write the 32-bit values in table[0..k-1] to out, five per line in
+ hexadecimal separated by commas.
+ */
static void write_table(const uint32_t *table, int k) {
int n;
for (n = 0; n < k; n++)
- printf("%s0x%08" PRIx32 "%s", n % 5 ? "" : " ",
+ printf("%s0x%08" PRIx32 "%s", n == 0 || n % 5 ? "" : " ",
(uint32_t)(table[n]),
- n == k - 1 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
+ n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
}
-static void print_crc_table(void) {
- int k;
- printf("#ifndef CRC32_TBL_H_\n");
- printf("#define CRC32_TBL_H_\n\n");
- printf("/* crc32_tbl.h -- tables for rapid CRC calculation\n");
- printf(" * Generated automatically by makecrct.c\n */\n\n");
+/*
+ Write the high 32-bits of each value in table[0..k-1] to out, five per line
+ in hexadecimal separated by commas.
+ */
+static void write_table32hi(const z_word_t *table, int k) {
+ int n;
- /* print CRC table */
- printf("static const uint32_t ");
- printf("crc_table[8][256] =\n{\n {\n");
- write_table(crc_table[0], 256);
- for (k = 1; k < 8; k++) {
- printf(" },\n {\n");
- write_table(crc_table[k], 256);
- }
- printf(" }\n};\n\n");
+ for (n = 0; n < k; n++)
+ printf("%s0x%08" PRIx32 "%s", n == 0 || n % 5 ? "" : " ",
+ (uint32_t)(table[n] >> 32),
+ n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
+}
- printf("#endif /* CRC32_TBL_H_ */\n");
+/*
+ Write the 64-bit values in table[0..k-1] to out, three per line in
+ hexadecimal separated by commas. This assumes that if there is a 64-bit
+ type, then there is also a long long integer type, and it is at least 64
+ bits. If not, then the type cast and format string can be adjusted
+ accordingly.
+ */
+static void write_table64(const z_word_t *table, int k) {
+ int n;
+
+ for (n = 0; n < k; n++)
+ printf("%s0x%016" PRIx64 "%s", n == 0 || n % 3 ? "" : " ",
+ (uint64_t)(table[n]),
+ n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
}
-static void print_crc_combine_table(void) {
- int k;
- printf("#ifndef CRC32_COMB_TBL_H_\n");
- printf("#define CRC32_COMB_TBL_H_\n\n");
- printf("/* crc32_comb_tbl.h -- zero operators table for CRC combine\n");
+static void print_crc_table(void) {
+ int k, n;
+ uint32_t ltl[8][256];
+ z_word_t big[8][256];
+
+ printf("#ifndef CRC32_BRAID_TBL_H_\n");
+ printf("#define CRC32_BRAID_TBL_H_\n\n");
+ printf("/* crc32_braid_tbl.h -- tables for braided CRC calculation\n");
printf(" * Generated automatically by makecrct.c\n */\n\n");
- /* print zero operator table */
- printf("static const uint32_t ");
- printf("crc_comb[%d][%d] =\n{\n {\n", GF2_DIM, GF2_DIM);
- write_table(crc_comb[0], GF2_DIM);
- for (k = 1; k < GF2_DIM; k++) {
- printf(" },\n {\n");
- write_table(crc_comb[k], GF2_DIM);
+ /* print little-endian CRC table */
+ printf("static const uint32_t crc_table[] = {\n");
+ printf(" ");
+ write_table(crc_table, 256);
+ printf("};\n\n");
+
+ /* print big-endian CRC table for 64-bit z_word_t */
+ printf("#ifdef W\n\n");
+ printf("#if W == 8\n\n");
+ printf("static const z_word_t crc_big_table[] = {\n");
+ printf(" ");
+ write_table64(crc_big_table, 256);
+ printf("};\n\n");
+
+ /* print big-endian CRC table for 32-bit z_word_t */
+ printf("#else /* W == 4 */\n\n");
+ printf("static const z_word_t crc_big_table[] = {\n");
+ printf(" ");
+ write_table32hi(crc_big_table, 256);
+ printf("};\n\n");
+ printf("#endif\n\n");
+ printf("#endif /* W */\n\n");
+
+ /* write out braid tables for each value of N */
+ for (n = 1; n <= 6; n++) {
+ printf("#if N == %d\n", n);
+
+ /* compute braid tables for this N and 64-bit word_t */
+ braid(ltl, big, n, 8);
+
+ /* write out braid tables for 64-bit z_word_t */
+ printf("\n");
+ printf("#if W == 8\n\n");
+ printf("static const uint32_t crc_braid_table[][256] = {\n");
+ for (k = 0; k < 8; k++) {
+ printf(" {");
+ write_table(ltl[k], 256);
+ printf("}%s", k < 7 ? ",\n" : "");
+ }
+ printf("};\n\n");
+ printf("static const z_word_t crc_braid_big_table[][256] = {\n");
+ for (k = 0; k < 8; k++) {
+ printf(" {");
+ write_table64(big[k], 256);
+ printf("}%s", k < 7 ? ",\n" : "");
+ }
+ printf("};\n");
+
+ /* compute braid tables for this N and 32-bit word_t */
+ braid(ltl, big, n, 4);
+
+ /* write out braid tables for 32-bit z_word_t */
+ printf("\n");
+ printf("#else /* W == 4 */\n\n");
+ printf("static const uint32_t crc_braid_table[][256] = {\n");
+ for (k = 0; k < 4; k++) {
+ printf(" {");
+ write_table(ltl[k], 256);
+ printf("}%s", k < 3 ? ",\n" : "");
+ }
+ printf("};\n\n");
+ printf("static const z_word_t crc_braid_big_table[][256] = {\n");
+ for (k = 0; k < 4; k++) {
+ printf(" {");
+ write_table32hi(big[k], 256);
+ printf("}%s", k < 3 ? ",\n" : "");
+ }
+ printf("};\n\n");
+ printf("#endif /* W */\n\n");
+
+ printf("#endif /* N == %d */\n", n);
}
- printf(" }\n};\n\n");
+ printf("\n");
+
+ /* write out zeros operator table */
+ printf("static const uint32_t x2n_table[] = {\n");
+ printf(" ");
+ write_table(x2n_table, 32);
+ printf("};\n");
- printf("#endif /* CRC32_COMB_TBL_H_ */\n");
+ printf("\n");
+ printf("#endif /* CRC32_BRAID_TBL_H_ */\n");
}
-// The output of this application can be piped out to recreate crc32.h
+// The output of this application can be piped out to recreate crc32 tables
int main(int argc, char *argv[]) {
- if (argc > 1 && strcmp(argv[1], "-c") == 0) {
- make_crc_combine_table();
- print_crc_combine_table();
- } else {
- make_crc_table();
- print_crc_table();
- }
+ Z_UNUSED(argc);
+ Z_UNUSED(argv);
+
+ make_crc_table();
+ print_crc_table();
return 0;
}