diff options
| author | Nathan Moinvaziri <nathan@nathanm.com> | 2022-05-24 11:44:20 -0700 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2022-05-25 12:04:35 +0200 |
| commit | a6155234a2aa34b4562570dbd359a2a505962a01 (patch) | |
| tree | fffecc2105016a315c3da7d3d77caaf4d9ff05e4 /tools | |
| parent | d79984b5bcaccab15e6cd13d7d1edea32ac36977 (diff) | |
| download | Project-Tick-a6155234a2aa34b4562570dbd359a2a505962a01.tar.gz Project-Tick-a6155234a2aa34b4562570dbd359a2a505962a01.zip | |
Speed up software CRC-32 computation by a factor of 1.5 to 3.
Use the interleaved method of Kadatch and Jenkins in order to make
use of pipelined instructions through multiple ALUs in a single
core. This also speeds up and simplifies the combination of CRCs,
and updates the functions to pre-calculate and use an operator for
CRC combination.
Co-authored-by: Nathan Moinvaziri <nathan@nathanm.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/makecrct.c | 321 |
1 files changed, 197 insertions, 124 deletions
diff --git a/tools/makecrct.c b/tools/makecrct.c index 3f6b37b13d..bfc2b96551 100644 --- a/tools/makecrct.c +++ b/tools/makecrct.c @@ -1,4 +1,4 @@ -/* crc32.c -- output crc32 tables +/* makecrct.c -- output crc32 tables * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -6,172 +6,245 @@ #include <stdio.h> #include <inttypes.h> #include "zbuild.h" -#include "deflate.h" -#include "crc32_p.h" +#include "zutil.h" -static uint32_t crc_table[8][256]; -static uint32_t crc_comb[GF2_DIM][GF2_DIM]; +/* + The crc32 table header file contains tables for both 32-bit and 64-bit + z_word_t's, and so requires a 64-bit type be available. In that case, + z_word_t must be defined to be 64-bits. This code then also generates + and writes out the tables for the case that z_word_t is 32 bits. +*/ + +#define W 8 /* Need a 64-bit integer type in order to generate crc32 tables. */ + +#include "crc32_braid_p.h" + +static uint32_t crc_table[256]; +static z_word_t crc_big_table[256]; + +static uint32_t crc_braid_table[W][256]; +static z_word_t crc_braid_big_table[W][256]; +static uint32_t x2n_table[32]; + +#include "crc32_braid_comb_p.h" -static void gf2_matrix_square(uint32_t *square, const uint32_t *mat); static void make_crc_table(void); -static void make_crc_combine_table(void); static void print_crc_table(void); -static void print_crc_combine_table(void); -static void write_table(const uint32_t *, int); +static void braid(uint32_t ltl[][256], z_word_t big[][256], int n, int w); -/* ========================================================================= */ -static void gf2_matrix_square(uint32_t *square, const uint32_t *mat) { - int n; - - for (n = 0; n < GF2_DIM; n++) - square[n] = gf2_matrix_times(mat, mat[n]); -} +static void write_table(const uint32_t *table, int k); +static void write_table32hi(const z_word_t *table, int k); +static void write_table64(const z_word_t *table, int k); -/* ========================================================================= +/* ========================================================================= */ +/* Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. Polynomials over GF(2) are represented in binary, one bit per coefficient, - with the lowest powers in the most significant bit. Then adding polynomials + with the lowest powers in the most significant bit. Then adding polynomials is just exclusive-or, and multiplying a polynomial by x is a right shift by - one. If we call the above polynomial p, and represent a byte as the + one. If we call the above polynomial p, and represent a byte as the polynomial q, also with the lowest power in the most significant bit (so the byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, where a mod b means the remainder after dividing a by b. This calculation is done using the shift-register method of multiplying and - taking the remainder. The register is initialized to zero, and for each + taking the remainder. The register is initialized to zero, and for each incoming bit, x^32 is added mod p to the register if the bit is a one (where - x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by - x (which is shifting right by one and adding x^32 mod p if the bit shifted - out is a one). We start with the highest power (least significant bit) of - q and repeat for all eight bits of q. - - The first table is simply the CRC of all possible eight bit values. This is - all the information needed to generate CRCs on data a byte at a time for all - combinations of CRC register values and incoming bytes. The remaining tables - allow for word-at-a-time CRC calculation for both big-endian and little- - endian machines, where a word is four bytes. + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x + (which is shifting right by one and adding x^32 mod p if the bit shifted out + is a one). We start with the highest power (least significant bit) of q and + repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all the + information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. */ -static void make_crc_table(void) { - int n, k; - uint32_t c; - uint32_t poly; /* polynomial exclusive-or pattern */ - /* terms of polynomial defining this crc (except x^32): */ - static const unsigned char p[] = {0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26}; - - /* make exclusive-or pattern from polynomial (0xedb88320) */ - poly = 0; - for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) - poly |= (uint32_t)1 << (31 - p[n]); - - /* generate a crc for every 8-bit value */ - for (n = 0; n < 256; n++) { - c = (uint32_t)n; - for (k = 0; k < 8; k++) - c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[0][n] = c; +static void make_crc_table() { + unsigned i, j, n; + uint32_t p; + + /* initialize the CRC of bytes tables */ + for (i = 0; i < 256; i++) { + p = i; + for (j = 0; j < 8; j++) + p = p & 1 ? (p >> 1) ^ POLY : p >> 1; + crc_table[i] = p; + crc_big_table[i] = ZSWAP64(p); } - /* generate crc for each value followed by one, two, and three zeros, - and then the byte reversal of those as well as the first table */ - for (n = 0; n < 256; n++) { - c = crc_table[0][n]; - crc_table[4][n] = ZSWAP32(c); - for (k = 1; k < 4; k++) { - c = crc_table[0][c & 0xff] ^ (c >> 8); - crc_table[k][n] = c; - crc_table[k + 4][n] = ZSWAP32(c); - } - } + /* initialize the x^2^n mod p(x) table */ + p = (uint32_t)1 << 30; /* x^1 */ + x2n_table[0] = p; + for (n = 1; n < 32; n++) + x2n_table[n] = p = multmodp(p, p); + + /* initialize the braiding tables -- needs x2n_table[] */ + braid(crc_braid_table, crc_braid_big_table, N, W); } -static void make_crc_combine_table(void) { - int n, k; - /* generate zero operators table for crc32_combine() */ - - /* generate the operator to apply a single zero bit to a CRC -- the - first row adds the polynomial if the low bit is a 1, and the - remaining rows shift the CRC right one bit */ - k = GF2_DIM - 3; - crc_comb[k][0] = 0xedb88320UL; /* CRC-32 polynomial */ - uint32_t row = 1; - for (n = 1; n < GF2_DIM; n++) { - crc_comb[k][n] = row; - row <<= 1; +/* + Generate the little and big-endian braid tables for the given n and z_word_t + size w. Each array must have room for w blocks of 256 elements. + */ +static void braid(uint32_t ltl[][256], z_word_t big[][256], int n, int w) { + int k; + uint32_t i, p, q; + for (k = 0; k < w; k++) { + p = x2nmodp((n * w + 3 - k) << 3, 0); + ltl[k][0] = 0; + big[w - 1 - k][0] = 0; + for (i = 1; i < 256; i++) { + ltl[k][i] = q = multmodp(i << 24, p); + big[w - 1 - k][i] = ZSWAP64(q); + } } - /* generate operators that apply 2, 4, and 8 zeros to a CRC, putting - the last one, the operator for one zero byte, at the 0 position */ - gf2_matrix_square(crc_comb[k + 1], crc_comb[k]); - gf2_matrix_square(crc_comb[k + 2], crc_comb[k + 1]); - gf2_matrix_square(crc_comb[0], crc_comb[k + 2]); - - /* generate operators for applying 2^n zero bytes to a CRC, filling out - the remainder of the table -- the operators repeat after GF2_DIM - values of n, so the table only needs GF2_DIM entries, regardless of - the size of the length being processed */ - for (n = 1; n < k; n++) - gf2_matrix_square(crc_comb[n], crc_comb[n - 1]); } +/* + Write the 32-bit values in table[0..k-1] to out, five per line in + hexadecimal separated by commas. + */ static void write_table(const uint32_t *table, int k) { int n; for (n = 0; n < k; n++) - printf("%s0x%08" PRIx32 "%s", n % 5 ? "" : " ", + printf("%s0x%08" PRIx32 "%s", n == 0 || n % 5 ? "" : " ", (uint32_t)(table[n]), - n == k - 1 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); } -static void print_crc_table(void) { - int k; - printf("#ifndef CRC32_TBL_H_\n"); - printf("#define CRC32_TBL_H_\n\n"); - printf("/* crc32_tbl.h -- tables for rapid CRC calculation\n"); - printf(" * Generated automatically by makecrct.c\n */\n\n"); +/* + Write the high 32-bits of each value in table[0..k-1] to out, five per line + in hexadecimal separated by commas. + */ +static void write_table32hi(const z_word_t *table, int k) { + int n; - /* print CRC table */ - printf("static const uint32_t "); - printf("crc_table[8][256] =\n{\n {\n"); - write_table(crc_table[0], 256); - for (k = 1; k < 8; k++) { - printf(" },\n {\n"); - write_table(crc_table[k], 256); - } - printf(" }\n};\n\n"); + for (n = 0; n < k; n++) + printf("%s0x%08" PRIx32 "%s", n == 0 || n % 5 ? "" : " ", + (uint32_t)(table[n] >> 32), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} - printf("#endif /* CRC32_TBL_H_ */\n"); +/* + Write the 64-bit values in table[0..k-1] to out, three per line in + hexadecimal separated by commas. This assumes that if there is a 64-bit + type, then there is also a long long integer type, and it is at least 64 + bits. If not, then the type cast and format string can be adjusted + accordingly. + */ +static void write_table64(const z_word_t *table, int k) { + int n; + + for (n = 0; n < k; n++) + printf("%s0x%016" PRIx64 "%s", n == 0 || n % 3 ? "" : " ", + (uint64_t)(table[n]), + n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); } -static void print_crc_combine_table(void) { - int k; - printf("#ifndef CRC32_COMB_TBL_H_\n"); - printf("#define CRC32_COMB_TBL_H_\n\n"); - printf("/* crc32_comb_tbl.h -- zero operators table for CRC combine\n"); +static void print_crc_table(void) { + int k, n; + uint32_t ltl[8][256]; + z_word_t big[8][256]; + + printf("#ifndef CRC32_BRAID_TBL_H_\n"); + printf("#define CRC32_BRAID_TBL_H_\n\n"); + printf("/* crc32_braid_tbl.h -- tables for braided CRC calculation\n"); printf(" * Generated automatically by makecrct.c\n */\n\n"); - /* print zero operator table */ - printf("static const uint32_t "); - printf("crc_comb[%d][%d] =\n{\n {\n", GF2_DIM, GF2_DIM); - write_table(crc_comb[0], GF2_DIM); - for (k = 1; k < GF2_DIM; k++) { - printf(" },\n {\n"); - write_table(crc_comb[k], GF2_DIM); + /* print little-endian CRC table */ + printf("static const uint32_t crc_table[] = {\n"); + printf(" "); + write_table(crc_table, 256); + printf("};\n\n"); + + /* print big-endian CRC table for 64-bit z_word_t */ + printf("#ifdef W\n\n"); + printf("#if W == 8\n\n"); + printf("static const z_word_t crc_big_table[] = {\n"); + printf(" "); + write_table64(crc_big_table, 256); + printf("};\n\n"); + + /* print big-endian CRC table for 32-bit z_word_t */ + printf("#else /* W == 4 */\n\n"); + printf("static const z_word_t crc_big_table[] = {\n"); + printf(" "); + write_table32hi(crc_big_table, 256); + printf("};\n\n"); + printf("#endif\n\n"); + printf("#endif /* W */\n\n"); + + /* write out braid tables for each value of N */ + for (n = 1; n <= 6; n++) { + printf("#if N == %d\n", n); + + /* compute braid tables for this N and 64-bit word_t */ + braid(ltl, big, n, 8); + + /* write out braid tables for 64-bit z_word_t */ + printf("\n"); + printf("#if W == 8\n\n"); + printf("static const uint32_t crc_braid_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + printf(" {"); + write_table(ltl[k], 256); + printf("}%s", k < 7 ? ",\n" : ""); + } + printf("};\n\n"); + printf("static const z_word_t crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + printf(" {"); + write_table64(big[k], 256); + printf("}%s", k < 7 ? ",\n" : ""); + } + printf("};\n"); + + /* compute braid tables for this N and 32-bit word_t */ + braid(ltl, big, n, 4); + + /* write out braid tables for 32-bit z_word_t */ + printf("\n"); + printf("#else /* W == 4 */\n\n"); + printf("static const uint32_t crc_braid_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + printf(" {"); + write_table(ltl[k], 256); + printf("}%s", k < 3 ? ",\n" : ""); + } + printf("};\n\n"); + printf("static const z_word_t crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + printf(" {"); + write_table32hi(big[k], 256); + printf("}%s", k < 3 ? ",\n" : ""); + } + printf("};\n\n"); + printf("#endif /* W */\n\n"); + + printf("#endif /* N == %d */\n", n); } - printf(" }\n};\n\n"); + printf("\n"); + + /* write out zeros operator table */ + printf("static const uint32_t x2n_table[] = {\n"); + printf(" "); + write_table(x2n_table, 32); + printf("};\n"); - printf("#endif /* CRC32_COMB_TBL_H_ */\n"); + printf("\n"); + printf("#endif /* CRC32_BRAID_TBL_H_ */\n"); } -// The output of this application can be piped out to recreate crc32.h +// The output of this application can be piped out to recreate crc32 tables int main(int argc, char *argv[]) { - if (argc > 1 && strcmp(argv[1], "-c") == 0) { - make_crc_combine_table(); - print_crc_combine_table(); - } else { - make_crc_table(); - print_crc_table(); - } + Z_UNUSED(argc); + Z_UNUSED(argv); + + make_crc_table(); + print_crc_table(); return 0; } |
