1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
/* benchmark_insert_string.cc -- benchmark insert_string variants
* Copyright (C) 2025 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include <limits.h>
#include <cstring>
#include <benchmark/benchmark.h>
extern "C" {
# include "zbuild.h"
# include "deflate.h"
# include "arch_functions.h"
# include "../test_cpu_features.h"
# include "insert_string_p.h"
}
#define MAX_WSIZE 32768
#define TEST_WINDOW_SIZE (MAX_WSIZE * 2)
typedef uint32_t (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
// Base class with common setup/teardown for both insert_string benchmarks
class insert_string_base: public benchmark::Fixture {
protected:
deflate_state *s;
public:
void SetUp(const ::benchmark::State&) {
s = (deflate_state*)zng_alloc_aligned(sizeof(deflate_state), 64);
memset(s, 0, sizeof(deflate_state));
// Set up window parameters
s->w_size = MAX_WSIZE;
s->window_size = TEST_WINDOW_SIZE;
// Allocate window
s->window = (uint8_t*)zng_alloc_aligned(TEST_WINDOW_SIZE, 64);
// Allocate hash tables
s->head = (Pos*)zng_alloc_aligned(HASH_SIZE * sizeof(Pos), 64);
s->prev = (Pos*)zng_alloc_aligned(MAX_WSIZE * sizeof(Pos), 64);
// Initialize hash tables
memset(s->head, 0, HASH_SIZE * sizeof(Pos));
memset(s->prev, 0, MAX_WSIZE * sizeof(Pos));
// Initialize rolling hash state for rolling variant
s->ins_h = 0;
// Fill window with deterministic data patterns
for (size_t i = 0; i < TEST_WINDOW_SIZE; i++) {
// Create patterns that will exercise the hash function well
s->window[i] = (uint8_t)((i * 17 + (i >> 4) * 31 + (i >> 8) * 13) & 0xFF);
}
}
void TearDown(const ::benchmark::State&) {
zng_free_aligned(s->window);
zng_free_aligned(s->head);
zng_free_aligned(s->prev);
zng_free_aligned(s);
}
};
class insert_string_bench: public insert_string_base {
public:
void Bench(benchmark::State& state, insert_string_cb insert_func) {
uint32_t str_pos = (uint32_t)state.range(0); // Starting position
uint32_t count = (uint32_t)state.range(1); // Number of strings to insert
// Ensure we don't go beyond window bounds
if (str_pos + count >= TEST_WINDOW_SIZE - 4) {
state.SkipWithError("Parameters exceed window size");
return;
}
for (auto _ : state) {
state.PauseTiming();
// Reset hash tables to ensure consistent starting state
memset(s->head, 0, HASH_SIZE * sizeof(Pos));
memset(s->prev, 0, MAX_WSIZE * sizeof(Pos));
s->ins_h = 0;
state.ResumeTiming();
// Benchmark the insert_string function
insert_func(s, str_pos, count);
}
}
};
#define BENCHMARK_INSERT_STRING(name, fptr, support_flag) \
BENCHMARK_DEFINE_F(insert_string_bench, name)(benchmark::State& state) { \
if (!(support_flag)) { \
state.SkipWithError("Function " #name " not supported"); \
} \
Bench(state, fptr); \
} \
BENCHMARK_REGISTER_F(insert_string_bench, name) \
->Args({100, 3}) /* Most common case */ \
->Args({100, 4}) \
->Args({100, 5}) \
->Args({100, 7}) \
->Args({100, 14}) /* Mid-range cluster */ \
->Args({100, 32}) /* Transition point */ \
->Args({100, 127}) /* Large cluster around powers of 2 */ \
->Args({100, 255}) /* Near maximum observed values */ \
->Unit(benchmark::kNanosecond);
// Benchmark the standard integer hash variant
BENCHMARK_INSERT_STRING(integer_hash, ::insert_string, 1);
// Benchmark the rolling hash variant
BENCHMARK_INSERT_STRING(rolling_hash, ::insert_string_roll, 1);
// Additional benchmark class for quick_insert_string functions
class quick_insert_string_bench: public insert_string_base {
public:
void Bench(benchmark::State& state, quick_insert_string_cb quick_insert_func) {
uint32_t start_pos = (uint32_t)state.range(0); // Starting position
uint32_t count = (uint32_t)state.range(1); // Number of insertions
if (start_pos + count >= TEST_WINDOW_SIZE - 4) {
state.SkipWithError("Parameters exceed window size");
return;
}
for (auto _ : state) {
state.PauseTiming();
// Reset hash tables
memset(s->head, 0, HASH_SIZE * sizeof(Pos));
memset(s->prev, 0, MAX_WSIZE * sizeof(Pos));
s->ins_h = 0;
state.ResumeTiming();
// Benchmark quick_insert_string (single insertions)
for (uint32_t i = 0; i < count; i++) {
uint32_t result = quick_insert_func(s, start_pos + i);
benchmark::DoNotOptimize(result);
}
}
}
};
#define BENCHMARK_QUICK_INSERT_STRING(name, fptr, support_flag) \
BENCHMARK_DEFINE_F(quick_insert_string_bench, name)(benchmark::State& state) { \
if (!(support_flag)) { \
state.SkipWithError("Function " #name " not supported"); \
} \
Bench(state, fptr); \
} \
BENCHMARK_REGISTER_F(quick_insert_string_bench, name) \
->Args({100, 1}) /* Single insertion (baseline) */ \
->Args({100, 100}) /* 100 insertions (measure amortized cost) */ \
->Args({16000, 100}) /* 100 insertions at mid window (different hash distribution) */ \
->Unit(benchmark::kNanosecond);
BENCHMARK_QUICK_INSERT_STRING(quick_integer_hash, ::quick_insert_string, 1);
BENCHMARK_QUICK_INSERT_STRING(quick_rolling_hash, ::quick_insert_string_roll, 1);
|