summaryrefslogtreecommitdiff
path: root/json4cpp/tools/serve_header/serve_header.py
blob: e2da2dad0ba5012565310f7dac27bdc4744318c4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
#!/usr/bin/env python3

import contextlib
import logging
import os
import re
import shutil
import sys
import subprocess

from datetime import datetime, timedelta
from io import BytesIO
from threading import Lock, Timer

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

from http import HTTPStatus
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler

# name of the optional YAML configuration file, looked up in the project root
CONFIG_FILE = 'serve_header.yml'
# file and directories whose presence identifies a project root
# (see is_project_root)
MAKEFILE = 'Makefile'
INCLUDE = 'include/nlohmann/'
SINGLE_INCLUDE = 'single_include/nlohmann/'
# file name of the amalgamated header inside SINGLE_INCLUDE
HEADER = 'json.hpp'

# timestamp format shared by the log output and the injected build time
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# matches the start of the '#define NLOHMANN_JSON_VERSION_MAJOR' line; the
# build count and build time defines are injected right before that line
JSON_VERSION_RE = re.compile(r'\s*#\s*define\s+NLOHMANN_JSON_VERSION_MAJOR\s+')

class ExitHandler(logging.StreamHandler):
    """Logging handler that terminates the process on severe records.

    The handler writes nothing itself: a record whose level is at or above
    the configured threshold makes ``emit`` call ``sys.exit(1)``.
    """

    def __init__(self, level):
        """Create the handler with *level* as the exit threshold."""
        super().__init__()
        self.level = level

    def emit(self, record):
        """Exit with status 1 unless *record* is below the threshold."""
        if record.levelno < self.level:
            return
        sys.exit(1)

def is_project_root(test_dir='.'):
    """Tell whether *test_dir* has the layout of a project checkout.

    A directory qualifies when it contains a regular file named MAKEFILE
    and both the INCLUDE and SINGLE_INCLUDE paths exist below it.

    Returns:
        True if all three checks pass, False otherwise.
    """
    # isfile() is already False for a missing path, so no separate
    # existence check is needed for the makefile
    if not os.path.isfile(os.path.join(test_dir, MAKEFILE)):
        return False

    return all(os.path.exists(os.path.join(test_dir, sub_dir))
               for sub_dir in (INCLUDE, SINGLE_INCLUDE))

class DirectoryEventBucket:
    """Collect directory events and report them in debounced batches.

    Directories are recorded together with the time they were added. A timer
    fires ``delay`` seconds after the first addition; every directory that is
    at least ``threshold`` seconds old by then is reported to ``callback`` as
    a single path (the common parent of all such directories). Younger
    directories stay queued and the timer is re-armed for them.
    """

    def __init__(self, callback, delay=1.2, threshold=0.8):
        """Create an empty bucket.

        Args:
            callback: callable taking one path argument.
            delay: seconds between arming the timer and processing.
            threshold: minimum age in seconds before a directory is reported.
        """
        self.delay = delay
        self.threshold = timedelta(seconds=threshold)
        self.callback = callback
        self.event_dirs = set()
        self.timer = None
        self.lock = Lock()

    def start_timer(self):
        """Arm the timer unless one is already pending.

        Callers are expected to hold ``self.lock``.
        """
        if self.timer is None:
            self.timer = Timer(self.delay, self.process_dirs)
            self.timer.start()

    def process_dirs(self):
        """Report all sufficiently old directories and re-queue the rest."""
        ready = []
        pending = set()
        with self.lock:
            self.timer = None
            now = datetime.now()
            for stamp, event_dir in self.event_dirs:
                if now - stamp < self.threshold:
                    pending.add((stamp, event_dir))
                else:
                    ready.append(event_dir)
            self.event_dirs = pending

        # The callback runs without the lock: the lock is not reentrant, so
        # a callback that ends up in add_dir() would deadlock, and a slow
        # callback would block every thread that tries to add a directory.
        try:
            if ready:
                self.callback(os.path.commonpath(ready))
        finally:
            # re-arm even if the callback raised, so queued directories are
            # not left behind; a no-op if add_dir() already started a timer
            with self.lock:
                if self.event_dirs:
                    self.start_timer()

    def add_dir(self, path):
        """Queue *path* unless a queued directory already contains it."""
        with self.lock:
            # skip path if it equals, or lies below, a directory that is
            # already in the set
            covered = any(os.path.commonpath([path, event_dir]) == event_dir
                          for (_, event_dir) in self.event_dirs)
            if not covered:
                self.event_dirs.add((datetime.now(), path))
                self.start_timer()

class WorkTree:
    """A single working tree and the state of its amalgamated header.

    Instances compare equal and hash by ``tree_dir`` so they can be kept
    in a set.
    """

    # command used to run the 'amalgamate' target; shared by all trees
    make_command = 'make'

    def __init__(self, root_dir, tree_dir):
        """Record the paths of the tree located at *tree_dir*.

        Args:
            root_dir: directory that relative paths are computed against.
            tree_dir: directory of the working tree.

        Raises:
            OSError: if the amalgamated header does not exist, because its
                modification time is used as the initial build time.
        """
        self.root_dir = root_dir
        self.tree_dir = tree_dir
        self.rel_dir = os.path.relpath(tree_dir, root_dir)
        self.name = os.path.basename(tree_dir)
        self.include_dir = os.path.abspath(os.path.join(tree_dir, INCLUDE))
        self.header = os.path.abspath(os.path.join(tree_dir, SINGLE_INCLUDE, HEADER))
        self.rel_header = os.path.relpath(self.header, root_dir)
        # start dirty so that the first request triggers an amalgamation
        self.dirty = True
        self.build_count = 0
        t = os.path.getmtime(self.header)
        t = datetime.fromtimestamp(t)
        self.build_time = t.strftime(DATETIME_FORMAT)

    def __hash__(self):
        """Hash by tree directory (consistent with ``__eq__``)."""
        return hash(self.tree_dir)

    def __eq__(self, other):
        """Two trees are equal when they have the same tree directory."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.tree_dir == other.tree_dir

    def update_dirty(self, path):
        """Mark the tree dirty if *path* lies inside its include directory."""
        if self.dirty:
            return

        path = os.path.abspath(path)
        if os.path.commonpath([path, self.include_dir]) == self.include_dir:
            logging.info(f'{self.name}: working tree marked dirty')
            self.dirty = True

    def _header_mtime(self):
        """Return the header's modification time, or None if unavailable."""
        try:
            return os.path.getmtime(self.header)
        except OSError:
            return None

    def amalgamate_header(self):
        """Rebuild the amalgamated header if the tree is dirty.

        Runs ``<make_command> amalgamate`` in the tree directory. When the
        command fails, a warning is logged and the tree stays dirty so that
        the next call retries instead of treating the stale header as
        current. On success the build count and build time are updated if
        the header's modification time changed.
        """
        if not self.dirty:
            return

        # the header may be missing before the build (e.g. it was deleted);
        # the build is what regenerates it, so that must not be fatal
        mtime = self._header_mtime()
        result = subprocess.run([WorkTree.make_command, 'amalgamate'], cwd=self.tree_dir,
                                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if result.returncode != 0:
            logging.warning(f'{self.name}: amalgamation failed '
                            f'(exit code {result.returncode})')
            return

        if mtime == self._header_mtime():
            logging.info(f'{self.name}: no changes')
        else:
            self.build_count += 1
            self.build_time = datetime.now().strftime(DATETIME_FORMAT)
            logging.info(f'{self.name}: header amalgamated (build count {self.build_count})')

        self.dirty = False

class WorkTrees(FileSystemEventHandler):
    """The set of known working trees below a root directory.

    The root is scanned once on construction and then watched recursively;
    file system events are used to discover new trees, drop removed ones
    and mark trees dirty when files inside them are closed.
    """

    def __init__(self, root_dir):
        """Scan *root_dir* for working trees and start watching it."""
        super().__init__()
        self.root_dir = root_dir
        self.trees = set()
        self.tree_lock = Lock()
        self.scan(root_dir)
        # newly created directories are collected and scanned in batches
        self.created_bucket = DirectoryEventBucket(self.scan)
        self.observer = Observer()
        self.observer.schedule(self, root_dir, recursive=True)
        self.observer.start()

    def scan(self, base_dir):
        """Recursively scan *base_dir* for working trees.

        Directories that disappear or cannot be read while scanning are
        skipped; they do not abort the scan.
        """
        scan_dirs = {base_dir}

        while scan_dirs:
            scan_dir = os.path.abspath(scan_dirs.pop())
            self.scan_tree(scan_dir)
            try:
                with os.scandir(scan_dir) as dir_it:
                    for entry in dir_it:
                        if entry.is_dir():
                            scan_dirs.add(entry.path)
            except FileNotFoundError as e:
                logging.debug('path disappeared: %s', e)
            except OSError as e:
                # e.g. PermissionError or NotADirectoryError; one unreadable
                # directory must not stop the discovery of the other trees
                logging.debug('cannot scan directory: %s', e)

    def scan_tree(self, scan_dir):
        """Register *scan_dir* as a working tree if it looks like one."""
        if not is_project_root(scan_dir):
            return

        # skip source trees in build directories
        # this check could be enhanced
        if scan_dir.endswith('/_deps/json-src'):
            return

        try:
            tree = WorkTree(self.root_dir, scan_dir)
        except OSError as e:
            # the amalgamated header is missing or the tree vanished
            # between the check above and this point
            logging.warning('skipping working tree at %s: %s', scan_dir, e)
            return

        with self.tree_lock:
            if tree not in self.trees:
                if tree.name == tree.rel_dir:
                    logging.info(f'adding working tree {tree.name}')
                else:
                    logging.info(f'adding working tree {tree.name} at {tree.rel_dir}')
                url = os.path.join('/', tree.rel_dir, HEADER)
                logging.info(f'{tree.name}: serving header at {url}')
                self.trees.add(tree)

    def rescan(self, path=None):
        """Drop working trees that no longer look like a project root.

        Args:
            path: if given, only trees whose directory equals or contains
                *path* are re-checked; otherwise all trees are.
        """
        if path is not None:
            path = os.path.abspath(path)
        kept = set()
        # check if any working trees have been removed
        with self.tree_lock:
            for tree in self.trees:
                affected = (path is None
                            or os.path.commonpath([path, tree.tree_dir]) == tree.tree_dir)
                if affected and not is_project_root(tree.tree_dir):
                    if tree.name == tree.rel_dir:
                        logging.info(f'removing working tree {tree.name}')
                    else:
                        logging.info(f'removing working tree {tree.name} at {tree.rel_dir}')
                else:
                    kept.add(tree)
            self.trees = kept

    def find(self, path):
        """Return the working tree whose header file is *path*, or None."""
        path = os.path.abspath(path)
        with self.tree_lock:
            for tree in self.trees:
                if path == tree.header:
                    return tree
        return None

    def on_any_event(self, event):
        """Dispatch a file system event from the observer.

        Directory events add or remove working trees; a 'closed' event on
        a file lets every tree decide whether it became dirty.
        """
        logging.debug('%s (is_dir=%s): %s', event.event_type,
                      event.is_directory, event.src_path)
        path = os.path.abspath(event.src_path)
        if event.is_directory:
            if event.event_type == 'created':
                # check for new working trees
                self.created_bucket.add_dir(path)
            elif event.event_type == 'deleted':
                # check for deleted working trees
                self.rescan(path)
            elif event.event_type == 'moved':
                # handle moved directories - treat source as deleted and dest as created
                self.rescan(path)
                if hasattr(event, 'dest_path'):
                    dest_path = os.path.abspath(event.dest_path)
                    self.created_bucket.add_dir(dest_path)
        elif event.event_type == 'closed':
            with self.tree_lock:
                for tree in self.trees:
                    tree.update_dirty(path)

    def stop(self):
        """Stop the file system observer and wait for its thread to end."""
        self.observer.stop()
        self.observer.join()

class HeaderRequestHandler(SimpleHTTPRequestHandler): # lgtm[py/missing-call-to-init]
    """Serve the amalgamated header of a working tree.

    A GET request for a known header triggers an amalgamation (if the tree
    is dirty) and returns the header with the build count and build time
    injected as preprocessor defines. Because the injection changes the
    body size, the headers of such a response are held back until
    ``copyfile`` knows the final Content-Length. All other responses
    (errors, 304, HEAD) are sent by the base class unmodified.
    """

    # True while the headers of a 200 GET response are being held back for
    # copyfile(); reset for every response in send_response()
    _defer_headers = False

    def __init__(self, request, client_address, server):
        """Handle one connection; *server* must provide ``worktrees``."""
        # set before calling the base class, which handles the request
        # from within its constructor
        self.worktrees = server.worktrees
        self.worktree = None
        try:
            super().__init__(request, client_address, server,
                             directory=server.worktrees.root_dir)
        except ConnectionResetError:
            logging.debug('connection reset by peer')

    def translate_path(self, path):
        """Map a request path to a file path, allowing the short header URL.

        A path ending in ``/json.hpp`` that does not already point into the
        single-include directory is rewritten to the header inside it.
        """
        path = os.path.abspath(super().translate_path(path))

        # add single_include/nlohmann into path, if needed
        header = os.path.join('/', HEADER)
        header_path = os.path.join('/', SINGLE_INCLUDE, HEADER)
        if (path.endswith(header)
            and not path.endswith(header_path)):
            path = os.path.join(os.path.dirname(path), SINGLE_INCLUDE, HEADER)

        return path

    def send_head(self):
        """Amalgamate and open the requested header, or answer with 404."""
        # check if the translated path matches a working tree
        # and fulfill the request; otherwise, send 404
        path = self.translate_path(self.path)
        self.worktree = self.worktrees.find(path)
        if self.worktree is not None:
            self.worktree.amalgamate_header()
            logging.info(f'{self.worktree.name}; serving header (build count {self.worktree.build_count})')
            return super().send_head()
        logging.info(f'invalid request path: {self.path}')
        self.send_error(HTTPStatus.NOT_FOUND, 'Not Found')
        return None

    def send_response(self, code, message=None):
        """Start a response and decide whether its headers are deferred.

        Only a 200 response to a GET for a working tree header is followed
        by copyfile(), so only then may the headers be held back. Error,
        304 and HEAD responses must be flushed by the base class, otherwise
        the status line and headers would never reach the client.
        """
        self._defer_headers = (code == HTTPStatus.OK
                               and self.command == 'GET'
                               and self.worktree is not None)
        super().send_response(code, message)

    def send_header(self, keyword, value):
        """Buffer a header; drop the on-disk Content-Length when deferring."""
        # intercept Content-Length header; sent in copyfile later
        if self._defer_headers and keyword == 'Content-Length':
            return
        super().send_header(keyword, value)

    def end_headers (self):
        """Flush the headers unless copyfile() is going to do it."""
        if self._defer_headers:
            return
        super().end_headers()

    def copyfile(self, source, outputfile):
        """Send the deferred headers and the header file with build info.

        The defines ``JSON_BUILD_COUNT`` and ``JSON_BUILD_TIME`` are inserted
        once, before the first line matched by JSON_VERSION_RE.
        """
        injected = False
        content = BytesIO()
        # inject build count and time into served header
        for raw_line in source:
            # decode only for matching; the original bytes are served as-is
            line = raw_line.decode('utf-8', errors='replace')
            if not injected and JSON_VERSION_RE.match(line):
                content.write(bytes('#define JSON_BUILD_COUNT '\
                                    f'{self.worktree.build_count}\n', 'utf-8'))
                content.write(bytes('#define JSON_BUILD_TIME '\
                                    f'"{self.worktree.build_time}"\n\n', 'utf-8'))
                injected = True
            content.write(raw_line)
        length = content.tell()

        # from here on the headers are final and may be sent
        self._defer_headers = False
        # set content length
        self.send_header('Content-Length', str(length))
        # CORS header
        self.send_header('Access-Control-Allow-Origin', '*')
        # prevent caching
        self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
        self.send_header('Pragma', 'no-cache')
        self.send_header('Expires', '0')
        self.end_headers()

        # send the header
        content.seek(0)
        shutil.copyfileobj(content, outputfile)

    def log_message(self, format, *args):
        """Suppress the base class' per-request logging to stderr."""
        pass

class DualStackServer(ThreadingHTTPServer):
    """Threading HTTP server that accepts IPv4 and IPv6 where possible.

    The server carries the ``worktrees`` object so that request handlers
    can reach it through their ``server`` argument.
    """

    def __init__(self, addr, worktrees):
        """Bind to *addr* and serve requests with HeaderRequestHandler."""
        # must be set first: the base class constructor already binds
        self.worktrees = worktrees
        super().__init__(addr, HeaderRequestHandler)

    def server_bind(self):
        """Clear IPV6_V6ONLY on the listening socket, then bind."""
        # imported here so the class does not depend on the script's
        # __main__ section having imported the module
        import socket

        # setting the option fails with OSError on an IPv4 socket and the
        # constants may be missing on some platforms; both are harmless
        with contextlib.suppress(OSError, AttributeError):
            self.socket.setsockopt(
                socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
        return super().server_bind()

# Script entry point: locate the project root, load the optional YAML
# configuration, start watching working trees and serve their headers
# over HTTP or HTTPS until interrupted.
if __name__ == '__main__':
    import argparse
    import ssl
    import socket
    import yaml

    # exit code
    ec = 0

    # setup logging
    logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                        datefmt=DATETIME_FORMAT, level=logging.INFO)
    log = logging.getLogger()
    # any record at ERROR level or above terminates the script
    log.addHandler(ExitHandler(logging.ERROR))

    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--make', default='make',
                        help='the make command (default: make)')
    args = parser.parse_args()

    # propagate the make command to use for amalgamating headers
    WorkTree.make_command = args.make

    worktrees = None
    try:
        # change working directory to project root
        os.chdir(os.path.realpath(os.path.join(sys.path[0], '../../')))

        if not is_project_root():
            log.error('working directory does not look like project root')

        # load config
        config = {}
        config_file = os.path.abspath(CONFIG_FILE)
        try:
            with open(config_file, 'r') as f:
                # safe_load() returns None for an empty file; fall back to
                # the defaults instead of failing on config.get() below
                config = yaml.safe_load(f) or {}
        except FileNotFoundError:
            log.info(f'cannot find configuration file: {config_file}')
            log.info('using default configuration')

        # find and monitor working trees
        worktrees = WorkTrees(config.get('root', '.'))

        # start web server
        infos = socket.getaddrinfo(config.get('bind', None), config.get('port', 8443),
                                   type=socket.SOCK_STREAM, flags=socket.AI_PASSIVE)
        DualStackServer.address_family = infos[0][0]
        HeaderRequestHandler.protocol_version = 'HTTP/1.0'
        with DualStackServer(infos[0][4], worktrees) as httpd:
            scheme = 'HTTP'
            # an 'https:' key without content loads as None
            https = config.get('https') or {}
            if https.get('enabled', True):
                cert_file = https.get('cert_file', 'localhost.pem')
                key_file = https.get('key_file', 'localhost-key.pem')
                ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
                ssl_ctx.minimum_version = ssl.TLSVersion.TLSv1_2
                ssl_ctx.maximum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED
                ssl_ctx.load_cert_chain(cert_file, key_file)
                httpd.socket = ssl_ctx.wrap_socket(httpd.socket, server_side=True)
                scheme = 'HTTPS'
            host, port = httpd.socket.getsockname()[:2]
            log.info(f'serving {scheme} on {host} port {port}')
            log.info('press Ctrl+C to exit')
            httpd.serve_forever()

    except KeyboardInterrupt:
        log.info('exiting')
    except Exception:
        ec = 1
        log.exception('an error occurred:')
    finally:
        # When no working trees were set up, nothing is done here, so a
        # SystemExit raised by ExitHandler propagates with its own status.
        if worktrees is not None:
            worktrees.stop()
            sys.exit(ec)