From 6a6c32b562ec121714ba44622ae0677d8cb04dd7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 28 Jul 2020 17:33:59 +0800 Subject: [PATCH] Rewritten for parsing fields. --- README | 5 ++- main.py | 121 ++++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 94 insertions(+), 32 deletions(-) diff --git a/README b/README index ccbfbbc..10ba6f2 100644 --- a/README +++ b/README @@ -1,4 +1,5 @@ A simple script for parsing TRM register definition... -Example Usage: -pdftotext -f 1431 -l 1434 -layout ug585-Zynq-7000-TRM.pdf - | python main.py 0x1000 0x1FFC +Experimenting + +pdftotext -f 1436 -l 1438 -layout ug585-Zynq-7000-TRM.pdf - | python main.py diff --git a/main.py b/main.py index 8a7b9c9..0c423e6 100644 --- a/main.py +++ b/main.py @@ -1,54 +1,104 @@ -from typing import Optional, Dict import re import sys -def parse_table_entries(fields, pattern, lines): +def parse_table_entries(fields): """Parse aligned table entries""" - prev: Optional[Dict] = None + pattern = r'\s+'.join([f"(?P<{v[:v.find(':')]}>{v[v.find(':') + 1:].strip()})" + for v in fields]) + pattern = re.compile(pattern) + fields = [v[:v.find(':')] for v in fields] + prev = None entries = [] def flush(): nonlocal prev if prev is not None: entries.append(prev['data']) prev = None - for line in lines: + + while True: + line = yield + if line == None: + break if len(line.strip()) == 0: flush() continue - - match = pattern.search(line.strip()) + match = pattern.fullmatch(line.strip()) if match is not None: flush() - starts = [match.span(v)[0] for v in fields] + [len(line)] + starts = [match.span(v)[0] for v in fields] prev = { - 'data': {v: match.group(v) for v in fields}, + 'data': {v: [match.group(v)] for v in fields}, 'starts': starts } elif prev is not None: for i, field in enumerate(fields): - v = line[prev['starts'][i]:prev['starts'][i + 1]].strip() + if i + 1 == len(fields): + v = line[prev['starts'][i]:].strip() + else: + v = line[prev['starts'][i]:prev['starts'][i + 1]].strip() if len(v) == 0: continue - if prev['data'][field][-1] not in ['-', '_']: - v = ' ' + v - prev['data'][field] += v - + prev['data'][field].append(v) flush() return entries +# def parse_register_list(lines): +# """Parse register entry from `pdftotext -layout output` from TRM table""" +# fields = [ +# r'id: \w+', +# r'address: 0x[0-9A-F]+', +# r'size: \d+', +# r'type: \w+', +# r'reset_value: 0x[0-9A-F]+', +# r'description: .+' +# ] +# return parse_table_entries(fields, lines) -def parse_registers(lines): - """Parse register entry from `pdftotext -layout output` from TRM table""" - fields = ['id', 'address', 'size', 'type', 'reset_value', 'description'] +def parse_register_fields(): + fields = [ + r'name: \w+', + r'bits: \d+(:\d+)?', + r'type: \w+', + r'reset: 0x[0-9A-F]+', + r'description: .+' + ] + it = parse_table_entries(fields) + next(it) + return it - ENTRY_PATTERN = re.compile(r'^' - r'(?P\w+)\s+' - r'(?P
0x[0-9A-F]+)\s+' - r'(?P\d+)\s+' - r'(?P\w+)\s+' - r'(?P0x[0-9A-F]+)\s+' - r'(?P.+)$') - return parse_table_entries(fields, ENTRY_PATTERN, lines) +def end_iterator(it): + try: + it.send(None) + except StopIteration as e: + return e.value + +def parse_registers(): + def inner(): + pattern = re.compile(r'Name\s+(\w+)') + state = 0 + name = '' + it = None + results = {} + while True: + line = yield + if line == None: + break + line = line.strip() + m = pattern.fullmatch(line) + if m is not None: + if it is not None: + results[name] = end_iterator(it) + name = m.group(1) + it = parse_register_fields() + else: + if it is not None: + it.send(line) + if it is not None: + results[name] = end_iterator(it) + return results + it = inner() + next(it) + return it def emit_rust(base_addr, ending_addr, registers): current_addr = base_addr @@ -89,10 +139,21 @@ def emit_rust(base_addr, ending_addr, registers): code.append(line) return code -if len(sys.argv) != 3: - print("Pipe pdftotext to stdin, and give starting and ending address" - "(inclusive) for the registers") - exit() -for line in emit_rust(int(sys.argv[1], 0), int(sys.argv[2], 0), parse_registers(sys.stdin)): - print(line) + +# if len(sys.argv) != 3: +# print("Pipe pdftotext to stdin, and give starting and ending address" +# "(inclusive) for the registers") +# exit() + +# for line in emit_rust(int(sys.argv[1], 0), int(sys.argv[2], 0), parse_register_list(sys.stdin)): +# print(line) + +parser = parse_registers() +for line in sys.stdin: + parser.send(line) +for k, v in end_iterator(parser).items(): + print(k) + for entry in v: + print(entry) + print('------------')