def parse_table_entries(fields, pattern, lines):
    """Parse column-aligned table rows, merging wrapped continuation lines.

    A line whose stripped text matches ``pattern`` starts a new entry; the
    named groups listed in ``fields`` become the entry's values. Following
    non-blank lines that do not match are treated as continuations of that
    entry: each one is cut at the columns where the entry's fields started,
    and every non-empty piece is appended to the corresponding field. A
    blank line ends the current entry. Non-matching lines seen while no
    entry is open are ignored.

    When a piece is appended, a single space is inserted before it unless
    the text collected so far ends with ``-`` or ``_`` (a word or identifier
    split across lines) or is empty.

    Args:
        fields: Names of the pattern's named groups, listed in left-to-right
            column order. Every group is expected to take part in a match.
        pattern: Compiled regular expression with one named group per field.
        lines: Iterable of text lines; trailing newlines are allowed.

    Returns:
        A list with one ``{field: text}`` dict per entry, in input order.
    """
    entries = []
    current: Optional[Dict[str, str]] = None
    starts = []

    def flush():
        """Store the entry being assembled, if any, and reset the state."""
        nonlocal current
        if current is not None:
            entries.append(current)
        current = None

    for line in lines:
        stripped = line.strip()
        if not stripped:
            flush()
            continue

        match = pattern.search(stripped)
        if match is not None:
            flush()
            # The pattern ran on the stripped text, so its offsets are
            # relative to the first non-blank character. Shift them by the
            # row's indentation so they index raw continuation lines.
            indent = len(line) - len(line.lstrip())
            starts = [match.start(name) + indent for name in fields]
            current = {name: match.group(name) for name in fields}
            continue

        if current is None:
            continue

        # The last column is open-ended: wrapped text may be longer than the
        # row that started the entry and must not be truncated.
        bounds = zip(starts, starts[1:] + [None])
        for name, (begin, end) in zip(fields, bounds):
            piece = line[begin:end].strip()
            if not piece:
                continue
            existing = current[name]
            if existing and not existing.endswith(('-', '_')):
                piece = ' ' + piece
            current[name] = existing + piece

    flush()
    return entries
parse_table_entries(fields, ENTRY_PATTERN, lines) def emit_rust(base_addr, ending_addr, registers): current_addr = base_addr