Refactored table parsing.

This commit is contained in:
pca006132 2020-07-28 16:34:06 +08:00
parent 9ded99fbc6
commit 24b1fef890
1 changed files with 37 additions and 31 deletions

68
main.py
View File

@ -1,6 +1,42 @@
from typing import Optional, Dict
import re
import sys
def parse_table_entries(fields, pattern, lines):
    """Parse column-aligned table entries that may wrap onto following lines.

    A line whose stripped text matches ``pattern`` starts a new entry; the
    named groups listed in ``fields`` give both the initial field values and
    the column at which each field begins.  Subsequent non-matching,
    non-blank lines are treated as continuations: each one is sliced at the
    recorded column boundaries and the pieces are appended to the matching
    fields.  A blank line (or the next matching line) ends the entry.

    Args:
        fields: Names of the regex groups, in left-to-right column order.
        pattern: Compiled regex with one named group per entry in ``fields``.
        lines: Iterable of text lines (trailing newlines are tolerated).

    Returns:
        A list of dicts mapping each field name to its accumulated text.
    """
    entries = []
    current: Optional[Dict] = None  # field values of the entry being built
    starts = []  # column boundaries of the entry being built

    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A blank line terminates the entry in progress.
            if current is not None:
                entries.append(current)
                current = None
            continue

        match = pattern.search(stripped)
        if match is not None:
            if current is not None:
                entries.append(current)
            # The pattern ran on the stripped text, so match offsets must be
            # shifted by the leading whitespace to index the raw line that
            # continuation rows are sliced from.
            indent = len(line) - len(line.lstrip())
            starts = [indent + match.start(name) for name in fields]
            # The last column is open-ended: a continuation row may be longer
            # than the row that started the entry.
            starts.append(None)
            current = {name: match.group(name) for name in fields}
        elif current is not None:
            for i, name in enumerate(fields):
                piece = line[starts[i]:starts[i + 1]].strip()
                if not piece:
                    continue
                so_far = current[name]
                # Words broken at '-' or '_' are glued back together; anything
                # else is joined with a single space.  An empty field needs no
                # separator (and must not be indexed).
                if so_far and not so_far.endswith(('-', '_')):
                    piece = ' ' + piece
                current[name] = so_far + piece

    if current is not None:
        entries.append(current)
    return entries
def parse_registers(lines):
"""Parse register entry from `pdftotext -layout output` from TRM table"""
fields = ['id', 'address', 'size', 'type', 'reset_value', 'description']
@ -12,37 +48,7 @@ def parse_registers(lines):
r'(?P<type>\w+)\s+'
r'(?P<reset_value>0x[0-9A-F]+)\s+'
r'(?P<description>.+)$')
prev = None
registers = []
for line in lines:
if len(line.strip()) == 0:
if prev is not None:
registers.append(prev['reg'])
prev = None
continue
match = ENTRY_PATTERN.search(line.strip())
if match is not None:
if prev is not None:
registers.append(prev['reg'])
starts = [match.span(v)[0] for v in fields] + [len(line)]
prev = {
'reg': {v: match.group(v) for v in fields},
'starts': starts
}
elif prev is not None:
for i, field in enumerate(fields):
v = line[prev['starts'][i]:prev['starts'][i + 1]].strip()
if len(v) == 0:
continue
if prev['reg'][field][-1] not in ['-', '_']:
v = ' ' + v
prev['reg'][field] += v
if prev is not None:
registers.append(prev['reg'])
return registers
return parse_table_entries(fields, ENTRY_PATTERN, lines)
def emit_rust(base_addr, ending_addr, registers):
current_addr = base_addr