Rewritten for parsing fields.

This commit is contained in:
pca006132 2020-07-28 17:33:59 +08:00
parent 24b1fef890
commit 6a6c32b562
2 changed files with 94 additions and 32 deletions

5
README
View File

@ -1,4 +1,5 @@
A simple script for parsing TRM register definitions...
Example Usage:
pdftotext -f 1431 -l 1434 -layout ug585-Zynq-7000-TRM.pdf - | python main.py 0x1000 0x1FFC
Experimenting
pdftotext -f 1436 -l 1438 -layout ug585-Zynq-7000-TRM.pdf - | python main.py

121
main.py
View File

@ -1,54 +1,104 @@
from typing import Optional, Dict
import re
import sys
def parse_table_entries(fields):
    """Coroutine that parses column-aligned table rows fed in via ``send()``.

    ``fields`` is a sequence of ``"name: regex"`` strings, one per column and
    in column order. The text before the first ``:`` becomes the column name;
    the rest (stripped) is the regular expression the column must match. A
    line whose stripped text fully matches all columns, separated by
    whitespace, starts a new entry. Any other non-blank line is treated as a
    continuation of the current entry: it is cut at the column positions
    recorded from the entry's first row, and each non-empty piece is appended
    to that column's list. A blank line closes the current entry.

    Protocol: prime the generator with ``next()``, ``send()`` one line at a
    time, then ``send(None)`` to finish. The list of entries is delivered as
    the ``StopIteration`` value. Each entry maps a column name to a list of
    text fragments (the matched text first, then continuation pieces).
    """
    names = []
    groups = []
    for spec in fields:
        # Split on the first ':' only; the regex part may contain colons.
        name, _, regex = spec.partition(':')
        names.append(name)
        groups.append(f"(?P<{name}>{regex.strip()})")
    pattern = re.compile(r'\s+'.join(groups))

    prev = None
    entries = []

    def flush():
        """Move the entry being built (if any) into ``entries``."""
        nonlocal prev
        if prev is not None:
            entries.append(prev['data'])
            prev = None

    while True:
        line = yield
        if line is None:
            break
        stripped = line.strip()
        if not stripped:
            flush()
            continue
        match = pattern.fullmatch(stripped)
        if match is not None:
            flush()
            # The match ran on the stripped text, but continuation lines are
            # sliced unstripped, so shift the column starts by the row's own
            # leading whitespace to keep both in the same coordinates.
            indent = len(line) - len(line.lstrip())
            prev = {
                'data': {name: [match.group(name)] for name in names},
                'starts': [match.start(name) + indent for name in names],
            }
        elif prev is not None:
            starts = prev['starts']
            for i, name in enumerate(names):
                # The last column is open-ended; others stop where the next
                # column begins.
                end = starts[i + 1] if i + 1 < len(names) else None
                chunk = line[starts[i]:end].strip()
                if chunk:
                    prev['data'][name].append(chunk)
    flush()
    return entries
# def parse_register_list(lines):
# """Parse register entry from `pdftotext -layout output` from TRM table"""
# fields = [
# r'id: \w+',
# r'address: 0x[0-9A-F]+',
# r'size: \d+',
# r'type: \w+',
# r'reset_value: 0x[0-9A-F]+',
# r'description: .+'
# ]
# return parse_table_entries(fields, lines)
def parse_register_fields():
    """Return a primed table parser for a register's bit-field table.

    The columns, in order, are ``name``, ``bits`` (a single bit number or a
    ``hi:lo`` range), ``type``, ``reset`` (an upper-case ``0x`` hex literal)
    and ``description``. The returned generator has already been advanced to
    its first ``yield``, so callers can ``send()`` lines right away and
    ``send(None)`` to obtain the parsed entries.
    """
    fields = [
        r'name: \w+',
        r'bits: \d+(:\d+)?',
        r'type: \w+',
        r'reset: 0x[0-9A-F]+',
        r'description: .+'
    ]
    it = parse_table_entries(fields)
    # Prime the coroutine so the first send() delivers a line, not an error.
    next(it)
    return it
def end_iterator(it):
    """Signal end-of-input to a generator-based parser and return its result.

    Sends ``None`` into ``it``. If the generator finishes in response, its
    ``return`` value (carried by ``StopIteration``) is returned. If it yields
    again instead of finishing, ``None`` is returned.
    """
    try:
        it.send(None)
    except StopIteration as e:
        return e.value
    return None
def parse_registers():
    """Return a primed coroutine that splits input into per-register tables.

    Feed it lines with ``send()``. A line whose stripped text is ``Name``
    followed by a single word starts a new register; subsequent lines are
    forwarded to a fresh ``parse_register_fields()`` parser until the next
    ``Name`` line. Lines seen before the first ``Name`` line are ignored.
    Sending ``None`` finishes parsing; the ``StopIteration`` value is a dict
    mapping each register name to whatever its field parser returned. A
    repeated register name overwrites the earlier result.
    """
    def inner():
        pattern = re.compile(r'Name\s+(\w+)')
        name = ''
        it = None
        results = {}
        while True:
            line = yield
            if line is None:
                break
            m = pattern.fullmatch(line.strip())
            if m is not None:
                # Close the previous register's table before starting anew.
                if it is not None:
                    results[name] = end_iterator(it)
                name = m.group(1)
                it = parse_register_fields()
            elif it is not None:
                # Forward the line unstripped: the table parser cuts
                # continuation lines by column position, so removing leading
                # whitespace would put the text in the wrong columns.
                it.send(line)
        if it is not None:
            results[name] = end_iterator(it)
        return results

    it = inner()
    # Prime the coroutine so callers can send() immediately.
    next(it)
    return it
def emit_rust(base_addr, ending_addr, registers):
current_addr = base_addr
@ -89,10 +139,21 @@ def emit_rust(base_addr, ending_addr, registers):
code.append(line)
return code
# Legacy register-list driver, left disabled: it emitted Rust code from a
# register list, whereas the active driver below prints parsed bit fields.
# if len(sys.argv) != 3:
#     print("Pipe pdftotext to stdin, and give starting and ending address"
#           "(inclusive) for the registers")
#     exit()
# for line in emit_rust(int(sys.argv[1], 0), int(sys.argv[2], 0), parse_register_list(sys.stdin)):
#     print(line)

# Stream stdin through the register parser, then print every register name
# followed by its parsed field entries and a separator line.
parser = parse_registers()
for line in sys.stdin:
    parser.send(line)
for k, v in end_iterator(parser).items():
    print(k)
    for entry in v:
        print(entry)
    print('------------')