Updated parser.

This commit is contained in:
pca006132 2020-07-29 10:15:58 +08:00
parent 6a6c32b562
commit 5672ce8542
1 changed file with 43 additions and 41 deletions

84
main.py
View File

@ -1,5 +1,6 @@
import re import re
import sys import sys
import json
def parse_table_entries(fields): def parse_table_entries(fields):
"""Parse aligned table entries""" """Parse aligned table entries"""
@ -19,10 +20,11 @@ def parse_table_entries(fields):
line = yield line = yield
if line == None: if line == None:
break break
line = line.rstrip()
if len(line.strip()) == 0: if len(line.strip()) == 0:
flush() flush()
continue continue
match = pattern.fullmatch(line.strip()) match = pattern.fullmatch(line)
if match is not None: if match is not None:
flush() flush()
starts = [match.span(v)[0] for v in fields] starts = [match.span(v)[0] for v in fields]
@ -42,18 +44,6 @@ def parse_table_entries(fields):
flush() flush()
return entries return entries
# def parse_register_list(lines):
# """Parse register entry from `pdftotext -layout output` from TRM table"""
# fields = [
# r'id: \w+',
# r'address: 0x[0-9A-F]+',
# r'size: \d+',
# r'type: \w+',
# r'reset_value: 0x[0-9A-F]+',
# r'description: .+'
# ]
# return parse_table_entries(fields, lines)
def parse_register_fields(): def parse_register_fields():
fields = [ fields = [
r'name: \w+', r'name: \w+',
@ -73,28 +63,53 @@ def end_iterator(it):
return e.value return e.value
def parse_registers(): def parse_registers():
def two_column(width):
fields = [
f'key: \\w.{{1,{width-2}}}\\s',
r'value: .+'
]
it = parse_table_entries(fields)
next(it)
return it
def inner(): def inner():
pattern = re.compile(r'Name\s+(\w+)') def_start = re.compile(r'Name\s+(\w+)\s+')
field_start = re.compile(r'\s+Field Name\s+Bits\s+Type\s+Reset '
r'Value\s+Description\s+')
state = 0 state = 0
name = ''
it = None it = None
results = {} results = []
while True: while True:
line = yield line = yield
if line == None: if line == None:
break break
line = line.strip() line = line
m = pattern.fullmatch(line) if state == 0:
if m is not None: m = def_start.fullmatch(line)
if it is not None: if m is not None:
results[name] = end_iterator(it) if it is not None:
name = m.group(1) results[-1]['fields']= end_iterator(it)
it = parse_register_fields() it = two_column(m.span(1)[0])
else:
if it is not None:
it.send(line) it.send(line)
state = 1
else:
if it is not None:
it.send(line)
elif state == 1:
m = field_start.fullmatch(line)
if m is not None:
if it is not None:
results.append({'def': end_iterator(it)})
it = parse_register_fields()
state = 0
else:
if it is not None:
it.send(line)
if it is not None: if it is not None:
results[name] = end_iterator(it) if state == 0:
results[-1]['fields']= end_iterator(it)
return results return results
it = inner() it = inner()
next(it) next(it)
@ -139,21 +154,8 @@ def emit_rust(base_addr, ending_addr, registers):
code.append(line) code.append(line)
return code return code
# if len(sys.argv) != 3:
# print("Pipe pdftotext to stdin, and give starting and ending address"
# "(inclusive) for the registers")
# exit()
# for line in emit_rust(int(sys.argv[1], 0), int(sys.argv[2], 0), parse_register_list(sys.stdin)):
# print(line)
parser = parse_registers() parser = parse_registers()
for line in sys.stdin: for line in sys.stdin:
parser.send(line) parser.send(line)
for k, v in end_iterator(parser).items(): v = end_iterator(parser)
print(k) print(json.dumps(v, indent = 4))
for entry in v:
print(entry)
print('------------')