Generating a PEG Parser

Guido van Rossum
Aug 5 · 6 min read
statement: assignment | expr | if_statement
expr: expr '+' term | expr '-' term | term
term: term '*' atom | term '/' atom | atom
atom: NAME | NUMBER | '(' expr ')'
assignment: target '=' expr
target: NAME
if_statement: 'if' expr ':' statement
grammar: rule+ ENDMARKER
rule: NAME ':' alternative ('|' alternative)* NEWLINE
alternative: item+
item: NAME | STRING
class Rule:    def __init__(self, name, alts):
self.name = name
self.alts = alts
class GrammarParser(Parser):    def grammar(self):
pos = self.mark()
if rule := self.rule():
rules = [rule]
while rule := self.rule():
rules.append(rule)
if self.expect(ENDMARKER):
return rules # <------------- final result
self.reset(pos)
return None
def rule(self):
pos = self.mark()
if name := self.expect(NAME):
if self.expect(":"):
if alt := self.alternative():
alts = [alt]
apos = self.mark()
while (self.expect("|")
and (alt := self.alternative())):
alts.append(alt)
apos = self.mark()
self.reset(apos)
if self.expect(NEWLINE):
return Rule(name.string, alts)
self.reset(pos)
return None
def alternative(self):
items = []
while item := self.item():
items.append(item)
return items
def item(self):
if name := self.expect(NAME):
return name.string
if string := self.expect(STRING):
return string.string
return None
[
Rule('statement', [['assignment'], ['expr'], ['if_statement']]),
Rule('expr', [['term', "'+'", 'expr'],
['term', "'-'", 'term'],
['term']]),
Rule('term', [['atom', "'*'", 'term'],
['atom', "'/'", 'atom'],
['atom']]),
Rule('atom', [['NAME'], ['NUMBER'], ["'('", 'expr', "')'"]]),
Rule('assignment', [['target', "'='", 'expr']]),
Rule('target', [['NAME']]),
Rule('if_statement', [["'if'", 'expr', "':'", 'statement']]),
]
def generate_parser_class(rules):
print(f"class ToyParser(Parser):")
for rule in rules:
print()
print(f" @memoize")
print(f" def {rule.name}(self):")
print(f" pos = self.mark()")
for alt in rule.alts:
items = []
print(f" if (True")
for item in alt:
if item[0] in ('"', "'"):
print(f" and self.expect({item})")
else:
var = item.lower()
if var in items:
var += str(len(items))
items.append(var)
if item.isupper():
print(" " +
f"and ({var} := self.expect({item}))")
else:
print(f" " +
f"and ({var} := self.{item}())")
print(f" ):")
print(f" " +
f"return Node({rule.name!r}, [{', '.join(items)}])")
print(f" self.reset(pos)")
print(f" return None")
class ToyParser(Parser):    @memoize
def statement(self):
pos = self.mark()
if (True
and (assignment := self.assignment())
):
return Node('statement', [assignment])
self.reset(pos)
if (True
and (expr := self.expr())
):
return Node('statement', [expr])
self.reset(pos)
if (True
and (if_statement := self.if_statement())
):
return Node('statement', [if_statement])
self.reset(pos)
return None
...
def memoize(func):    def memoize_wrapper(self, *args):
pos = self.mark()
memo = self.memos.get(pos)
if memo is None:
memo = self.memos[pos] = {}
key = (func, args)
if key in memo:
res, endpos = memo[key]
self.reset(endpos)
else:
res = func(self, *args)
endpos = self.mark()
memo[key] = res, endpos
return res
return memoize_wrapper

Guido van Rossum

Written by

Welcome to a place where words matter. On Medium, smart voices and original ideas take center stage - with no ads in sight. Watch
Follow all the topics you care about, and we’ll deliver the best stories for you to your homepage and inbox. Explore
Get unlimited access to the best stories on Medium — and support writers while you’re at it. Just $5/month. Upgrade