super tiny compiler ported to Python 3

pull/28/head
Brian Edwards 8 years ago
parent 1c5da2aea0
commit 5da992116d

2
.gitignore vendored

@ -0,0 +1,2 @@
.cache
*.pyc

@ -0,0 +1,189 @@
from itertools import chain
from string import ascii_letters, digits, whitespace
# Token type tags stored under a token's 'type' key.
NAME = 'name'
NUMBER = 'number'
PAREN = 'paren'
# Multi-character token kinds, as (token type, allowed characters, converter)
# triples. tokenize() tries them in order against the first character of a run
# and applies the converter to the collected text (int for numbers, identity
# for names).
MULTICHAR_TYPES = [(NUMBER, digits, int),
(NAME, ascii_letters, lambda x: x)]
class BacktrackingGenerator():
    """Iterate over ``input_`` with the ability to push one item back.

    The underlying generator is exposed as ``self.generator``.  Calling
    ``self.generator.send(<truthy>)`` right after receiving an item
    "backtracks": that same item is handed out again by the next ``next()``.
    The ``send()`` call itself returns ``None``.

    Raises:
        ValueError: (from the generator) if ``send(<truthy>)`` is called twice
            in a row without a ``next()`` in between.
    """

    def __init__(self, input_):
        def generator():
            """Use send(True-ish) to backtrack."""
            for char in input_:
                # Keep re-offering ``char`` for as long as the consumer
                # rejects it.  A plain ``yield char`` for the re-offer would
                # ignore a second rejection and silently swallow the next
                # character as the return value of ``send()``.
                while (yield char):
                    # send(True-ish) was called; this bare yield produces the
                    # (None) return value of that send() call.
                    if (yield):
                        raise ValueError('Cannot backtrack twice in a row.')
        self.generator = generator()

    def __iter__(self):
        """Return the underlying generator so ``for`` loops share its state."""
        return self.generator

    def __next__(self):
        """Return the next item (or the pushed-back one after a backtrack)."""
        return next(self.generator)

    def take_while(self, predicate):
        """Lazily yield items while ``predicate(item)`` is true.

        The first item that fails the predicate is pushed back, so it is
        returned again by the next read from this object.  Iteration also
        stops when the input is exhausted.
        """
        for char in self.generator:
            if not predicate(char):
                self.generator.send(1)  # backtrack
                break
            yield char
def Token(type_, value):
    """Build a token: a dict holding the token's 'type' and 'value'."""
    return dict(type=type_, value=value)
def tokenize(input_):
    """Lazily yield the tokens found in ``input_``.

    Whitespace is skipped, each parenthesis becomes a PAREN token, and runs
    of digits or ASCII letters become a single token as described by
    MULTICHAR_TYPES.

    Raises:
        TypeError: when a character is none of the above.
    """
    chars = BacktrackingGenerator(input_)

    def read_run(first):
        """Return (type, value) for the multi-character run starting at ``first``."""
        for kind, alphabet, convert in MULTICHAR_TYPES:
            if first not in alphabet:
                continue
            # take_while pushes back the first character outside the run.
            rest = chars.take_while(lambda c: c in alphabet)
            text = ''.join(chain([first], rest))
            return kind, convert(text)

    for char in chars:
        if char in whitespace:
            continue
        if char in '()':
            yield Token(PAREN, char)
            continue
        if char not in digits + ascii_letters:
            raise TypeError('I dont know what this character is: ' + char)
        kind, value = read_run(char)
        yield Token(kind, value)
def NumberLiteral(value):
    """Build a NumberLiteral AST node wrapping ``value``."""
    return dict(type='NumberLiteral', value=value)
def CallExpression(name, params):
    """Build a source-AST CallExpression node; ``params`` is stored as given."""
    return dict(type='CallExpression', name=name, params=params)
def parse(tokens):
    """Build the source AST from ``tokens``.

    Args:
        tokens: any iterable of token dicts (a list, a generator, ...).

    Returns:
        A ``{'type': 'Program', 'body': [...]}`` dict whose body holds one
        node per top-level expression.

    Raises:
        TypeError: for a token that is neither a number nor an opening
            parenthesis where an expression is expected (the message is the
            token's type).
        StopIteration: if the input ends directly after an opening
            parenthesis.

    Note: the token following '(' is used as the call name without checking
    its type, and a call whose closing parenthesis is missing is returned
    with the parameters collected so far.
    """
    # All nested walks must advance one shared iterator.  Normalising here
    # lets callers pass a plain list as well; iter() of an iterator is the
    # iterator itself, so existing callers are unaffected.
    tokens = iter(tokens)
    open_paren = Token(PAREN, '(')
    close_paren = Token(PAREN, ')')

    def walk(token):
        """Return the AST node that starts at ``token``."""
        if token['type'] == NUMBER:
            return NumberLiteral(token['value'])
        if token == open_paren:
            name_token = next(tokens)
            node = CallExpression(name_token['value'], [])
            for param_token in tokens:
                if param_token == close_paren:
                    break
                node['params'].append(walk(param_token))
            return node
        raise TypeError(token['type'])

    ast = {'type': 'Program', 'body': []}
    for token in tokens:
        ast['body'].append(walk(token))
    return ast
class Traverser():
    """Depth-first AST walker that notifies a visitor as it enters each node.

    For every node, the visitor attribute named after the node's 'type' (if
    present and truthy) is called with ``(node, parent)``; then the method of
    this class with that same name descends into the node's children.
    """

    def __init__(self, visitor):
        self.visitor = visitor

    def traverse(self, ast):
        """Walk ``ast`` from its root; the root is visited with parent None."""
        self.traverse_node(ast)

    def traverse_node(self, node, parent=None):
        """Notify the visitor about ``node``, then walk its children."""
        visit = getattr(self.visitor, node['type'], None)
        if visit:
            visit(node, parent)
        # The type is read again after the visit, as the dispatch key.
        descend = getattr(self, node['type'])
        descend(node)

    def _traverse_children(self, children, parent):
        """Walk each child node in order, passing ``parent`` along."""
        for child in children:
            self.traverse_node(child, parent)

    def Program(self, node):
        self._traverse_children(node['body'], node)

    def CallExpression(self, node):
        self._traverse_children(node['params'], node)

    @staticmethod
    def NumberLiteral(node):
        """Number literals have no children; nothing to walk."""
        return None
def ExpressionStatement(expression):
    """Build an ExpressionStatement node wrapping ``expression``."""
    return dict(type='ExpressionStatement', expression=expression)
def Identifier(name):
    """Build an Identifier node for ``name``."""
    return dict(type='Identifier', name=name)
def NewCallExpression(callee, arguments):
    """Build a target-AST CallExpression node; ``arguments`` is stored as given."""
    return dict(type='CallExpression', callee=callee, arguments=arguments)
class Transformer():
    """Visitor that builds the target AST while a Traverser walks the source AST.

    Each source node that can contain children gets a '_context' key pointing
    at the list in the new AST that its children must append to.  These keys
    are written onto the nodes of the AST passed in.
    """

    @classmethod
    def transform(cls, ast):
        """Return the new Program AST built from ``ast``."""
        body = []
        # Top-level expressions of the source program land in the new body.
        ast['_context'] = body
        Traverser(cls).traverse(ast)
        return {'type': 'Program', 'body': body}

    @staticmethod
    def NumberLiteral(node, parent):
        """Copy a number literal into the parent's context list."""
        literal = NumberLiteral(node['value'])
        parent['_context'].append(literal)

    @staticmethod
    def CallExpression(node, parent):
        """Convert a call node and append it to the parent's context list.

        A call that is not nested directly in another call is wrapped in an
        ExpressionStatement.
        """
        call = NewCallExpression(Identifier(node['name']), [])
        # Parameters of this call are appended to the new call's arguments.
        node['_context'] = call['arguments']
        nested = parent['type'] == 'CallExpression'
        parent['_context'].append(call if nested else ExpressionStatement(call))
class CodeGenerator():
    """Render a target AST as source text, dispatching on each node's 'type'."""

    @classmethod
    def generate_code(cls, node):
        """Return the code for ``node`` using the method named after its type."""
        emit = getattr(cls, node['type'])
        return emit(node)

    @classmethod
    def Program(cls, node):
        """Render every body node, one per line."""
        lines = [cls.generate_code(child) for child in node['body']]
        return '\n'.join(lines)

    @classmethod
    def ExpressionStatement(cls, node):
        """Render the wrapped expression followed by a semicolon."""
        expression = cls.generate_code(node['expression'])
        return expression + ';'

    @classmethod
    def CallExpression(cls, node):
        """Render ``callee(arg, arg, ...)``."""
        callee = cls.generate_code(node['callee'])
        arguments = ', '.join(
            [cls.generate_code(argument) for argument in node['arguments']])
        return '{}({})'.format(callee, arguments)

    @staticmethod
    def Identifier(node):
        """Render an identifier as its name."""
        return node['name']

    @staticmethod
    def NumberLiteral(node):
        """Render a number literal via str()."""
        return str(node['value'])
def compile_(input_):
    """Run the whole pipeline on ``input_``: tokenize, parse, transform, generate."""
    source_ast = parse(tokenize(input_))
    return CodeGenerator.generate_code(Transformer.transform(source_ast))

@ -0,0 +1,84 @@
# py.test test.py -v
import pytest
from super_tiny_compiler_unannotated import (
NAME, NUMBER, PAREN,
BacktrackingGenerator,
CallExpression, ExpressionStatement, Identifier, NewCallExpression,
NumberLiteral, Token,
CodeGenerator, Transformer,
compile_, parse, tokenize)
# Each case records the expected artifact of every compiler stage for one
# input; the parametrized tests below pair adjacent stages with each other.
TEST_CASES = [
{
# Source text and the final generated code.
'input': '(add 2 (subtract 4 2))',
'output': 'add(2, subtract(4, 2));',
# Expected result of tokenize(input).
'tokens': [Token(PAREN, '('),
Token(NAME, 'add'),
Token(NUMBER, 2),
Token(PAREN, '('),
Token(NAME, 'subtract'),
Token(NUMBER, 4),
Token(NUMBER, 2),
Token(PAREN, ')'),
Token(PAREN, ')')],
# Expected result of parse(tokens).
'ast': {'type': 'Program',
'body': [CallExpression('add',
[NumberLiteral(2),
CallExpression('subtract',
[NumberLiteral(4),
NumberLiteral(2)])])]
},
# Expected result of Transformer.transform(ast).
'new_ast': {
'type': 'Program',
'body': [
ExpressionStatement(
NewCallExpression(
Identifier('add'),
[NumberLiteral(2),
NewCallExpression(
Identifier('subtract'),
[NumberLiteral(4),
NumberLiteral(2)])
])
)
]}}
]
def test_BacktrackingGenerator():
    """take_while must leave the rejected character available to next()."""
    chars = BacktrackingGenerator('abcde')
    first = next(chars)
    assert first == 'a'
    middle = ''.join(chars.take_while(lambda c: c != 'e'))
    assert middle == 'bcd'
    last = next(chars)
    assert last == 'e'
@pytest.mark.parametrize(
    "input_, expected",
    [(case['input'], case['tokens']) for case in TEST_CASES])
def test_tokenize(input_, expected):
    """tokenize() yields the expected token sequence for the input text."""
    produced = list(tokenize(input_))
    assert produced == expected
@pytest.mark.parametrize(
    "tokens, expected",
    [(case['tokens'], case['ast']) for case in TEST_CASES])
def test_parse(tokens, expected):
    """parse() turns an iterator over the tokens into the expected source AST."""
    token_iterator = iter(tokens)
    assert parse(token_iterator) == expected
@pytest.mark.parametrize(
    "ast, expected",
    [(case['ast'], case['new_ast']) for case in TEST_CASES])
def test_transform(ast, expected):
    """Transformer.transform() converts the source AST into the target AST."""
    transformed = Transformer.transform(ast)
    assert transformed == expected
@pytest.mark.parametrize(
    "new_ast, expected",
    [(case['new_ast'], case['output']) for case in TEST_CASES])
def test_generate_code(new_ast, expected):
    """CodeGenerator renders the target AST as the expected code string."""
    code = CodeGenerator.generate_code(new_ast)
    assert code == expected
@pytest.mark.parametrize(
    "input_, expected",
    [(case['input'], case['output']) for case in TEST_CASES])
def test_compile(input_, expected):
    """compile_() maps the input text straight to the expected output code."""
    compiled = compile_(input_)
    assert compiled == expected
Loading…
Cancel
Save