parent
1c5da2aea0
commit
5da992116d
@ -0,0 +1,2 @@
|
||||
.cache
|
||||
*.pyc
|
@ -0,0 +1,189 @@
|
||||
from itertools import chain
|
||||
from string import ascii_letters, digits, whitespace
|
||||
|
||||
NAME = 'name'
|
||||
NUMBER = 'number'
|
||||
PAREN = 'paren'
|
||||
|
||||
MULTICHAR_TYPES = [(NUMBER, digits, int),
|
||||
(NAME, ascii_letters, lambda x: x)]
|
||||
|
||||
|
||||
class BacktrackingGenerator():
|
||||
|
||||
def __init__(self, input_):
|
||||
|
||||
def generator():
|
||||
"""Use send(True-ish) to backtrack."""
|
||||
for char in input_:
|
||||
if (yield char):
|
||||
# send(True-ish) was called
|
||||
if (yield):
|
||||
raise ValueError('Cannot backtrack twice in a row.')
|
||||
yield char
|
||||
|
||||
self.generator = generator()
|
||||
|
||||
def __iter__(self):
|
||||
return self.generator
|
||||
|
||||
def __next__(self):
|
||||
return next(self.generator)
|
||||
|
||||
def take_while(self, predicate):
|
||||
for char in self.generator:
|
||||
if not predicate(char):
|
||||
self.generator.send(1) # backtrack
|
||||
break
|
||||
yield char
|
||||
|
||||
|
||||
def Token(type_, value):
|
||||
return {'type': type_, 'value': value}
|
||||
|
||||
|
||||
def tokenize(input_):
|
||||
char_gen = BacktrackingGenerator(input_)
|
||||
|
||||
def tokenize_multiple_chars(char):
|
||||
for type_, charset, convert in MULTICHAR_TYPES:
|
||||
if char in charset:
|
||||
tail = char_gen.take_while(lambda c: c in charset)
|
||||
value = convert(''.join(chain([char], tail)))
|
||||
return type_, value
|
||||
|
||||
for char in char_gen:
|
||||
if char in whitespace:
|
||||
continue
|
||||
if char in '()':
|
||||
yield Token(PAREN, char)
|
||||
elif char in digits + ascii_letters:
|
||||
yield Token(*tokenize_multiple_chars(char))
|
||||
else:
|
||||
raise TypeError('I dont know what this character is: ' + char)
|
||||
|
||||
|
||||
def NumberLiteral(value):
|
||||
return {'type': 'NumberLiteral', 'value': value}
|
||||
|
||||
|
||||
def CallExpression(name, params):
|
||||
return {'type': 'CallExpression', 'name': name, 'params': params}
|
||||
|
||||
|
||||
def parse(tokens):
|
||||
|
||||
def walk(token):
|
||||
if token['type'] == NUMBER:
|
||||
return NumberLiteral(token['value'])
|
||||
if token == Token(PAREN, '('):
|
||||
token = next(tokens)
|
||||
node = CallExpression(token['value'], [])
|
||||
for token in tokens:
|
||||
if token == Token(PAREN, ')'):
|
||||
break
|
||||
node['params'].append(walk(token))
|
||||
return node
|
||||
raise TypeError(token['type'])
|
||||
|
||||
ast = {'type': 'Program', 'body': []}
|
||||
for token in tokens:
|
||||
ast['body'].append(walk(token))
|
||||
return ast
|
||||
|
||||
|
||||
class Traverser():
|
||||
|
||||
def __init__(self, visitor):
|
||||
self.visitor = visitor
|
||||
|
||||
def traverse(self, ast):
|
||||
self.traverse_node(ast)
|
||||
|
||||
def traverse_node(self, node, parent=None):
|
||||
method = getattr(self.visitor, node['type'], None)
|
||||
if method:
|
||||
method(node, parent)
|
||||
getattr(self, node['type'])(node)
|
||||
|
||||
def Program(self, node):
|
||||
for expression in node['body']:
|
||||
self.traverse_node(expression, node)
|
||||
|
||||
def CallExpression(self, node):
|
||||
for param in node['params']:
|
||||
self.traverse_node(param, node)
|
||||
|
||||
@staticmethod
|
||||
def NumberLiteral(node):
|
||||
pass
|
||||
|
||||
|
||||
def ExpressionStatement(expression):
|
||||
return {'type': 'ExpressionStatement', 'expression': expression}
|
||||
|
||||
|
||||
def Identifier(name):
|
||||
return {'type': 'Identifier', 'name': name}
|
||||
|
||||
|
||||
def NewCallExpression(callee, arguments):
|
||||
return {'type': 'CallExpression', 'callee': callee, 'arguments': arguments}
|
||||
|
||||
|
||||
class Transformer():
|
||||
|
||||
@classmethod
|
||||
def transform(cls, ast):
|
||||
new_ast = {'type': 'Program', 'body': []}
|
||||
ast['_context'] = new_ast['body']
|
||||
Traverser(cls).traverse(ast)
|
||||
return new_ast
|
||||
|
||||
@staticmethod
|
||||
def NumberLiteral(node, parent):
|
||||
parent['_context'].append(NumberLiteral(node['value']))
|
||||
|
||||
@staticmethod
|
||||
def CallExpression(node, parent):
|
||||
expression = NewCallExpression(Identifier(node['name']), [])
|
||||
node['_context'] = expression['arguments']
|
||||
if parent['type'] != 'CallExpression':
|
||||
expression = ExpressionStatement(expression)
|
||||
parent['_context'].append(expression)
|
||||
|
||||
|
||||
class CodeGenerator():
|
||||
|
||||
@classmethod
|
||||
def generate_code(cls, node):
|
||||
return getattr(cls, node['type'])(node)
|
||||
|
||||
@classmethod
|
||||
def Program(cls, node):
|
||||
return '\n'.join(map(cls.generate_code, node['body']))
|
||||
|
||||
@classmethod
|
||||
def ExpressionStatement(cls, node):
|
||||
return cls.generate_code(node['expression']) + ';'
|
||||
|
||||
@classmethod
|
||||
def CallExpression(cls, node):
|
||||
return '{}({})'.format(
|
||||
cls.generate_code(node['callee']),
|
||||
', '.join(map(cls.generate_code, node['arguments'])))
|
||||
|
||||
@staticmethod
|
||||
def Identifier(node):
|
||||
return node['name']
|
||||
|
||||
@staticmethod
|
||||
def NumberLiteral(node):
|
||||
return str(node['value'])
|
||||
|
||||
|
||||
def compile_(input_):
|
||||
tokens = tokenize(input_)
|
||||
ast = parse(tokens)
|
||||
new_ast = Transformer.transform(ast)
|
||||
return CodeGenerator.generate_code(new_ast)
|
@ -0,0 +1,84 @@
|
||||
# py.test test.py -v
|
||||
|
||||
import pytest
|
||||
|
||||
from super_tiny_compiler_unannotated import (
|
||||
NAME, NUMBER, PAREN,
|
||||
BacktrackingGenerator,
|
||||
CallExpression, ExpressionStatement, Identifier, NewCallExpression,
|
||||
NumberLiteral, Token,
|
||||
CodeGenerator, Transformer,
|
||||
compile_, parse, tokenize)
|
||||
|
||||
TEST_CASES = [
|
||||
{
|
||||
'input': '(add 2 (subtract 4 2))',
|
||||
'output': 'add(2, subtract(4, 2));',
|
||||
'tokens': [Token(PAREN, '('),
|
||||
Token(NAME, 'add'),
|
||||
Token(NUMBER, 2),
|
||||
Token(PAREN, '('),
|
||||
Token(NAME, 'subtract'),
|
||||
Token(NUMBER, 4),
|
||||
Token(NUMBER, 2),
|
||||
Token(PAREN, ')'),
|
||||
Token(PAREN, ')')],
|
||||
'ast': {'type': 'Program',
|
||||
'body': [CallExpression('add',
|
||||
[NumberLiteral(2),
|
||||
CallExpression('subtract',
|
||||
[NumberLiteral(4),
|
||||
NumberLiteral(2)])])]
|
||||
},
|
||||
'new_ast': {
|
||||
'type': 'Program',
|
||||
'body': [
|
||||
ExpressionStatement(
|
||||
NewCallExpression(
|
||||
Identifier('add'),
|
||||
[NumberLiteral(2),
|
||||
NewCallExpression(
|
||||
Identifier('subtract'),
|
||||
[NumberLiteral(4),
|
||||
NumberLiteral(2)])
|
||||
])
|
||||
)
|
||||
]}}
|
||||
]
|
||||
|
||||
|
||||
def test_BacktrackingGenerator():
|
||||
g = BacktrackingGenerator('abcde')
|
||||
assert next(g) == 'a'
|
||||
assert ''.join(g.take_while(lambda c: c != 'e')) == 'bcd'
|
||||
assert next(g) == 'e'
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_, expected",
|
||||
[(c['input'], c['tokens']) for c in TEST_CASES])
|
||||
def test_tokenize(input_, expected):
|
||||
assert list(tokenize(input_)) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tokens, expected",
|
||||
[(c['tokens'], c['ast']) for c in TEST_CASES])
|
||||
def test_parse(tokens, expected):
|
||||
assert parse(iter(tokens)) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ast, expected",
|
||||
[(c['ast'], c['new_ast']) for c in TEST_CASES])
|
||||
def test_transform(ast, expected):
|
||||
assert Transformer.transform(ast) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("new_ast, expected",
|
||||
[(c['new_ast'], c['output']) for c in TEST_CASES])
|
||||
def test_generate_code(new_ast, expected):
|
||||
assert CodeGenerator.generate_code(new_ast) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_, expected",
|
||||
[(c['input'], c['output']) for c in TEST_CASES])
|
||||
def test_compile(input_, expected):
|
||||
assert compile_(input_) == expected
|
Loading…
Reference in new issue