Prepare for greatness

pull/51/head
James Kyle 7 years ago
parent 3356bea996
commit c4cb49b184

@ -1,4 +1,4 @@
<a href="super-tiny-compiler.js"><img width="731" alt="THE SUPER TINY COMPILER" src="https://cloud.githubusercontent.com/assets/952783/14171276/ed7bf716-f6e6-11e5-96df-80a031c2769d.png"/></a> <a href="the-super-tiny-compiler.js"><img width="731" alt="THE SUPER TINY COMPILER" src="https://cloud.githubusercontent.com/assets/952783/14171276/ed7bf716-f6e6-11e5-96df-80a031c2769d.png"/></a>
***Welcome to The Super Tiny Compiler!*** ***Welcome to The Super Tiny Compiler!***
@ -8,7 +8,7 @@ written in easy to read JavaScript.
Reading through the guided code will help you learn about how *most* compilers Reading through the guided code will help you learn about how *most* compilers
work from end to end. work from end to end.
### [Want to jump into the code? Click here](super-tiny-compiler.js) ### [Want to jump into the code? Click here](the-super-tiny-compiler.js)
--- ---
@ -27,7 +27,7 @@ the nerds are able to understand.
### Okay so where do I begin? ### Okay so where do I begin?
Awesome! Head on over to the [super-tiny-compiler.js](super-tiny-compiler.js) Awesome! Head on over to the [the-super-tiny-compiler.js](the-super-tiny-compiler.js)
file. file.
### I'm back, that didn't make sense ### I'm back, that didn't make sense

@ -0,0 +1,7 @@
{
"name": "the-super-tiny-compiler",
"version": "0.0.0",
"author": "James Kyle <me@thejameskyle.com> (thejameskyle.com)",
"license": "CC-BY-4.0",
"main": "./the-super-tiny-compiler.js"
}

@ -1,16 +1,16 @@
var superTinyCompiler = require('./super-tiny-compiler'); const {
var assert = require('assert'); tokenizer,
parser,
transformer,
codeGenerator,
compiler,
} = require('./super-tiny-compiler');
const assert = require('assert');
var tokenizer = superTinyCompiler.tokenizer; const input = '(add 2 (subtract 4 2))';
var parser = superTinyCompiler.parser; const output = 'add(2, subtract(4, 2));';
var transformer = superTinyCompiler.transformer;
var codeGenerator = superTinyCompiler.codeGenerator;
var compiler = superTinyCompiler.compiler;
var input = '(add 2 (subtract 4 2))'; const tokens = [
var output = 'add(2, subtract(4, 2));';
var tokens = [
{ type: 'paren', value: '(' }, { type: 'paren', value: '(' },
{ type: 'name', value: 'add' }, { type: 'name', value: 'add' },
{ type: 'number', value: '2' }, { type: 'number', value: '2' },
@ -22,7 +22,7 @@ var tokens = [
{ type: 'paren', value: ')' } { type: 'paren', value: ')' }
]; ];
var ast = { const ast = {
type: 'Program', type: 'Program',
body: [{ body: [{
type: 'CallExpression', type: 'CallExpression',
@ -44,7 +44,7 @@ var ast = {
}] }]
}; };
var newAst = { const newAst = {
type: 'Program', type: 'Program',
body: [{ body: [{
type: 'ExpressionStatement', type: 'ExpressionStatement',

@ -1,3 +1,5 @@
'use strict';
/** /**
* TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
* T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
@ -150,7 +152,7 @@
* { type: 'number', value: '4' }, * { type: 'number', value: '4' },
* { type: 'number', value: '2' }, * { type: 'number', value: '2' },
* { type: 'paren', value: ')' }, * { type: 'paren', value: ')' },
* { type: 'paren', value: ')' } * { type: 'paren', value: ')' },
* ] * ]
* *
* And an Abstract Syntax Tree (AST) might look like this: * And an Abstract Syntax Tree (AST) might look like this:
@ -162,16 +164,16 @@
* name: 'add', * name: 'add',
* params: [{ * params: [{
* type: 'NumberLiteral', * type: 'NumberLiteral',
* value: '2' * value: '2',
* }, { * }, {
* type: 'CallExpression', * type: 'CallExpression',
* name: 'subtract', * name: 'subtract',
* params: [{ * params: [{
* type: 'NumberLiteral', * type: 'NumberLiteral',
* value: '4' * value: '4',
* }, { * }, {
* type: 'NumberLiteral', * type: 'NumberLiteral',
* value: '2' * value: '2',
* }] * }]
* }] * }]
* }] * }]
@ -198,7 +200,7 @@
* *
* { * {
* type: 'NumberLiteral', * type: 'NumberLiteral',
* value: '2' * value: '2',
* } * }
* *
* Or maybe a node for a "CallExpression": * Or maybe a node for a "CallExpression":
@ -206,7 +208,7 @@
* { * {
* type: 'CallExpression', * type: 'CallExpression',
* name: 'subtract', * name: 'subtract',
* params: [...nested nodes go here...] * params: [...nested nodes go here...],
* } * }
* *
* When transforming the AST we can manipulate nodes by * When transforming the AST we can manipulate nodes by
@ -259,7 +261,7 @@
* we would likely introduce all sorts of abstractions here. But just visiting * we would likely introduce all sorts of abstractions here. But just visiting
* each node in the tree is enough. * each node in the tree is enough.
* *
* The reason I use the word visiting is because there is this pattern of how * The reason I use the word "visiting" is because there is this pattern of how
* to represent operations on elements of an object structure. * to represent operations on elements of an object structure.
* *
* Visitors * Visitors
@ -270,7 +272,7 @@
* *
* var visitor = { * var visitor = {
* NumberLiteral() {}, * NumberLiteral() {},
* CallExpression() {} * CallExpression() {},
* }; * };
* *
* When we traverse our AST we will call the methods on this visitor whenever we * When we traverse our AST we will call the methods on this visitor whenever we
@ -281,7 +283,45 @@
* *
* var visitor = { * var visitor = {
* NumberLiteral(node, parent) {}, * NumberLiteral(node, parent) {},
* CallExpression(node, parent) {} * CallExpression(node, parent) {},
* };
*
* We call these functions when we "enter" the node. But there is also the
* possibility of calling things on "exit".
*
* Imagine our tree structure from before in list form:
*
* - Program
* - CallExpression
* - NumberLiteral
* - CallExpression
* - NumberLiteral
* - NumberLiteral
*
* As we traverse down, we're going to reach branches with dead ends. As we
* finish each branch of the tree we "exit" it. So going down the tree we
* "enter" each node, and going back up we "exit".
*
* -> Program (enter)
* -> CallExpression (enter)
* -> NumberLiteral (enter)
* <- NumberLiteral (exit)
* -> CallExpression (enter)
* -> NumberLiteral (enter)
* <- NumberLiteral (exit)
* -> NumberLiteral (enter)
* <- NumberLiteral (exit)
* <- CallExpression (exit)
* <- CallExpression (exit)
* <- Program (exit)
*
* In order to support that, our visitors will look like this:
*
* var visitor = {
* NumberLiteral: {
* enter(node, parent) {},
* exit(node, parent) {},
* }
* }; * };
*/ */
@ -343,10 +383,10 @@
function tokenizer(input) { function tokenizer(input) {
// A `current` variable for tracking our position in the code like a cursor. // A `current` variable for tracking our position in the code like a cursor.
var current = 0; let current = 0;
// And a `tokens` array for pushing our tokens to. // And a `tokens` array for pushing our tokens to.
var tokens = []; let tokens = [];
// We start by creating a `while` loop where we are setting up our `current` // We start by creating a `while` loop where we are setting up our `current`
// variable to be incremented as much as we want `inside` the loop. // variable to be incremented as much as we want `inside` the loop.
@ -356,10 +396,10 @@ function tokenizer(input) {
while (current < input.length) { while (current < input.length) {
// We're also going to store the `current` character in the `input`. // We're also going to store the `current` character in the `input`.
var char = input[current]; let char = input[current];
// The first thing we want to check for is an open parenthesis. This will // The first thing we want to check for is an open parenthesis. This will
// later be used for `CallExpressions` but for now we only care about the // later be used for `CallExpression` but for now we only care about the
// character. // character.
// //
// We check to see if we have an open parenthesis: // We check to see if we have an open parenthesis:
@ -369,7 +409,7 @@ function tokenizer(input) {
// to an open parenthesis. // to an open parenthesis.
tokens.push({ tokens.push({
type: 'paren', type: 'paren',
value: '(' value: '(',
}); });
// Then we increment `current` // Then we increment `current`
@ -385,7 +425,7 @@ function tokenizer(input) {
if (char === ')') { if (char === ')') {
tokens.push({ tokens.push({
type: 'paren', type: 'paren',
value: ')' value: ')',
}); });
current++; current++;
continue; continue;
@ -398,7 +438,7 @@ function tokenizer(input) {
// //
// So here we're just going to test for existence and if it does exist we're // So here we're just going to test for existence and if it does exist we're
// going to just `continue` on. // going to just `continue` on.
var WHITESPACE = /\s/; let WHITESPACE = /\s/;
if (WHITESPACE.test(char)) { if (WHITESPACE.test(char)) {
current++; current++;
continue; continue;
@ -413,12 +453,12 @@ function tokenizer(input) {
// Only two separate tokens // Only two separate tokens
// //
// So we start this off when we encounter the first number in a sequence. // So we start this off when we encounter the first number in a sequence.
var NUMBERS = /[0-9]/; let NUMBERS = /[0-9]/;
if (NUMBERS.test(char)) { if (NUMBERS.test(char)) {
// We're going to create a `value` string that we are going to push // We're going to create a `value` string that we are going to push
// characters to. // characters to.
var value = ''; let value = '';
// Then we're going to loop through each character in the sequence until // Then we're going to loop through each character in the sequence until
// we encounter a character that is not a number, pushing each character // we encounter a character that is not a number, pushing each character
@ -429,15 +469,42 @@ function tokenizer(input) {
} }
// After that we push our `number` token to the `tokens` array. // After that we push our `number` token to the `tokens` array.
tokens.push({ tokens.push({ type: 'number', value });
type: 'number',
value: value
});
// And we continue on. // And we continue on.
continue; continue;
} }
// We'll also add support for strings in our language which will be any
// text surrounded by double quotes (").
//
// (concat "foo" "bar")
// ^^^ ^^^ string tokens
//
// We'll start by checking for the opening quote:
if (char === '"') {
// Keep a `value` variable for building up our string token.
let value = '';
// We'll skip the opening double quote in our token.
char = input[++current];
// Then we'll iterate through each character until we reach another
// double quote.
while (char !== '"') {
value += char;
char = input[++current];
}
// Skip the closing double quote.
char = input[++current];
// And add our `string` token to the `tokens` array.
tokens.push({ type: 'string', value });
continue;
}
// The last type of token will be a `name` token. This is a sequence of // The last type of token will be a `name` token. This is a sequence of
// letters instead of numbers, that are the names of functions in our lisp // letters instead of numbers, that are the names of functions in our lisp
// syntax. // syntax.
@ -446,9 +513,9 @@ function tokenizer(input) {
// ^^^ // ^^^
// Name token // Name token
// //
var LETTERS = /[a-z]/i; let LETTERS = /[a-z]/i;
if (LETTERS.test(char)) { if (LETTERS.test(char)) {
var value = ''; let value = '';
// Again we're just going to loop through all the letters pushing them to // Again we're just going to loop through all the letters pushing them to
// a value. // a value.
@ -458,10 +525,7 @@ function tokenizer(input) {
} }
// And pushing that value as a token with the type `name` and continuing. // And pushing that value as a token with the type `name` and continuing.
tokens.push({ tokens.push({ type: 'name', value });
type: 'name',
value: value
});
continue; continue;
} }
@ -493,14 +557,14 @@ function tokenizer(input) {
function parser(tokens) { function parser(tokens) {
// Again we keep a `current` variable that we will use as a cursor. // Again we keep a `current` variable that we will use as a cursor.
var current = 0; let current = 0;
// But this time we're going to use recursion instead of a `while` loop. So we // But this time we're going to use recursion instead of a `while` loop. So we
// define a `walk` function. // define a `walk` function.
function walk() { function walk() {
// Inside the walk function we start by grabbing the `current` token. // Inside the walk function we start by grabbing the `current` token.
var token = tokens[current]; let token = tokens[current];
// We're going to split each type of token off into a different code path, // We're going to split each type of token off into a different code path,
// starting off with `number` tokens. // starting off with `number` tokens.
@ -515,7 +579,18 @@ function parser(tokens) {
// value to the value of our token. // value to the value of our token.
return { return {
type: 'NumberLiteral', type: 'NumberLiteral',
value: token.value value: token.value,
};
}
// If we have a string we will do the same as number and create a
// `StringLiteral` node.
if (token.type === 'string') {
current++;
return {
type: 'StringLiteral',
value: token.value,
}; };
} }
@ -533,10 +608,10 @@ function parser(tokens) {
// We create a base node with the type `CallExpression`, and we're going // We create a base node with the type `CallExpression`, and we're going
// to set the name as the current token's value since the next token after // to set the name as the current token's value since the next token after
// the open parenthesis is the name of the function. // the open parenthesis is the name of the function.
var node = { let node = {
type: 'CallExpression', type: 'CallExpression',
name: token.value, name: token.value,
params: [] params: [],
}; };
// We increment `current` *again* to skip the name token. // We increment `current` *again* to skip the name token.
@ -567,11 +642,11 @@ function parser(tokens) {
// { type: 'number', value: '4' }, // { type: 'number', value: '4' },
// { type: 'number', value: '2' }, // { type: 'number', value: '2' },
// { type: 'paren', value: ')' }, <<< Closing parenthesis // { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' } <<< Closing parenthesis // { type: 'paren', value: ')' }, <<< Closing parenthesis
// ] // ]
// //
// We're going to rely on the nested `walk` function to increment our // We're going to rely on the nested `walk` function to increment our
// `current` variable past any nested `CallExpressions`. // `current` variable past any nested `CallExpression`.
// So we create a `while` loop that will continue until it encounters a // So we create a `while` loop that will continue until it encounters a
// token with a `type` of `'paren'` and a `value` of a closing // token with a `type` of `'paren'` and a `value` of a closing
@ -601,16 +676,16 @@ function parser(tokens) {
// Now, we're going to create our AST which will have a root which is a // Now, we're going to create our AST which will have a root which is a
// `Program` node. // `Program` node.
var ast = { let ast = {
type: 'Program', type: 'Program',
body: [] body: [],
}; };
// And we're going to kickstart our `walk` function, pushing nodes to our // And we're going to kickstart our `walk` function, pushing nodes to our
// `ast.body` array. // `ast.body` array.
// //
// The reason we are doing this inside a loop is because our program can have // The reason we are doing this inside a loop is because our program can have
// `CallExpressions` after one another instead of being nested. // `CallExpression` after one another instead of being nested.
// //
// (add 2 2) // (add 2 2)
// (subtract 4 2) // (subtract 4 2)
@ -646,7 +721,7 @@ function parser(tokens) {
* *
* NumberLiteral(node, parent) { * NumberLiteral(node, parent) {
* // ... * // ...
* } * },
* }); * });
*/ */
@ -657,7 +732,7 @@ function traverser(ast, visitor) {
// A `traverseArray` function that will allow us to iterate over an array and // A `traverseArray` function that will allow us to iterate over an array and
// call the next function that we will define: `traverseNode`. // call the next function that we will define: `traverseNode`.
function traverseArray(array, parent) { function traverseArray(array, parent) {
array.forEach(function(child) { array.forEach(child => {
traverseNode(child, parent); traverseNode(child, parent);
}); });
} }
@ -668,11 +743,12 @@ function traverser(ast, visitor) {
// We start by testing for the existence of a method on the visitor with a // We start by testing for the existence of a method on the visitor with a
// matching `type`. // matching `type`.
var method = visitor[node.type]; let methods = visitor[node.type];
// If it exists we'll call it with the `node` and its `parent`. // If there is an `enter` method for this node type we'll call it with the
if (method) { // `node` and its `parent`.
method(node, parent); if (methods && methods.enter) {
methods.enter(node, parent);
} }
// Next we are going to split things up by the current node type. // Next we are going to split things up by the current node type.
@ -688,14 +764,15 @@ function traverser(ast, visitor) {
traverseArray(node.body, node); traverseArray(node.body, node);
break; break;
// Next we do the same with `CallExpressions` and traverse their `params`. // Next we do the same with `CallExpression` and traverse their `params`.
case 'CallExpression': case 'CallExpression':
traverseArray(node.params, node); traverseArray(node.params, node);
break; break;
// In the case of `NumberLiterals` we don't have any child nodes to visit, // In the cases of `NumberLiteral` and `StringLiteral` we don't have any
// so we'll just break. // child nodes to visit, so we'll just break.
case 'NumberLiteral': case 'NumberLiteral':
case 'StringLiteral':
break; break;
// And again, if we haven't recognized the node type then we'll throw an // And again, if we haven't recognized the node type then we'll throw an
@ -703,6 +780,12 @@ function traverser(ast, visitor) {
default: default:
throw new TypeError(node.type); throw new TypeError(node.type);
} }
// If there is an `exit` method for this node type we'll call it with the
// `node` and its `parent`.
if (methods && methods.exit) {
methods.exit(node, parent);
}
} }
// Finally we kickstart the traverser by calling `traverseNode` with our ast // Finally we kickstart the traverser by calling `traverseNode` with our ast
@ -763,9 +846,9 @@ function transformer(ast) {
// We'll create a `newAst` which like our previous AST will have a program // We'll create a `newAst` which like our previous AST will have a program
// node. // node.
var newAst = { let newAst = {
type: 'Program', type: 'Program',
body: [] body: [],
}; };
// Next I'm going to cheat a little and create a bit of a hack. We're going to // Next I'm going to cheat a little and create a bit of a hack. We're going to
@ -780,51 +863,66 @@ function transformer(ast) {
// We'll start by calling the traverser function with our ast and a visitor. // We'll start by calling the traverser function with our ast and a visitor.
traverser(ast, { traverser(ast, {
// The first visitor method accepts `NumberLiterals` // The first visitor method accepts any `NumberLiteral`
NumberLiteral: function(node, parent) { NumberLiteral: {
// We'll create a new node also named `NumberLiteral` that we will push to // We'll visit them on enter.
// the parent context. enter(node, parent) {
parent._context.push({ // We'll create a new node also named `NumberLiteral` that we will push to
type: 'NumberLiteral', // the parent context.
value: node.value parent._context.push({
}); type: 'NumberLiteral',
value: node.value,
});
},
}, },
// Next up, `CallExpressions`. // Next we have `StringLiteral`
CallExpression: function(node, parent) { StringLiteral: {
enter(node, parent) {
// We start creating a new node `CallExpression` with a nested parent._context.push({
// `Identifier`. type: 'StringLiteral',
var expression = { value: node.value,
type: 'CallExpression', });
callee: { },
type: 'Identifier', },
name: node.name
},
arguments: []
};
// Next we're going to define a new context on the original // Next up, `CallExpression`.
// `CallExpression` node that will reference the `expression`'s arguments CallExpression: {
// so that we can push arguments. enter(node, parent) {
node._context = expression.arguments;
// We start creating a new node `CallExpression` with a nested
// Then we're going to check if the parent node is a `CallExpression`. // `Identifier`.
// If it is not... let expression = {
if (parent.type !== 'CallExpression') { type: 'CallExpression',
callee: {
// We're going to wrap our `CallExpression` node with an type: 'Identifier',
// `ExpressionStatement`. We do this because the top level name: node.name,
// `CallExpressions` in JavaScript are actually statements. },
expression = { arguments: [],
type: 'ExpressionStatement',
expression: expression
}; };
}
// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s // Next we're going to define a new context on the original
// `context`. // `CallExpression` node that will reference the `expression`'s arguments
parent._context.push(expression); // so that we can push arguments.
node._context = expression.arguments;
// Then we're going to check if the parent node is a `CallExpression`.
// If it is not...
if (parent.type !== 'CallExpression') {
// We're going to wrap our `CallExpression` node with an
// `ExpressionStatement`. We do this because the top level
// `CallExpression` nodes in JavaScript are actually statements.
expression = {
type: 'ExpressionStatement',
expression: expression,
};
}
// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
// `context`.
parent._context.push(expression);
},
} }
}); });
@ -858,7 +956,7 @@ function codeGenerator(node) {
return node.body.map(codeGenerator) return node.body.map(codeGenerator)
.join('\n'); .join('\n');
// For `ExpressionStatements` we'll call the code generator on the nested // For `ExpressionStatement` we'll call the code generator on the nested
// expression and we'll add a semicolon... // expression and we'll add a semicolon...
case 'ExpressionStatement': case 'ExpressionStatement':
return ( return (
@ -866,7 +964,7 @@ function codeGenerator(node) {
';' // << (...because we like to code the *correct* way) ';' // << (...because we like to code the *correct* way)
); );
// For `CallExpressions` we will print the `callee`, add an open // For `CallExpression` we will print the `callee`, add an open
// parenthesis, we'll map through each node in the `arguments` array and run // parenthesis, we'll map through each node in the `arguments` array and run
// them through the code generator, joining them with a comma, and then // them through the code generator, joining them with a comma, and then
// we'll add a closing parenthesis. // we'll add a closing parenthesis.
@ -879,14 +977,18 @@ function codeGenerator(node) {
')' ')'
); );
// For `Identifiers` we'll just return the `node`'s name. // For `Identifier` we'll just return the `node`'s name.
case 'Identifier': case 'Identifier':
return node.name; return node.name;
// For `NumberLiterals` we'll just return the `node`'s value. // For `NumberLiteral` we'll just return the `node`'s value.
case 'NumberLiteral': case 'NumberLiteral':
return node.value; return node.value;
// For `StringLiteral` we'll add quotations around the `node`'s value.
case 'StringLiteral':
return '"' + node.value + '"';
// And if we haven't recognized the node, we'll throw an error. // And if we haven't recognized the node, we'll throw an error.
default: default:
throw new TypeError(node.type); throw new TypeError(node.type);
@ -911,10 +1013,10 @@ function codeGenerator(node) {
*/ */
function compiler(input) { function compiler(input) {
var tokens = tokenizer(input); let tokens = tokenizer(input);
var ast = parser(tokens); let ast = parser(tokens);
var newAst = transformer(ast); let newAst = transformer(ast);
var output = codeGenerator(newAst); let output = codeGenerator(newAst);
// and simply return the output! // and simply return the output!
return output; return output;
@ -929,9 +1031,10 @@ function compiler(input) {
// Now I'm just exporting everything... // Now I'm just exporting everything...
module.exports = { module.exports = {
tokenizer: tokenizer, tokenizer,
parser: parser, parser,
transformer: transformer, traverser,
codeGenerator: codeGenerator, transformer,
compiler: compiler codeGenerator,
compiler,
}; };
Loading…
Cancel
Save