Prepare for greatness

This commit is contained in:
James Kyle 2016-12-31 15:31:43 -05:00
parent 3356bea996
commit c4cb49b184
4 changed files with 227 additions and 117 deletions

View File

@ -1,4 +1,4 @@
<a href="super-tiny-compiler.js"><img width="731" alt="THE SUPER TINY COMPILER" src="https://cloud.githubusercontent.com/assets/952783/14171276/ed7bf716-f6e6-11e5-96df-80a031c2769d.png"/></a>
<a href="the-super-tiny-compiler.js"><img width="731" alt="THE SUPER TINY COMPILER" src="https://cloud.githubusercontent.com/assets/952783/14171276/ed7bf716-f6e6-11e5-96df-80a031c2769d.png"/></a>
***Welcome to The Super Tiny Compiler!***
@ -8,7 +8,7 @@ written in easy to read JavaScript.
Reading through the guided code will help you learn about how *most* compilers
work from end to end.
### [Want to jump into the code? Click here](super-tiny-compiler.js)
### [Want to jump into the code? Click here](the-super-tiny-compiler.js)
---
@ -27,7 +27,7 @@ the nerds are able to understand.
### Okay so where do I begin?
Awesome! Head on over to the [super-tiny-compiler.js](super-tiny-compiler.js)
Awesome! Head on over to the [the-super-tiny-compiler.js](the-super-tiny-compiler.js)
file.
### I'm back, that didn't make sense

7
package.json Normal file
View File

@ -0,0 +1,7 @@
{
"name": "the-super-tiny-compiler",
"version": "0.0.0",
"author": "James Kyle <me@thejameskyle.com> (thejameskyle.com)",
"license": "CC-BY-4.0",
"main": "./the-super-tiny-compiler.js"
}

26
test.js
View File

@ -1,16 +1,16 @@
var superTinyCompiler = require('./super-tiny-compiler');
var assert = require('assert');
const {
tokenizer,
parser,
transformer,
codeGenerator,
compiler,
} = require('./super-tiny-compiler');
const assert = require('assert');
var tokenizer = superTinyCompiler.tokenizer;
var parser = superTinyCompiler.parser;
var transformer = superTinyCompiler.transformer;
var codeGenerator = superTinyCompiler.codeGenerator;
var compiler = superTinyCompiler.compiler;
const input = '(add 2 (subtract 4 2))';
const output = 'add(2, subtract(4, 2));';
var input = '(add 2 (subtract 4 2))';
var output = 'add(2, subtract(4, 2));';
var tokens = [
const tokens = [
{ type: 'paren', value: '(' },
{ type: 'name', value: 'add' },
{ type: 'number', value: '2' },
@ -22,7 +22,7 @@ var tokens = [
{ type: 'paren', value: ')' }
];
var ast = {
const ast = {
type: 'Program',
body: [{
type: 'CallExpression',
@ -44,7 +44,7 @@ var ast = {
}]
};
var newAst = {
const newAst = {
type: 'Program',
body: [{
type: 'ExpressionStatement',

View File

@ -1,3 +1,5 @@
'use strict';
/**
* TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
* T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
@ -150,7 +152,7 @@
* { type: 'number', value: '4' },
* { type: 'number', value: '2' },
* { type: 'paren', value: ')' },
* { type: 'paren', value: ')' }
* { type: 'paren', value: ')' },
* ]
*
* And an Abstract Syntax Tree (AST) might look like this:
@ -162,16 +164,16 @@
* name: 'add',
* params: [{
* type: 'NumberLiteral',
* value: '2'
* value: '2',
* }, {
* type: 'CallExpression',
* name: 'subtract',
* params: [{
* type: 'NumberLiteral',
* value: '4'
* value: '4',
* }, {
* type: 'NumberLiteral',
* value: '2'
* value: '2',
* }]
* }]
* }]
@ -198,7 +200,7 @@
*
* {
* type: 'NumberLiteral',
* value: '2'
* value: '2',
* }
*
* Or maybe a node for a "CallExpression":
@ -206,7 +208,7 @@
* {
* type: 'CallExpression',
* name: 'subtract',
* params: [...nested nodes go here...]
* params: [...nested nodes go here...],
* }
*
* When transforming the AST we can manipulate nodes by
@ -259,7 +261,7 @@
* we would likely introduce all sorts of abstractions here. But just visiting
* each node in the tree is enough.
*
* The reason I use the word visiting is because there is this pattern of how
* The reason I use the word "visiting" is because there is this pattern of how
* to represent operations on elements of an object structure.
*
* Visitors
@ -270,7 +272,7 @@
*
* var visitor = {
* NumberLiteral() {},
* CallExpression() {}
* CallExpression() {},
* };
*
* When we traverse our AST we will call the methods on this visitor whenever we
@ -281,7 +283,45 @@
*
* var visitor = {
* NumberLiteral(node, parent) {},
* CallExpression(node, parent) {}
* CallExpression(node, parent) {},
* };
*
* We call these functions when we "enter" the node. But there is also the
* possibility of calling things on "exit".
*
* Imagine our tree structure from before in list form:
*
* - Program
* - CallExpression
* - NumberLiteral
* - CallExpression
* - NumberLiteral
* - NumberLiteral
*
* As we traverse down, we're going to reach branches with dead ends. As we
* finish each branch of the tree we "exit" it. So going down the tree we
* "enter" each node, and going back up we "exit".
*
* -> Program (enter)
* -> CallExpression (enter)
* -> NumberLiteral (enter)
* <- NumberLiteral (exit)
* -> CallExpression (enter)
* -> NumberLiteral (enter)
* <- NumberLiteral (exit)
* -> NumberLiteral (enter)
* <- NumberLiteral (exit)
* <- CallExpression (exit)
* <- CallExpression (exit)
* <- Program (exit)
*
* In order to support that, our visitors will look like this:
*
* var visitor = {
* NumberLiteral: {
* enter(node, parent) {},
* exit(node, parent) {},
* }
* };
*/
@ -343,10 +383,10 @@
function tokenizer(input) {
// A `current` variable for tracking our position in the code like a cursor.
var current = 0;
let current = 0;
// And a `tokens` array for pushing our tokens to.
var tokens = [];
let tokens = [];
// We start by creating a `while` loop where we are setting up our `current`
// variable to be incremented as much as we want `inside` the loop.
@ -356,10 +396,10 @@ function tokenizer(input) {
while (current < input.length) {
// We're also going to store the `current` character in the `input`.
var char = input[current];
let char = input[current];
// The first thing we want to check for is an open parenthesis. This will
// later be used for `CallExpressions` but for now we only care about the
// later be used for `CallExpression` but for now we only care about the
// character.
//
// We check to see if we have an open parenthesis:
@ -369,7 +409,7 @@ function tokenizer(input) {
// to an open parenthesis.
tokens.push({
type: 'paren',
value: '('
value: '(',
});
// Then we increment `current`
@ -385,7 +425,7 @@ function tokenizer(input) {
if (char === ')') {
tokens.push({
type: 'paren',
value: ')'
value: ')',
});
current++;
continue;
@ -398,7 +438,7 @@ function tokenizer(input) {
//
// So here we're just going to test for existence and if it does exist we're
// going to just `continue` on.
var WHITESPACE = /\s/;
let WHITESPACE = /\s/;
if (WHITESPACE.test(char)) {
current++;
continue;
@ -413,12 +453,12 @@ function tokenizer(input) {
// Only two separate tokens
//
// So we start this off when we encounter the first number in a sequence.
var NUMBERS = /[0-9]/;
let NUMBERS = /[0-9]/;
if (NUMBERS.test(char)) {
// We're going to create a `value` string that we are going to push
// characters to.
var value = '';
let value = '';
// Then we're going to loop through each character in the sequence until
// we encounter a character that is not a number, pushing each character
@ -429,15 +469,42 @@ function tokenizer(input) {
}
// After that we push our `number` token to the `tokens` array.
tokens.push({
type: 'number',
value: value
});
tokens.push({ type: 'number', value });
// And we continue on.
continue;
}
// We'll also add support for strings in our language which will be any
// text surrounded by double quotes (").
//
// (concat "foo" "bar")
// ^^^ ^^^ string tokens
//
// We'll start by checking for the opening quote:
if (char === '"') {
// Keep a `value` variable for building up our string token.
let value = '';
// We'll skip the opening double quote in our token.
char = input[++current];
// Then we'll iterate through each character until we reach another
// double quote.
while (char !== '"') {
value += char;
char = input[++current];
}
// Skip the closing double quote.
char = input[++current];
// And add our `string` token to the `tokens` array.
tokens.push({ type: 'string', value });
continue;
}
// The last type of token will be a `name` token. This is a sequence of
// letters instead of numbers, that are the names of functions in our lisp
// syntax.
@ -446,9 +513,9 @@ function tokenizer(input) {
// ^^^
// Name token
//
var LETTERS = /[a-z]/i;
let LETTERS = /[a-z]/i;
if (LETTERS.test(char)) {
var value = '';
let value = '';
// Again we're just going to loop through all the letters pushing them to
// a value.
@ -458,10 +525,7 @@ function tokenizer(input) {
}
// And pushing that value as a token with the type `name` and continuing.
tokens.push({
type: 'name',
value: value
});
tokens.push({ type: 'name', value });
continue;
}
@ -493,14 +557,14 @@ function tokenizer(input) {
function parser(tokens) {
// Again we keep a `current` variable that we will use as a cursor.
var current = 0;
let current = 0;
// But this time we're going to use recursion instead of a `while` loop. So we
// define a `walk` function.
function walk() {
// Inside the walk function we start by grabbing the `current` token.
var token = tokens[current];
let token = tokens[current];
// We're going to split each type of token off into a different code path,
// starting off with `number` tokens.
@ -515,7 +579,18 @@ function parser(tokens) {
// value to the value of our token.
return {
type: 'NumberLiteral',
value: token.value
value: token.value,
};
}
// If we have a string we will do the same as number and create a
// `StringLiteral` node.
if (token.type === 'string') {
current++;
return {
type: 'StringLiteral',
value: token.value,
};
}
@ -533,10 +608,10 @@ function parser(tokens) {
// We create a base node with the type `CallExpression`, and we're going
// to set the name as the current token's value since the next token after
// the open parenthesis is the name of the function.
var node = {
let node = {
type: 'CallExpression',
name: token.value,
params: []
params: [],
};
// We increment `current` *again* to skip the name token.
@ -567,11 +642,11 @@ function parser(tokens) {
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' } <<< Closing parenthesis
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// ]
//
// We're going to rely on the nested `walk` function to increment our
// `current` variable past any nested `CallExpressions`.
// `current` variable past any nested `CallExpression`.
// So we create a `while` loop that will continue until it encounters a
// token with a `type` of `'paren'` and a `value` of a closing
@ -601,16 +676,16 @@ function parser(tokens) {
// Now, we're going to create our AST which will have a root which is a
// `Program` node.
var ast = {
let ast = {
type: 'Program',
body: []
body: [],
};
// And we're going to kickstart our `walk` function, pushing nodes to our
// `ast.body` array.
//
// The reason we are doing this inside a loop is because our program can have
// `CallExpressions` after one another instead of being nested.
// `CallExpression` nodes one after another instead of being nested.
//
// (add 2 2)
// (subtract 4 2)
@ -646,7 +721,7 @@ function parser(tokens) {
*
* NumberLiteral(node, parent) {
* // ...
* }
* },
* });
*/
@ -657,7 +732,7 @@ function traverser(ast, visitor) {
// A `traverseArray` function that will allow us to iterate over an array and
// call the next function that we will define: `traverseNode`.
function traverseArray(array, parent) {
array.forEach(function(child) {
array.forEach(child => {
traverseNode(child, parent);
});
}
@ -668,11 +743,12 @@ function traverser(ast, visitor) {
// We start by testing for the existence of a method on the visitor with a
// matching `type`.
var method = visitor[node.type];
let methods = visitor[node.type];
// If it exists we'll call it with the `node` and its `parent`.
if (method) {
method(node, parent);
// If there is an `enter` method for this node type we'll call it with the
// `node` and its `parent`.
if (methods && methods.enter) {
methods.enter(node, parent);
}
// Next we are going to split things up by the current node type.
@ -688,14 +764,15 @@ function traverser(ast, visitor) {
traverseArray(node.body, node);
break;
// Next we do the same with `CallExpressions` and traverse their `params`.
// Next we do the same with `CallExpression` nodes and traverse their `params`.
case 'CallExpression':
traverseArray(node.params, node);
break;
// In the case of `NumberLiterals` we don't have any child nodes to visit,
// so we'll just break.
// In the cases of `NumberLiteral` and `StringLiteral` we don't have any
// child nodes to visit, so we'll just break.
case 'NumberLiteral':
case 'StringLiteral':
break;
// And again, if we haven't recognized the node type then we'll throw an
@ -703,6 +780,12 @@ function traverser(ast, visitor) {
default:
throw new TypeError(node.type);
}
// If there is an `exit` method for this node type we'll call it with the
// `node` and its `parent`.
if (methods && methods.exit) {
methods.exit(node, parent);
}
}
// Finally we kickstart the traverser by calling `traverseNode` with our ast
@ -763,9 +846,9 @@ function transformer(ast) {
// We'll create a `newAst` which like our previous AST will have a program
// node.
var newAst = {
let newAst = {
type: 'Program',
body: []
body: [],
};
// Next I'm going to cheat a little and create a bit of a hack. We're going to
@ -780,51 +863,66 @@ function transformer(ast) {
// We'll start by calling the traverser function with our ast and a visitor.
traverser(ast, {
// The first visitor method accepts `NumberLiterals`
NumberLiteral: function(node, parent) {
// We'll create a new node also named `NumberLiteral` that we will push to
// the parent context.
parent._context.push({
type: 'NumberLiteral',
value: node.value
});
// The first visitor method accepts any `NumberLiteral`
NumberLiteral: {
// We'll visit them on enter.
enter(node, parent) {
// We'll create a new node also named `NumberLiteral` that we will push to
// the parent context.
parent._context.push({
type: 'NumberLiteral',
value: node.value,
});
},
},
// Next up, `CallExpressions`.
CallExpression: function(node, parent) {
// Next we have `StringLiteral`
StringLiteral: {
enter(node, parent) {
parent._context.push({
type: 'StringLiteral',
value: node.value,
});
},
},
// We start creating a new node `CallExpression` with a nested
// `Identifier`.
var expression = {
type: 'CallExpression',
callee: {
type: 'Identifier',
name: node.name
},
arguments: []
};
// Next up, `CallExpression`.
CallExpression: {
enter(node, parent) {
// Next we're going to define a new context on the original
// `CallExpression` node that will reference the `expression`'s arguments
// so that we can push arguments.
node._context = expression.arguments;
// Then we're going to check if the parent node is a `CallExpression`.
// If it is not...
if (parent.type !== 'CallExpression') {
// We're going to wrap our `CallExpression` node with an
// `ExpressionStatement`. We do this because the top level
// `CallExpressions` in JavaScript are actually statements.
expression = {
type: 'ExpressionStatement',
expression: expression
// We start creating a new node `CallExpression` with a nested
// `Identifier`.
let expression = {
type: 'CallExpression',
callee: {
type: 'Identifier',
name: node.name,
},
arguments: [],
};
}
// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
// `context`.
parent._context.push(expression);
// Next we're going to define a new context on the original
// `CallExpression` node that will reference the `expression`'s arguments
// so that we can push arguments.
node._context = expression.arguments;
// Then we're going to check if the parent node is a `CallExpression`.
// If it is not...
if (parent.type !== 'CallExpression') {
// We're going to wrap our `CallExpression` node with an
// `ExpressionStatement`. We do this because the top level
// `CallExpression` nodes in JavaScript are actually statements.
expression = {
type: 'ExpressionStatement',
expression: expression,
};
}
// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
// `context`.
parent._context.push(expression);
},
}
});
@ -858,7 +956,7 @@ function codeGenerator(node) {
return node.body.map(codeGenerator)
.join('\n');
// For `ExpressionStatements` we'll call the code generator on the nested
// For `ExpressionStatement` we'll call the code generator on the nested
// expression and we'll add a semicolon...
case 'ExpressionStatement':
return (
@ -866,7 +964,7 @@ function codeGenerator(node) {
';' // << (...because we like to code the *correct* way)
);
// For `CallExpressions` we will print the `callee`, add an open
// For `CallExpression` we will print the `callee`, add an open
// parenthesis, we'll map through each node in the `arguments` array and run
// them through the code generator, joining them with a comma, and then
// we'll add a closing parenthesis.
@ -879,14 +977,18 @@ function codeGenerator(node) {
')'
);
// For `Identifiers` we'll just return the `node`'s name.
// For `Identifier` we'll just return the `node`'s name.
case 'Identifier':
return node.name;
// For `NumberLiterals` we'll just return the `node`'s value.
// For `NumberLiteral` we'll just return the `node`'s value.
case 'NumberLiteral':
return node.value;
// For `StringLiteral` we'll add quotations around the `node`'s value.
case 'StringLiteral':
return '"' + node.value + '"';
// And if we haven't recognized the node, we'll throw an error.
default:
throw new TypeError(node.type);
@ -911,10 +1013,10 @@ function codeGenerator(node) {
*/
function compiler(input) {
var tokens = tokenizer(input);
var ast = parser(tokens);
var newAst = transformer(ast);
var output = codeGenerator(newAst);
let tokens = tokenizer(input);
let ast = parser(tokens);
let newAst = transformer(ast);
let output = codeGenerator(newAst);
// and simply return the output!
return output;
@ -929,9 +1031,10 @@ function compiler(input) {
// Now I'm just exporting everything...
module.exports = {
tokenizer: tokenizer,
parser: parser,
transformer: transformer,
codeGenerator: codeGenerator,
compiler: compiler
tokenizer,
parser,
traverser,
transformer,
codeGenerator,
compiler,
};