Add parser inline annotations

This commit is contained in:
James Kyle 2016-03-30 19:48:37 -07:00
parent 8466b6a67d
commit 0baea5b6ad

View File

@ -473,14 +473,6 @@ function tokenizer(input) {
return tokens;
}
/**
* -----------------------------------------------------------------------------
* *Note:* This is all I've written so far, so the code below isn't annotated
* yet. You can still read it all and it totally works, but I plan on improving
* this in the near future.
* -----------------------------------------------------------------------------
*/
/**
* ============================================================================
* /o ل͜ o\
@ -488,63 +480,155 @@ function tokenizer(input) {
* ============================================================================
*/
/**
* For our parser we're going to take our array of tokens and turn it into an
* AST.
*
* [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] }
*/
// Okay, so we define a `parser` function that accepts our array of `tokens`.
function parser(tokens) {
// Again we keep a `current` variable that we will use as a cursor.
var current = 0;
// But this time we're going to use recursion instead of a `while` loop. So we
// define a `walk` function.
function walk() {
// Inside the walk function we start by grabbing the `current` token.
var token = tokens[current];
// We're going to split each type of token off into a different code path,
// starting off with `number` tokens.
//
// We test to see if we have a `number` token.
if (token.type === 'number') {
// If we have one, we'll increment `current`.
current++;
// And we'll return a new AST node called `NumberLiteral`, setting its
// value to the value of our token.
return {
type: 'NumberLiteral',
value: token.value
};
}
// Next we're going to look for CallExpressions. We start this off when we
// encounter an open parenthesis.
if (
token.type === 'paren' &&
token.value === '('
) {
current++;
// We'll increment `current` to skip the parenthesis since we don't care
// about it in our AST.
token = tokens[++current];
// We create a base node with the type `CallExpression`, and we're going
// to set its name to the current token's value, since the next token after
// the open parenthesis is the name of the function.
var node = {
type: 'CallExpression',
name: tokens[current].value,
name: token.value,
params: []
};
current++;
// We increment `current` *again* to skip the name token.
token = tokens[++current];
// And now we want to loop through each token that will be the `params` of
// our `CallExpression` until we encounter a closing parenthesis.
//
// Now this is where recursion comes in. Instead of trying to parse a
// potentially infinitely nested set of nodes we're going to rely on
// recursion to resolve things.
//
// To explain this, let's take our Lisp code. You can see that the
// parameters of `add` are a number and a nested `CallExpression` that
// includes its own numbers.
//
// (add 2 (subtract 4 2))
//
// You'll also notice that in our tokens array we have multiple closing
// parentheses.
//
// [
// { type: 'paren', value: '(' },
// { type: 'name', value: 'add' },
// { type: 'number', value: '2' },
// { type: 'paren', value: '(' },
// { type: 'name', value: 'subtract' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' } <<< Closing parenthesis
// ]
//
// We're going to rely on the nested `walk` function to increment our
// `current` variable past any nested `CallExpressions`.
// So we create a `while` loop that will continue until it encounters a
// token with a `type` of `'paren'` and a `value` of a closing
// parenthesis.
while (
token.type !== 'paren' ||
token.value !== ')'
) {
// We'll call the `walk` function, which will return a `node`, and we'll
// push it into our `node.params`.
node.params.push(walk());
token = tokens[current];
}
// Finally we will increment `current` one last time to skip the closing
// parenthesis.
current++;
// And return the node.
return node;
}
// Again, if we haven't recognized the token type by now we're going to
// throw an error.
throw new TypeError(token.type);
}
var program = {
// Now, we're going to create our AST, whose root is a
// `Program` node.
var ast = {
type: 'Program',
body: []
};
// And we're going to kickstart our `walk` function, pushing nodes to our
// `ast.body` array.
//
// The reason we are doing this inside a loop is because our program can have
// `CallExpressions` after one another instead of being nested.
//
// (add 2 2)
// (subtract 4 2)
//
while (current < tokens.length) {
program.body.push(walk());
ast.body.push(walk());
}
return program;
// At the end of our parser we'll return the AST.
return ast;
}
/**
* ----------------------------------------------------------------------------
* *Note:* This is all I've written so far, so the code below isn't annotated
* yet. You can still read it all and it totally works, but I plan on improving
* this in the near future.
* ----------------------------------------------------------------------------
*/
/**
* ============================================================================
* (><)
@ -552,7 +636,7 @@ function parser(tokens) {
* ============================================================================
*/
function traverser(program, visitor) {
function traverser(ast, visitor) {
function traverseArray(array, parent) {
array.forEach(function(child) {
@ -581,7 +665,7 @@ function traverser(program, visitor) {
}
}
traverseNode(program, null);
traverseNode(ast, null);
}
/**
@ -591,15 +675,15 @@ function traverser(program, visitor) {
* ============================================================================
*/
function transformer(program) {
var ast = {
function transformer(ast) {
var newAst = {
type: 'Program',
body: []
};
program._context = ast.body;
ast._context = newAst.body;
traverser(program, {
traverser(ast, {
NumberLiteral: function(node, parent) {
parent._context.push({
type: 'NumberLiteral',
@ -630,7 +714,7 @@ function transformer(program) {
}
});
return ast;
return newAst;
}
/**