diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 6be8b64..3df9936 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -473,14 +473,6 @@ function tokenizer(input) { return tokens; } -/** - * ----------------------------------------------------------------------------- - * *Note:* This is all I've written so far, so the code below isn't annnotated - * yet. You can still read it all and it totally works, but I plan on improving - * this in the near future - * ----------------------------------------------------------------------------- - */ - /** * ============================================================================ * ヽ/❀o ل͜ o\ノ @@ -488,63 +480,155 @@ function tokenizer(input) { * ============================================================================ */ +/** + * For our parser we're going to take our array of tokens and turn it into an + * AST. + * + * [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] } + */ + +// Okay, so we define a `parser` function that accepts our array of `tokens`. function parser(tokens) { + + // Again we keep a `current` variable that we will use as a cursor. var current = 0; + // But this time we're going to use recursion instead of a `while` loop. So we + // define a `walk` function. function walk() { + + // Inside the walk function we start by grabbing the `current` token. var token = tokens[current]; + // We're going to split each type of token off into a different code path, + // starting off with `number` tokens. + // + // We test to see if we have a `number` token. if (token.type === 'number') { + + // If we have one, we'll increment `current`. current++; + // And we'll return a new AST node called `NumberLiteral` and setting its + // value to the value of our token. return { type: 'NumberLiteral', value: token.value }; } + // Next we're going to look for CallExpressions. We start this off when we + // encounter an open parenthesis. 
if ( token.type === 'paren' && token.value === '(' ) { - current++; + // We'll increment `current` to skip the parenthesis since we don't care + // about it in our AST. + token = tokens[++current]; + + // We create a base node with the type `CallExpression`, and we're going + // to set the name as the current token's value since the next token after + // the open parenthesis is the name of the function. var node = { type: 'CallExpression', - name: tokens[current].value, + name: token.value, params: [] }; - current++; + // We increment `current` *again* to skip the name token. + token = tokens[++current]; + // And now we want to loop through each token that will be the `params` of + // our `CallExpression` until we encounter a closing parenthesis. + // + // Now this is where recursion comes in. Instead of trying to parse a + // potentially infinitely nested set of nodes we're going to rely on + // recursion to resolve things. + // + // To explain this, let's take our Lisp code. You can see that the + // parameters of the `add` are a number and a nested `CallExpression` that + // includes its own numbers. + // + // (add 2 (subtract 4 2)) + // + // You'll also notice that in our tokens array we have multiple closing + // parentheses. + // + // [ + // { type: 'paren', value: '(' }, + // { type: 'name', value: 'add' }, + // { type: 'number', value: '2' }, + // { type: 'paren', value: '(' }, + // { type: 'name', value: 'subtract' }, + // { type: 'number', value: '4' }, + // { type: 'number', value: '2' }, + // { type: 'paren', value: ')' }, <<< Closing parenthesis + // { type: 'paren', value: ')' } <<< Closing parenthesis + // ] + // + // We're going to rely on the nested `walk` function to increment our + // `current` variable past any nested `CallExpressions`. + + // So we create a `while` loop that will continue until it encounters a + // token with a `type` of `'paren'` and a `value` of a closing + // parenthesis. 
while ( token.type !== 'paren' || token.value !== ')' ) { + // we'll call the `walk` function which will return a `node` and we'll + // push it into our `node.params`. node.params.push(walk()); token = tokens[current]; } + // Finally we will increment `current` one last time to skip the closing + // parenthesis. current++; + // And return the node. return node; } + // Again, if we haven't recognized the token type by now we're going to + // throw an error. throw new TypeError(token.type); } - var program = { + // Now, we're going to create our AST which will have a root which is a + // `Program` node. + var ast = { type: 'Program', body: [] }; + // And we're going to kickstart our `walk` function, pushing nodes to our + // `ast.body` array. + // + // The reason we are doing this inside a loop is because our program can have + // `CallExpressions` after one another instead of being nested. + // + // (add 2 2) + // (subtract 4 2) + // while (current < tokens.length) { - program.body.push(walk()); + ast.body.push(walk()); } - return program; + // At the end of our parser we'll return the AST. + return ast; } +/** + * ---------------------------------------------------------------------------- + * *Note:* This is all I've written so far, so the code below isn't annotated + * yet. 
You can still read it all and it totally works, but I plan on improving + * this in the near future + * ---------------------------------------------------------------------------- + */ + /** * ============================================================================ * ⌒(❀>◞౪◟<❀)⌒ @@ -552,7 +636,7 @@ function parser(tokens) { * ============================================================================ */ -function traverser(program, visitor) { +function traverser(ast, visitor) { function traverseArray(array, parent) { array.forEach(function(child) { @@ -581,7 +665,7 @@ function traverser(program, visitor) { } } - traverseNode(program, null); + traverseNode(ast, null); } /** @@ -591,15 +675,15 @@ function traverser(program, visitor) { * ============================================================================ */ -function transformer(program) { - var ast = { +function transformer(ast) { + var newAst = { type: 'Program', body: [] }; - program._context = ast.body; + ast._context = newAst.body; - traverser(program, { + traverser(ast, { NumberLiteral: function(node, parent) { parent._context.push({ type: 'NumberLiteral', @@ -630,7 +714,7 @@ function transformer(program) { } }); - return ast; + return newAst; } /**