mirror of
https://github.com/jamiebuilds/the-super-tiny-compiler.git
synced 2024-10-27 20:34:08 +00:00
Add parser inline annotations
This commit is contained in:
parent
8466b6a67d
commit
0baea5b6ad
@ -473,14 +473,6 @@ function tokenizer(input) {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* -----------------------------------------------------------------------------
|
||||
* *Note:* This is all I've written so far, so the code below isn't annotated
|
||||
* yet. You can still read it all and it totally works, but I plan on improving
|
||||
* this in the near future.
|
||||
* -----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/**
|
||||
* ============================================================================
|
||||
* ヽ/❀o ل͜ o\ノ
|
||||
@ -488,63 +480,155 @@ function tokenizer(input) {
|
||||
* ============================================================================
|
||||
*/
|
||||
|
||||
/**
|
||||
* For our parser we're going to take our array of tokens and turn it into an
|
||||
* AST.
|
||||
*
|
||||
* [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] }
|
||||
*/
|
||||
|
||||
// Okay, so we define a `parser` function that accepts our array of `tokens`.
|
||||
function parser(tokens) {
|
||||
|
||||
// Again we keep a `current` variable that we will use as a cursor.
|
||||
var current = 0;
|
||||
|
||||
// But this time we're going to use recursion instead of a `while` loop. So we
|
||||
// define a `walk` function.
|
||||
function walk() {
|
||||
|
||||
// Inside the walk function we start by grabbing the `current` token.
|
||||
var token = tokens[current];
|
||||
|
||||
// We're going to split each type of token off into a different code path,
|
||||
// starting off with `number` tokens.
|
||||
//
|
||||
// We test to see if we have a `number` token.
|
||||
if (token.type === 'number') {
|
||||
|
||||
// If we have one, we'll increment `current`.
|
||||
current++;
|
||||
|
||||
// And we'll return a new AST node called `NumberLiteral` and set its
|
||||
// value to the value of our token.
|
||||
return {
|
||||
type: 'NumberLiteral',
|
||||
value: token.value
|
||||
};
|
||||
}
|
||||
|
||||
// Next we're going to look for CallExpressions. We start this off when we
|
||||
// encounter an open parenthesis.
|
||||
if (
|
||||
token.type === 'paren' &&
|
||||
token.value === '('
|
||||
) {
|
||||
current++;
|
||||
|
||||
// We'll increment `current` to skip the parenthesis since we don't care
|
||||
// about it in our AST.
|
||||
token = tokens[++current];
|
||||
|
||||
// We create a base node with the type `CallExpression`, and we're going
|
||||
// to set the name as the current token's value since the next token after
|
||||
// the open parenthesis is the name of the function.
|
||||
var node = {
|
||||
type: 'CallExpression',
|
||||
name: tokens[current].value,
|
||||
name: token.value,
|
||||
params: []
|
||||
};
|
||||
|
||||
current++;
|
||||
// We increment `current` *again* to skip the name token.
|
||||
token = tokens[++current];
|
||||
|
||||
// And now we want to loop through each token that will be the `params` of
|
||||
// our `CallExpression` until we encounter a closing parenthesis.
|
||||
//
|
||||
// Now this is where recursion comes in. Instead of trying to parse a
|
||||
// potentially infinitely nested set of nodes we're going to rely on
|
||||
// recursion to resolve things.
|
||||
//
|
||||
// To explain this, let's take our Lisp code. You can see that the
|
||||
// parameters of `add` are a number and a nested `CallExpression` that
|
||||
// includes its own numbers.
|
||||
//
|
||||
// (add 2 (subtract 4 2))
|
||||
//
|
||||
// You'll also notice that in our tokens array we have multiple closing
|
||||
// parentheses.
|
||||
//
|
||||
// [
|
||||
// { type: 'paren', value: '(' },
|
||||
// { type: 'name', value: 'add' },
|
||||
// { type: 'number', value: '2' },
|
||||
// { type: 'paren', value: '(' },
|
||||
// { type: 'name', value: 'subtract' },
|
||||
// { type: 'number', value: '4' },
|
||||
// { type: 'number', value: '2' },
|
||||
// { type: 'paren', value: ')' }, <<< Closing parenthesis
|
||||
// { type: 'paren', value: ')' } <<< Closing parenthesis
|
||||
// ]
|
||||
//
|
||||
// We're going to rely on the nested `walk` function to increment our
|
||||
// `current` variable past any nested `CallExpressions`.
|
||||
|
||||
// So we create a `while` loop that will continue until it encounters a
|
||||
// token with a `type` of `'paren'` and a `value` of a closing
|
||||
// parenthesis.
|
||||
while (
|
||||
token.type !== 'paren' ||
|
||||
token.value !== ')'
|
||||
) {
|
||||
// we'll call the `walk` function which will return a `node` and we'll
|
||||
// push it into our `node.params`.
|
||||
node.params.push(walk());
|
||||
token = tokens[current];
|
||||
}
|
||||
|
||||
// Finally we will increment `current` one last time to skip the closing
|
||||
// parenthesis.
|
||||
current++;
|
||||
|
||||
// And return the node.
|
||||
return node;
|
||||
}
|
||||
|
||||
// Again, if we haven't recognized the token type by now we're going to
|
||||
// throw an error.
|
||||
throw new TypeError(token.type);
|
||||
}
|
||||
|
||||
var program = {
|
||||
// Now, we're going to create our AST which will have a root which is a
|
||||
// `Program` node.
|
||||
var ast = {
|
||||
type: 'Program',
|
||||
body: []
|
||||
};
|
||||
|
||||
// And we're going to kickstart our `walk` function, pushing nodes to our
|
||||
// `ast.body` array.
|
||||
//
|
||||
// The reason we are doing this inside a loop is because our program can have
|
||||
// `CallExpressions` after one another instead of being nested.
|
||||
//
|
||||
// (add 2 2)
|
||||
// (subtract 4 2)
|
||||
//
|
||||
while (current < tokens.length) {
|
||||
program.body.push(walk());
|
||||
ast.body.push(walk());
|
||||
}
|
||||
|
||||
return program;
|
||||
// At the end of our parser we'll return the AST.
|
||||
return ast;
|
||||
}
|
||||
|
||||
/**
|
||||
* ----------------------------------------------------------------------------
|
||||
* *Note:* This is all I've written so far, so the code below isn't annotated
|
||||
* yet. You can still read it all and it totally works, but I plan on improving
|
||||
* this in the near future.
|
||||
* ----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/**
|
||||
* ============================================================================
|
||||
* ⌒(❀>◞౪◟<❀)⌒
|
||||
@ -552,7 +636,7 @@ function parser(tokens) {
|
||||
* ============================================================================
|
||||
*/
|
||||
|
||||
function traverser(program, visitor) {
|
||||
function traverser(ast, visitor) {
|
||||
|
||||
function traverseArray(array, parent) {
|
||||
array.forEach(function(child) {
|
||||
@ -581,7 +665,7 @@ function traverser(program, visitor) {
|
||||
}
|
||||
}
|
||||
|
||||
traverseNode(program, null);
|
||||
traverseNode(ast, null);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -591,15 +675,15 @@ function traverser(program, visitor) {
|
||||
* ============================================================================
|
||||
*/
|
||||
|
||||
function transformer(program) {
|
||||
var ast = {
|
||||
function transformer(ast) {
|
||||
var newAst = {
|
||||
type: 'Program',
|
||||
body: []
|
||||
};
|
||||
|
||||
program._context = ast.body;
|
||||
ast._context = newAst.body;
|
||||
|
||||
traverser(program, {
|
||||
traverser(ast, {
|
||||
NumberLiteral: function(node, parent) {
|
||||
parent._context.push({
|
||||
type: 'NumberLiteral',
|
||||
@ -630,7 +714,7 @@ function transformer(program) {
|
||||
}
|
||||
});
|
||||
|
||||
return ast;
|
||||
return newAst;
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user