mirror of
https://github.com/jamiebuilds/the-super-tiny-compiler.git
synced 2024-10-27 20:34:08 +00:00
Add parser inline annotations
This commit is contained in:
parent
8466b6a67d
commit
0baea5b6ad
@ -473,14 +473,6 @@ function tokenizer(input) {
|
|||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* -----------------------------------------------------------------------------
|
|
||||||
* *Note:* This is all I've written so far, so the code below isn't annotated
|
|
||||||
* yet. You can still read it all and it totally works, but I plan on improving
|
|
||||||
* this in the near future
|
|
||||||
* -----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ============================================================================
|
* ============================================================================
|
||||||
* ヽ/❀o ل͜ o\ノ
|
* ヽ/❀o ل͜ o\ノ
|
||||||
@ -488,63 +480,155 @@ function tokenizer(input) {
|
|||||||
* ============================================================================
|
* ============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For our parser we're going to take our array of tokens and turn it into an
|
||||||
|
* AST.
|
||||||
|
*
|
||||||
|
* [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] }
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Okay, so we define a `parser` function that accepts our array of `tokens`.
|
||||||
function parser(tokens) {
|
function parser(tokens) {
|
||||||
|
|
||||||
|
// Again we keep a `current` variable that we will use as a cursor.
|
||||||
var current = 0;
|
var current = 0;
|
||||||
|
|
||||||
|
// But this time we're going to use recursion instead of a `while` loop. So we
|
||||||
|
// define a `walk` function.
|
||||||
function walk() {
|
function walk() {
|
||||||
|
|
||||||
|
// Inside the walk function we start by grabbing the `current` token.
|
||||||
var token = tokens[current];
|
var token = tokens[current];
|
||||||
|
|
||||||
|
// We're going to split each type of token off into a different code path,
|
||||||
|
// starting off with `number` tokens.
|
||||||
|
//
|
||||||
|
// We test to see if we have a `number` token.
|
||||||
if (token.type === 'number') {
|
if (token.type === 'number') {
|
||||||
|
|
||||||
|
// If we have one, we'll increment `current`.
|
||||||
current++;
|
current++;
|
||||||
|
|
||||||
|
// And we'll return a new AST node called `NumberLiteral`, setting its
|
||||||
|
// value to the value of our token.
|
||||||
return {
|
return {
|
||||||
type: 'NumberLiteral',
|
type: 'NumberLiteral',
|
||||||
value: token.value
|
value: token.value
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Next we're going to look for CallExpressions. We start this off when we
|
||||||
|
// encounter an open parenthesis.
|
||||||
if (
|
if (
|
||||||
token.type === 'paren' &&
|
token.type === 'paren' &&
|
||||||
token.value === '('
|
token.value === '('
|
||||||
) {
|
) {
|
||||||
current++;
|
|
||||||
|
|
||||||
|
// We'll increment `current` to skip the parenthesis since we don't care
|
||||||
|
// about it in our AST.
|
||||||
|
token = tokens[++current];
|
||||||
|
|
||||||
|
// We create a base node with the type `CallExpression`, and we're going
|
||||||
|
// to set the name as the current token's value since the next token after
|
||||||
|
// the open parenthesis is the name of the function.
|
||||||
var node = {
|
var node = {
|
||||||
type: 'CallExpression',
|
type: 'CallExpression',
|
||||||
name: tokens[current].value,
|
name: token.value,
|
||||||
params: []
|
params: []
|
||||||
};
|
};
|
||||||
|
|
||||||
current++;
|
// We increment `current` *again* to skip the name token.
|
||||||
|
token = tokens[++current];
|
||||||
|
|
||||||
|
// And now we want to loop through each token that will be the `params` of
|
||||||
|
// our `CallExpression` until we encounter a closing parenthesis.
|
||||||
|
//
|
||||||
|
// Now this is where recursion comes in. Instead of trying to parse a
|
||||||
|
// potentially infinitely nested set of nodes we're going to rely on
|
||||||
|
// recursion to resolve things.
|
||||||
|
//
|
||||||
|
// To explain this, let's take our Lisp code. You can see that the
|
||||||
|
// parameters of the `add` are a number and a nested `CallExpression` that
|
||||||
|
// includes its own numbers.
|
||||||
|
//
|
||||||
|
// (add 2 (subtract 4 2))
|
||||||
|
//
|
||||||
|
// You'll also notice that in our tokens array we have multiple closing
|
||||||
|
// parentheses.
|
||||||
|
//
|
||||||
|
// [
|
||||||
|
// { type: 'paren', value: '(' },
|
||||||
|
// { type: 'name', value: 'add' },
|
||||||
|
// { type: 'number', value: '2' },
|
||||||
|
// { type: 'paren', value: '(' },
|
||||||
|
// { type: 'name', value: 'subtract' },
|
||||||
|
// { type: 'number', value: '4' },
|
||||||
|
// { type: 'number', value: '2' },
|
||||||
|
// { type: 'paren', value: ')' }, <<< Closing parenthesis
|
||||||
|
// { type: 'paren', value: ')' } <<< Closing parenthesis
|
||||||
|
// ]
|
||||||
|
//
|
||||||
|
// We're going to rely on the nested `walk` function to increment our
|
||||||
|
// `current` variable past any nested `CallExpressions`.
|
||||||
|
|
||||||
|
// So we create a `while` loop that will continue until it encounters a
|
||||||
|
// token with a `type` of `'paren'` and a `value` of a closing
|
||||||
|
// parenthesis.
|
||||||
while (
|
while (
|
||||||
token.type !== 'paren' ||
|
token.type !== 'paren' ||
|
||||||
token.value !== ')'
|
token.value !== ')'
|
||||||
) {
|
) {
|
||||||
|
// we'll call the `walk` function which will return a `node` and we'll
|
||||||
|
// push it into our `node.params`.
|
||||||
node.params.push(walk());
|
node.params.push(walk());
|
||||||
token = tokens[current];
|
token = tokens[current];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finally we will increment `current` one last time to skip the closing
|
||||||
|
// parenthesis.
|
||||||
current++;
|
current++;
|
||||||
|
|
||||||
|
// And return the node.
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Again, if we haven't recognized the token type by now we're going to
|
||||||
|
// throw an error.
|
||||||
throw new TypeError(token.type);
|
throw new TypeError(token.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
var program = {
|
// Now, we're going to create our AST which will have a root which is a
|
||||||
|
// `Program` node.
|
||||||
|
var ast = {
|
||||||
type: 'Program',
|
type: 'Program',
|
||||||
body: []
|
body: []
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// And we're going to kickstart our `walk` function, pushing nodes to our
|
||||||
|
// `ast.body` array.
|
||||||
|
//
|
||||||
|
// The reason we are doing this inside a loop is because our program can have
|
||||||
|
// `CallExpressions` after one another instead of being nested.
|
||||||
|
//
|
||||||
|
// (add 2 2)
|
||||||
|
// (subtract 4 2)
|
||||||
|
//
|
||||||
while (current < tokens.length) {
|
while (current < tokens.length) {
|
||||||
program.body.push(walk());
|
ast.body.push(walk());
|
||||||
}
|
}
|
||||||
|
|
||||||
return program;
|
// At the end of our parser we'll return the AST.
|
||||||
|
return ast;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
* *Note:* This is all I've written so far, so the code below isn't annotated
|
||||||
|
* yet. You can still read it all and it totally works, but I plan on improving
|
||||||
|
* this in the near future
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ============================================================================
|
* ============================================================================
|
||||||
* ⌒(❀>◞౪◟<❀)⌒
|
* ⌒(❀>◞౪◟<❀)⌒
|
||||||
@ -552,7 +636,7 @@ function parser(tokens) {
|
|||||||
* ============================================================================
|
* ============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
function traverser(program, visitor) {
|
function traverser(ast, visitor) {
|
||||||
|
|
||||||
function traverseArray(array, parent) {
|
function traverseArray(array, parent) {
|
||||||
array.forEach(function(child) {
|
array.forEach(function(child) {
|
||||||
@ -581,7 +665,7 @@ function traverser(program, visitor) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
traverseNode(program, null);
|
traverseNode(ast, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -591,15 +675,15 @@ function traverser(program, visitor) {
|
|||||||
* ============================================================================
|
* ============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
function transformer(program) {
|
function transformer(ast) {
|
||||||
var ast = {
|
var newAst = {
|
||||||
type: 'Program',
|
type: 'Program',
|
||||||
body: []
|
body: []
|
||||||
};
|
};
|
||||||
|
|
||||||
program._context = ast.body;
|
ast._context = newAst.body;
|
||||||
|
|
||||||
traverser(program, {
|
traverser(ast, {
|
||||||
NumberLiteral: function(node, parent) {
|
NumberLiteral: function(node, parent) {
|
||||||
parent._context.push({
|
parent._context.push({
|
||||||
type: 'NumberLiteral',
|
type: 'NumberLiteral',
|
||||||
@ -630,7 +714,7 @@ function transformer(program) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
return ast;
|
return newAst;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user