jamiebuilds_the-super-tiny-.../2-parser.js
2017-04-26 15:45:39 +00:00

192 lines
6.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* ============================================================================
* ヽ/❀o ل͜ o\ノ
* THE PARSER!!!
* ============================================================================
*/
/**
* For our parser we're going to take our array of tokens and turn it into an
* AST.
*
* [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] }
*/
// Okay, so we define a `parser` function that accepts our array of `tokens`.
function parser(tokens) {
// Again we keep a `current` variable that we will use as a cursor.
// Because we're not iterating over the source code, we are just
// using a number.
let current = 0;
// But this time we're going to use recursion instead of a `while` loop. So we
// define a `walk` function.
function walk() {
// Inside the walk function we start by grabbing the `current` token.
let token = tokens[current];
if (!token) return;
// We're going to split each type of token off into a different code path,
// starting off with `number` tokens.
//
// We test to see if we have a `number` token.
if (token.type === 'number') {
// If we have one, we'll increment `current`.
current++;
// And we'll return a new AST node called `NumberLiteral` and setting its
// value to the value of our token.
return {
type: 'NumberLiteral',
value: token.value,
start: token.start,
end: token.end,
};
}
// If we have a string we will do the same as number and create a
// `StringLiteral` node.
if (token.type === 'string') {
current++;
return {
type: 'StringLiteral',
value: token.value,
start: token.start,
end: token.end,
};
}
// Next we're going to look for CallExpressions. We start this off when we
// encounter an open parenthesis.
if (
token.type === 'paren' &&
token.value === '('
) {
const initial = token;
// We'll increment `current` to skip the parenthesis since we don't care
// about it in our AST.
token = tokens[++current];
// If we've reached the end of the program, throw a syntax error.
if (!token) {
throw new SyntaxError(`Unclosed function call at ${initial.start}`);
}
// We create a base node with the type `CallExpression`, and we're going
// to set the name as the current token's value since the next token after
// the open parenthesis is the name of the function.
let node = {
type: 'CallExpression',
name: token.value,
params: [],
start: token.start,
};
// If the user writes `()` as code, throw a syntax error.
if (token.type === 'paren' && token.value === ')') {
throw new SyntaxError(`Unexpected empty function call at ${tokens[current - 1].start}${token.end}`)
}
// We increment `current` *again* to skip the name token.
token = tokens[++current];
// If we've reached the end of the program (for example: `(foo`), throw a syntax error.
if (!token) {
throw new SyntaxError(`Unclosed function call at ${initial.start}-${tokens[current - 1].end}`);
}
// And now we want to loop through each token that will be the `params` of
// our `CallExpression` until we encounter a closing parenthesis.
//
// Now this is where recursion comes in. Instead of trying to parse a
// potentially infinitely nested set of nodes we're going to rely on
// recursion to resolve things.
//
// To explain this, let's take our Lisp code. You can see that the
// parameters of the `add` are a number and a nested `CallExpression` that
// includes its own numbers.
//
// (add 2 (subtract 4 2))
//
// You'll also notice that in our tokens array we have multiple closing
// parenthesis.
//
// [
// { type: 'paren', value: '(' },
// { type: 'name', value: 'add' },
// { type: 'number', value: '2' },
// { type: 'paren', value: '(' },
// { type: 'name', value: 'subtract' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// ]
//
// We're going to rely on the nested `walk` function to increment our
// `current` variable past any nested `CallExpression`.
// So we create a `while` loop that will continue until it encounters a
// token with a `type` of `'paren'` and a `value` of a closing
// parenthesis.
while (
(token.type !== 'paren') ||
(token.type === 'paren' && token.value !== ')')
) {
// we'll call the `walk` function which will return a `node` and we'll
// push it into our `node.params`.
node.params.push(walk());
token = tokens[current];
// If we've reached the end of the program, throw a syntax error.
if (!token) {
throw new SyntaxError('Unclosed function call at ' + initial.start + '-' + tokens[current - 1].end);
}
}
// Next, we save the end position of the call
node.end = token.end;
// Finally we will increment `current` one last time to skip the closing
// parenthesis.
current++;
// And return the node.
return node;
}
// If the user writes a literal name token (`foo`), throw a syntax error.
if (token.type === 'name') {
throw new SyntaxError(`Unexpected name '${token.value}' at ${token.start}${token.end}`)
}
// Again, if we haven't recognized the token type by now we're going to
// throw an error.
throw new SyntaxError(`Unrecognized token: ${JSON.stringify(token)}`);
}
// Now, we're going to create our AST which will have a root which is a
// `Program` node.
let ast = {
type: 'Program',
body: [],
};
// And we're going to kickstart our `walk` function, pushing nodes to our
// `ast.body` array.
//
// The reason we are doing this inside a loop is because our program can have
// `CallExpression` after one another instead of being nested.
//
// (add 2 2)
// (subtract 4 2)
//
while (current < tokens.length) {
ast.body.push(walk());
}
// At the end of our parser we'll return the AST.
return ast;
}
// Just exporting our parser to be used in the final compiler...
module.exports = parser;