Add parser inline annotations

2025-06-13 12:54:07 +00:00 · 2016-03-30 19:48:37 -07:00 · 2016-03-30 19:48:37 -07:00 · 0baea5b6ad
commit 0baea5b6ad
parent 8466b6a67d
1 changed files with 105 additions and 21 deletions
--- a/super-tiny-compiler.js
+++ b/super-tiny-compiler.js
@ -473,14 +473,6 @@ function tokenizer(input) {
  return tokens;
 }
 /**
 * -----------------------------------------------------------------------------
 * *Note:* This is all I've written so far, so the code below isn't annotated
 * yet. You can still read it all and it totally works, but I plan on improving
 * this in the near future.
 * -----------------------------------------------------------------------------
 */
 /**
 * ============================================================================
 *                                 ヽ/❀o ل͜ o\ﾉ
@ -488,63 +480,155 @@ function tokenizer(input) {
 * ============================================================================
 */
 /**
 * For our parser we're going to take our array of tokens and turn it into an
 * AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 */
 // Okay, so we define a `parser` function that accepts our array of `tokens`.
 function parser(tokens) {
  // Again we keep a `current` variable that we will use as a cursor.
  var current = 0;
  // But this time we're going to use recursion instead of a `while` loop. So we
  // define a `walk` function.
  function walk() {
    // Inside the walk function we start by grabbing the `current` token.
    var token = tokens[current];
    // We're going to split each type of token off into a different code path,
    // starting off with `number` tokens.
    //
    // We test to see if we have a `number` token.
    if (token.type === 'number') {
      // If we have one, we'll increment `current`.
      current++;
      // And we'll return a new AST node called `NumberLiteral`, setting its
      // value to the value of our token.
      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }
    // Next we're going to look for CallExpressions. We start this off when we
    // encounter an open parenthesis.
    if (
      token.type === 'paren' &&
      token.value === '('
    ) {
      current++;
      // We'll increment `current` to skip the parenthesis since we don't care
      // about it in our AST.
      token = tokens[++current];
      // We create a base node with the type `CallExpression`, and we're going
      // to set the name as the current token's value since the next token after
      // the open parenthesis is the name of the function.
      var node = {
        type: 'CallExpression',
-        name: tokens[current].value,
+        name: token.value,
        params: []
      };
-      current++;
+      // We increment `current` *again* to skip the name token.
      token = tokens[++current];
      // And now we want to loop through each token that will be the `params` of
      // our `CallExpression` until we encounter a closing parenthesis.
      //
      // Now this is where recursion comes in. Instead of trying to parse a
      // potentially infinitely nested set of nodes we're going to rely on
      // recursion to resolve things.
      //
      // To explain this, let's take our Lisp code. You can see that the
      // parameters of `add` are a number and a nested `CallExpression` that
      // includes its own numbers.
      //
      //   (add 2 (subtract 4 2))
      //
      // You'll also notice that in our tokens array we have multiple closing
      // parentheses.
      //
      //   [
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'add'      },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'subtract' },
      //     { type: 'number', value: '4'        },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: ')'        }, <<< Closing parenthesis
      //     { type: 'paren',  value: ')'        }  <<< Closing parenthesis
      //   ]
      //
      // We're going to rely on the nested `walk` function to increment our
      // `current` variable past any nested `CallExpressions`.
      // So we create a `while` loop that will continue until it encounters a
      // token with a `type` of `'paren'` and a `value` of a closing
      // parenthesis.
      while (
        token.type !== 'paren' ||
        token.value !== ')'
      ) {
        // we'll call the `walk` function which will return a `node` and we'll
        // push it into our `node.params`.
        node.params.push(walk());
        token = tokens[current];
      }
      // Finally we will increment `current` one last time to skip the closing
      // parenthesis.
      current++;
      // And return the node.
      return node;
    }
    // Again, if we haven't recognized the token type by now we're going to
    // throw an error.
    throw new TypeError(token.type);
  }
-  var program = {
+  // Now, we're going to create our AST which will have a root which is a
  // `Program` node.
  var ast = {
    type: 'Program',
    body: []
  };
  // And we're going to kickstart our `walk` function, pushing nodes to our
  // `ast.body` array.
  //
  // The reason we are doing this inside a loop is because our program can have
  // `CallExpressions` after one another instead of being nested.
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  while (current < tokens.length) {
-    program.body.push(walk());
+    ast.body.push(walk());
  }
-  return program;
+  // At the end of our parser we'll return the AST.
  return ast;
 }
 /**
 * ----------------------------------------------------------------------------
 * *Note:* This is all I've written so far, so the code below isn't annotated
 * yet. You can still read it all and it totally works, but I plan on improving
 * this in the near future.
 * ----------------------------------------------------------------------------
 */
 /**
 * ============================================================================
 *                                 ⌒(❀>◞౪◟<❀)⌒
@ -552,7 +636,7 @@ function parser(tokens) {
 * ============================================================================
 */
-function traverser(program, visitor) {
+function traverser(ast, visitor) {
  function traverseArray(array, parent) {
    array.forEach(function(child) {
@ -581,7 +665,7 @@ function traverser(program, visitor) {
    }
  }
-  traverseNode(program, null);
+  traverseNode(ast, null);
 }
 /**
@ -591,15 +675,15 @@ function traverser(program, visitor) {
 * ============================================================================
 */
-function transformer(program) {
+function transformer(ast) {
-  var ast = {
+  var newAst = {
    type: 'Program',
    body: []
  };
-  program._context = ast.body;
+  ast._context = newAst.body;
-  traverser(program, {
+  traverser(ast, {
    NumberLiteral: function(node, parent) {
      parent._context.push({
        type: 'NumberLiteral',
@ -630,7 +714,7 @@ function transformer(program) {
    }
  });
-  return ast;
+  return newAst;
 }
 /**