Add parser inline annotations

2026-01-21 00:00:10 +00:00 · 2016-03-30 19:48:37 -07:00 · 2016-03-30 19:48:37 -07:00 · 0baea5b6ad
commit 0baea5b6ad
parent 8466b6a67d
1 changed files with 105 additions and 21 deletions
--- a/super-tiny-compiler.js
+++ b/super-tiny-compiler.js
@ -473,14 +473,6 @@ function tokenizer(input) {
  return tokens;
 }

-/**
- * -----------------------------------------------------------------------------
- * *Note:* This is all I've written so far, so the code below isn't annnotated
- * yet. You can still read it all and it totally works, but I plan on improving
- * this in the near future
- * -----------------------------------------------------------------------------
- */
-
 /**
 * ============================================================================
 *                                 ヽ/❀o ل͜ o\ﾉ
@ -488,63 +480,155 @@ function tokenizer(input) {
 * ============================================================================
 */

+/**
+ * For our parser we're going to take our array of tokens and turn it into an
+ * AST.
+ *
+ *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
+ */
+
+// Okay, so we define a `parser` function that accepts our array of `tokens`.
 function parser(tokens) {
+
+  // Again we keep a `current` variable that we will use as a cursor.
  var current = 0;

+  // But this time we're going to use recursion instead of a `while` loop. So we
+  // define a `walk` function.
  function walk() {
+
+    // Inside the walk function we start by grabbing the `current` token.
    var token = tokens[current];

+    // We're going to split each type of token off into a different code path,
+    // starting off with `number` tokens.
+    //
+    // We test to see if we have a `number` token.
    if (token.type === 'number') {
+
+      // If we have one, we'll increment `current`.
      current++;

+      // And we'll return a new AST node called `NumberLiteral`, setting its
+      // value to the value of our token.
      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

+    // Next we're going to look for CallExpressions. We start this off when we
+    // encounter an open parenthesis.
    if (
      token.type === 'paren' &&
      token.value === '('
    ) {
-      current++;

+      // We'll increment `current` to skip the parenthesis since we don't care
+      // about it in our AST.
+      token = tokens[++current];
+
+      // We create a base node with the type `CallExpression`, and we're going
+      // to set the name as the current token's value since the next token after
+      // the open parenthesis is the name of the function.
      var node = {
        type: 'CallExpression',
-        name: tokens[current].value,
+        name: token.value,
        params: []
      };

-      current++;
+      // We increment `current` *again* to skip the name token.
+      token = tokens[++current];

+      // And now we want to loop through each token that will be the `params` of
+      // our `CallExpression` until we encounter a closing parenthesis.
+      //
+      // Now this is where recursion comes in. Instead of trying to parse a
+      // potentially infinitely nested set of nodes we're going to rely on
+      // recursion to resolve things.
+      //
+      // To explain this, let's take our Lisp code. You can see that the
+      // parameters of `add` are a number and a nested `CallExpression` that
+      // includes its own numbers.
+      //
+      //   (add 2 (subtract 4 2))
+      //
+      // You'll also notice that in our tokens array we have multiple closing
+      // parentheses.
+      //
+      //   [
+      //     { type: 'paren',  value: '('        },
+      //     { type: 'name',   value: 'add'      },
+      //     { type: 'number', value: '2'        },
+      //     { type: 'paren',  value: '('        },
+      //     { type: 'name',   value: 'subtract' },
+      //     { type: 'number', value: '4'        },
+      //     { type: 'number', value: '2'        },
+      //     { type: 'paren',  value: ')'        }, <<< Closing parenthesis
+      //     { type: 'paren',  value: ')'        }  <<< Closing parenthesis
+      //   ]
+      //
+      // We're going to rely on the nested `walk` function to increment our
+      // `current` variable past any nested `CallExpressions`.
+
+      // So we create a `while` loop that will continue until it encounters a
+      // token with a `type` of `'paren'` and a `value` of a closing
+      // parenthesis.
      while (
        token.type !== 'paren' ||
        token.value !== ')'
      ) {
+        // We'll call the `walk` function which will return a `node` and we'll
+        // push it into our `node.params`.
        node.params.push(walk());
        token = tokens[current];
      }

+      // Finally we will increment `current` one last time to skip the closing
+      // parenthesis.
      current++;

+      // And return the node.
      return node;
    }

+    // Again, if we haven't recognized the token type by now we're going to
+    // throw an error.
    throw new TypeError(token.type);
  }

-  var program = {
+  // Now, we're going to create our AST which will have a root which is a
+  // `Program` node.
+  var ast = {
    type: 'Program',
    body: []
  };

+  // And we're going to kickstart our `walk` function, pushing nodes to our
+  // `ast.body` array.
+  //
+  // The reason we are doing this inside a loop is because our program can have
+  // `CallExpressions` after one another instead of being nested.
+  //
+  //   (add 2 2)
+  //   (subtract 4 2)
+  //
  while (current < tokens.length) {
-    program.body.push(walk());
+    ast.body.push(walk());
  }

-  return program;
+  // At the end of our parser we'll return the AST.
+  return ast;
 }

+/**
+ * ----------------------------------------------------------------------------
+ * *Note:* This is all I've written so far, so the code below isn't annotated
+ * yet. You can still read it all and it totally works, but I plan on improving
+ * this in the near future
+ * ----------------------------------------------------------------------------
+ */
+
 /**
 * ============================================================================
 *                                 ⌒(❀>◞౪◟<❀)⌒
@ -552,7 +636,7 @@ function parser(tokens) {
 * ============================================================================
 */

-function traverser(program, visitor) {
+function traverser(ast, visitor) {

  function traverseArray(array, parent) {
    array.forEach(function(child) {
@ -581,7 +665,7 @@ function traverser(program, visitor) {
    }
  }

-  traverseNode(program, null);
+  traverseNode(ast, null);
 }

 /**
@ -591,15 +675,15 @@ function traverser(program, visitor) {
 * ============================================================================
 */

-function transformer(program) {
-  var ast = {
+function transformer(ast) {
+  var newAst = {
    type: 'Program',
    body: []
  };

-  program._context = ast.body;
+  ast._context = newAst.body;

-  traverser(program, {
+  traverser(ast, {
    NumberLiteral: function(node, parent) {
      parent._context.push({
        type: 'NumberLiteral',
@ -630,7 +714,7 @@ function transformer(program) {
    }
  });

-  return ast;
+  return newAst;
 }

 /**