From 8466b6a67df9d815f1b7a8b5d4eec5abca0aea44 Mon Sep 17 00:00:00 2001
From: James Kyle
Date: Wed, 30 Mar 2016 19:27:29 -0700
Subject: [PATCH] Add tokenizer inline annotations

---
 super-tiny-compiler.js | 91 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 83 insertions(+), 8 deletions(-)

diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js
index 1181a6c..6be8b64 100644
--- a/super-tiny-compiler.js
+++ b/super-tiny-compiler.js
@@ -321,36 +321,65 @@
  * So let's begin...
  */
 
-/**
- * -----------------------------------------------------------------------------
- * *Note:* This is all I've written so far, so the code below isn't annnotated
- * yet. You can still read it all and it totally works, but I plan on improving
- * this in the near future
- * -----------------------------------------------------------------------------
- */
-
 /**
  * ============================================================================
  *                                   (/^▽^)/
  *                                THE TOKENIZER!
  * ============================================================================
  */
+
+/**
+ * We're gonna start off with our first phase of parsing, lexical analysis, with the tokenizer.
+ *
+ * We're just going to take our string of code and break it down into an array of tokens.
+ *
+ * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...]
+ */
+
+// We start by accepting an input string of code, and we're gonna set up two
+// things...
 function tokenizer(input) {
+
+  // A `current` variable for tracking our position in the code like a cursor.
   var current = 0;
+
+  // And a `tokens` array for pushing our tokens to.
   var tokens = [];
 
+  // We start by creating a `while` loop where we are setting up our `current`
+  // variable to be incremented as much as we want `inside` the loop.
+  //
+  // We do this because we may want to increment `current` many times within a
+  // single loop because our tokens can be any length.
   while (current < input.length) {
+
+    // We're also going to store the `current` character in the `input`.
     var char = input[current];
 
+    // The first thing we want to check for is an open parenthesis. This will
+    // later be used for `CallExpressions` but for now we only care about the
+    // character.
+    //
+    // We check to see if we have an open parenthesis:
     if (char === '(') {
+
+      // If we do, we push a new token with the type `paren` and set the value
+      // to an open parenthesis.
       tokens.push({
         type: 'paren',
         value: '('
       });
+
+      // Then we increment `current`
       current++;
+
+      // And we `continue` onto the next cycle of the loop.
       continue;
     }
 
+    // Next we're going to check for a closing parenthesis. We do the same exact
+    // thing as before: Check for a closing parenthesis, add a new token,
+    // increment current, and `continue`.
     if (char === ')') {
       tokens.push({
         type: 'paren',
@@ -360,38 +389,73 @@ function tokenizer(input) {
       continue;
     }
 
+    // Moving on we're now going to check for whitespace. This is interesting
+    // because we care that whitespace exists to separate characters, but it
+    // isn't actually important for us to store as a token. We would only throw
+    // it out later.
+    //
+    // So here we're just going to test for existence and if it does exist we're
+    // going to just `continue` on.
     var WHITESPACE = /\s/;
     if (WHITESPACE.test(char)) {
       current++;
       continue;
     }
 
+    // The next type of token is a number. This is different than what we have
+    // seen before because a number could be any number of characters and we
+    // want to capture the entire sequence of characters as one token.
+    //
+    //   (add 123 456)
+    //        ^^^ ^^^
+    //        Only two separate tokens
+    //
+    // So we start this off when we encounter the first number in a sequence.
     var NUMBERS = /[0-9]/;
     if (NUMBERS.test(char)) {
+
+      // We're going to create a `value` string that we are going to push
+      // characters to.
       var value = '';
 
+      // Then we're going to loop through each character in the sequence until
+      // we encounter a character that is not a number, pushing each character
+      // that is a number to our `value` and incrementing `current` as we go.
       while (NUMBERS.test(char)) {
         value += char;
         char = input[++current];
       }
 
+      // After that we push our `number` token to the `tokens` array.
       tokens.push({
         type: 'number',
         value: value
       });
 
+      // And we continue on.
       continue;
     }
 
+    // The last type of token will be a `name` token. This is a sequence of
+    // letters instead of numbers, that are the names of functions in our lisp
+    // syntax.
+    //
+    //   (add 2 4)
+    //    ^^^
+    //    Name token
+    //
     var LETTERS = /[a-zA-Z]/;
     if (LETTERS.test(char)) {
       var value = '';
 
+      // Again we're just going to loop through all the letters pushing them to
+      // a value.
       while (LETTERS.test(char)) {
         value += char;
         char = input[++current];
       }
 
+      // And pushing that value as a token with the type `name` and continuing.
       tokens.push({
         type: 'name',
         value: value
@@ -400,12 +464,23 @@ function tokenizer(input) {
       continue;
     }
 
+    // Finally if we have not matched a character by now, we're going to throw
+    // an error and completely exit.
    throw new TypeError('I dont know what this character is: ' + char);
   }
 
+  // Then at the end of our `tokenizer` we simply return the tokens array.
   return tokens;
 }
 
+/**
+ * -----------------------------------------------------------------------------
+ * *Note:* This is all I've written so far, so the code below isn't annotated
+ * yet. You can still read it all and it totally works, but I plan on improving
+ * this in the near future
+ * -----------------------------------------------------------------------------
+ */
+
 /**
  * ============================================================================
  *                             ヽ/❀o ل͜ o\ノ