@ -1,3 +1,5 @@
'use strict' ;
/ * *
/ * *
* TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
* TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
* T : : : : : : : : : : : : : : : : : : : : : TH : : : : : : : H H : : : : : : : HE : : : : : : : : : : : : : : : : : : : : E
* T : : : : : : : : : : : : : : : : : : : : : TH : : : : : : : H H : : : : : : : HE : : : : : : : : : : : : : : : : : : : : E
@ -150,7 +152,7 @@
* { type : 'number' , value : '4' } ,
* { type : 'number' , value : '4' } ,
* { type : 'number' , value : '2' } ,
* { type : 'number' , value : '2' } ,
* { type : 'paren' , value : ')' } ,
* { type : 'paren' , value : ')' } ,
* { type : 'paren' , value : ')' }
* { type : 'paren' , value : ')' } ,
* ]
* ]
*
*
* And an Abstract Syntax Tree ( AST ) might look like this :
* And an Abstract Syntax Tree ( AST ) might look like this :
@ -162,16 +164,16 @@
* name : 'add' ,
* name : 'add' ,
* params : [ {
* params : [ {
* type : 'NumberLiteral' ,
* type : 'NumberLiteral' ,
* value : '2'
* value : '2' ,
* } , {
* } , {
* type : 'CallExpression' ,
* type : 'CallExpression' ,
* name : 'subtract' ,
* name : 'subtract' ,
* params : [ {
* params : [ {
* type : 'NumberLiteral' ,
* type : 'NumberLiteral' ,
* value : '4'
* value : '4' ,
* } , {
* } , {
* type : 'NumberLiteral' ,
* type : 'NumberLiteral' ,
* value : '2'
* value : '2' ,
* } ]
* } ]
* } ]
* } ]
* } ]
* } ]
@ -198,7 +200,7 @@
*
*
* {
* {
* type : 'NumberLiteral' ,
* type : 'NumberLiteral' ,
* value : '2'
* value : '2' ,
* }
* }
*
*
* Or maybe a node for a "CallExpression" :
* Or maybe a node for a "CallExpression" :
@ -206,7 +208,7 @@
* {
* {
* type : 'CallExpression' ,
* type : 'CallExpression' ,
* name : 'subtract' ,
* name : 'subtract' ,
* params : [ ... nested nodes go here ... ]
* params : [ ... nested nodes go here ... ] ,
* }
* }
*
*
* When transforming the AST we can manipulate nodes by
* When transforming the AST we can manipulate nodes by
@ -259,7 +261,7 @@
* we would likely introduce all sorts of abstractions here . But just visiting
* we would likely introduce all sorts of abstractions here . But just visiting
* each node in the tree is enough .
* each node in the tree is enough .
*
*
* The reason I use the word “ visiting ” is because there is this pattern of how
* The reason I use the word "visiting" is because there is this pattern of how
* to represent operations on elements of an object structure .
* to represent operations on elements of an object structure .
*
*
* Visitors
* Visitors
@ -270,7 +272,7 @@
*
*
* var visitor = {
* var visitor = {
* NumberLiteral ( ) { } ,
* NumberLiteral ( ) { } ,
* CallExpression ( ) { }
* CallExpression ( ) { } ,
* } ;
* } ;
*
*
* When we traverse our AST we will call the methods on this visitor whenever we
* When we traverse our AST we will call the methods on this visitor whenever we
@ -281,7 +283,45 @@
*
*
* var visitor = {
* var visitor = {
* NumberLiteral ( node , parent ) { } ,
* NumberLiteral ( node , parent ) { } ,
* CallExpression ( node , parent ) { }
* CallExpression ( node , parent ) { } ,
* } ;
*
* We call these functions when we "enter" the node . But there is also the
* possibility of calling things on "exit" .
*
* Imagine our tree structure from before in list form :
*
* - Program
* - CallExpression
* - NumberLiteral
* - CallExpression
* - NumberLiteral
* - NumberLiteral
*
* As we traverse down , we ' re going to reach branches with dead ends . As we
* finish each branch of the tree we "exit" it . So going down the tree we
* "enter" each node , and going back up we "exit" .
*
* - > Program ( enter )
* - > CallExpression ( enter )
* - > NumberLiteral ( enter )
* < - NumberLiteral ( exit )
* - > CallExpression ( enter )
* - > NumberLiteral ( enter )
* < - NumberLiteral ( exit )
* - > NumberLiteral ( enter )
* < - NumberLiteral ( exit )
* < - CallExpression ( exit )
* < - CallExpression ( exit )
* < - Program ( exit )
*
* In order to support that , our visitors will look like this :
*
* var visitor = {
* NumberLiteral : {
* enter ( node , parent ) { } ,
* exit ( node , parent ) { } ,
* }
* } ;
* } ;
* /
* /
@ -343,10 +383,10 @@
function tokenizer ( input ) {
function tokenizer ( input ) {
// A `current` variable for tracking our position in the code like a cursor.
// A `current` variable for tracking our position in the code like a cursor.
var current = 0 ;
let current = 0 ;
// And a `tokens` array for pushing our tokens to.
// And a `tokens` array for pushing our tokens to.
var tokens = [ ] ;
let tokens = [ ] ;
// We start by creating a `while` loop where we are setting up our `current`
// We start by creating a `while` loop where we are setting up our `current`
// variable to be incremented as much as we want `inside` the loop.
// variable to be incremented as much as we want `inside` the loop.
@ -356,10 +396,10 @@ function tokenizer(input) {
while ( current < input . length ) {
while ( current < input . length ) {
// We're also going to store the `current` character in the `input`.
// We're also going to store the `current` character in the `input`.
var char = input [ current ] ;
let char = input [ current ] ;
// The first thing we want to check for is an open parenthesis. This will
// The first thing we want to check for is an open parenthesis. This will
// later be used for `CallExpression s ` but for now we only care about the
// later be used for `CallExpression ` but for now we only care about the
// character.
// character.
//
//
// We check to see if we have an open parenthesis:
// We check to see if we have an open parenthesis:
@ -369,7 +409,7 @@ function tokenizer(input) {
// to an open parenthesis.
// to an open parenthesis.
tokens . push ( {
tokens . push ( {
type : 'paren' ,
type : 'paren' ,
value : '('
value : '(' ,
} ) ;
} ) ;
// Then we increment `current`
// Then we increment `current`
@ -385,7 +425,7 @@ function tokenizer(input) {
if ( char === ')' ) {
if ( char === ')' ) {
tokens . push ( {
tokens . push ( {
type : 'paren' ,
type : 'paren' ,
value : ')'
value : ')' ,
} ) ;
} ) ;
current ++ ;
current ++ ;
continue ;
continue ;
@ -398,7 +438,7 @@ function tokenizer(input) {
//
//
// So here we're just going to test for existence and if it does exist we're
// So here we're just going to test for existence and if it does exist we're
// going to just `continue` on.
// going to just `continue` on.
var WHITESPACE = /\s/ ;
let WHITESPACE = /\s/ ;
if ( WHITESPACE . test ( char ) ) {
if ( WHITESPACE . test ( char ) ) {
current ++ ;
current ++ ;
continue ;
continue ;
@ -413,12 +453,12 @@ function tokenizer(input) {
// Only two separate tokens
// Only two separate tokens
//
//
// So we start this off when we encounter the first number in a sequence.
// So we start this off when we encounter the first number in a sequence.
var NUMBERS = /[0-9]/ ;
let NUMBERS = /[0-9]/ ;
if ( NUMBERS . test ( char ) ) {
if ( NUMBERS . test ( char ) ) {
// We're going to create a `value` string that we are going to push
// We're going to create a `value` string that we are going to push
// characters to.
// characters to.
var value = '' ;
let value = '' ;
// Then we're going to loop through each character in the sequence until
// Then we're going to loop through each character in the sequence until
// we encounter a character that is not a number, pushing each character
// we encounter a character that is not a number, pushing each character
@ -429,15 +469,42 @@ function tokenizer(input) {
}
}
// After that we push our `number` token to the `tokens` array.
// After that we push our `number` token to the `tokens` array.
tokens . push ( {
tokens . push ( { type : 'number' , value } ) ;
type : 'number' ,
value : value
} ) ;
// And we continue on.
// And we continue on.
continue ;
continue ;
}
}
// We'll also add support for strings in our language which will be any
// text surrounded by double quotes (").
//
// (concat "foo" "bar")
// ^^^ ^^^ string tokens
//
// We'll start by checking for the opening quote:
if ( char === '"' ) {
// Keep a `value` variable for building up our string token.
let value = '' ;
// We'll skip the opening double quote in our token.
char = input [ ++ current ] ;
// Then we'll iterate through each character until we reach another
// double quote.
while ( char !== '"' ) {
value += char ;
char = input [ ++ current ] ;
}
// Skip the closing double quote.
char = input [ ++ current ] ;
// And add our `string` token to the `tokens` array.
tokens . push ( { type : 'string' , value } ) ;
continue ;
}
// The last type of token will be a `name` token. This is a sequence of
// The last type of token will be a `name` token. This is a sequence of
// letters instead of numbers, that are the names of functions in our lisp
// letters instead of numbers, that are the names of functions in our lisp
// syntax.
// syntax.
@ -446,9 +513,9 @@ function tokenizer(input) {
// ^^^
// ^^^
// Name token
// Name token
//
//
var LETTERS = /[a-z]/i ;
let LETTERS = /[a-z]/i ;
if ( LETTERS . test ( char ) ) {
if ( LETTERS . test ( char ) ) {
var value = '' ;
let value = '' ;
// Again we're just going to loop through all the letters pushing them to
// Again we're just going to loop through all the letters pushing them to
// a value.
// a value.
@ -458,10 +525,7 @@ function tokenizer(input) {
}
}
// And pushing that value as a token with the type `name` and continuing.
// And pushing that value as a token with the type `name` and continuing.
tokens . push ( {
tokens . push ( { type : 'name' , value } ) ;
type : 'name' ,
value : value
} ) ;
continue ;
continue ;
}
}
@ -493,14 +557,14 @@ function tokenizer(input) {
function parser ( tokens ) {
function parser ( tokens ) {
// Again we keep a `current` variable that we will use as a cursor.
// Again we keep a `current` variable that we will use as a cursor.
var current = 0 ;
let current = 0 ;
// But this time we're going to use recursion instead of a `while` loop. So we
// But this time we're going to use recursion instead of a `while` loop. So we
// define a `walk` function.
// define a `walk` function.
function walk ( ) {
function walk ( ) {
// Inside the walk function we start by grabbing the `current` token.
// Inside the walk function we start by grabbing the `current` token.
var token = tokens [ current ] ;
let token = tokens [ current ] ;
// We're going to split each type of token off into a different code path,
// We're going to split each type of token off into a different code path,
// starting off with `number` tokens.
// starting off with `number` tokens.
@ -515,7 +579,18 @@ function parser(tokens) {
// value to the value of our token.
// value to the value of our token.
return {
return {
type : 'NumberLiteral' ,
type : 'NumberLiteral' ,
value : token . value
value : token . value ,
} ;
}
// If we have a string we will do the same as number and create a
// `StringLiteral` node.
if ( token . type === 'string' ) {
current ++ ;
return {
type : 'StringLiteral' ,
value : token . value ,
} ;
} ;
}
}
@ -533,10 +608,10 @@ function parser(tokens) {
// We create a base node with the type `CallExpression`, and we're going
// We create a base node with the type `CallExpression`, and we're going
// to set the name as the current token's value since the next token after
// to set the name as the current token's value since the next token after
// the open parenthesis is the name of the function.
// the open parenthesis is the name of the function.
var node = {
let node = {
type : 'CallExpression' ,
type : 'CallExpression' ,
name : token . value ,
name : token . value ,
params : [ ]
params : [ ] ,
} ;
} ;
// We increment `current` *again* to skip the name token.
// We increment `current` *again* to skip the name token.
@ -567,11 +642,11 @@ function parser(tokens) {
// { type: 'number', value: '4' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' } <<< Closing parenthesis
// { type: 'paren', value: ')' } , <<< Closing parenthesis
// ]
// ]
//
//
// We're going to rely on the nested `walk` function to increment our
// We're going to rely on the nested `walk` function to increment our
// `current` variable past any nested `CallExpression s `.
// `current` variable past any nested `CallExpression `.
// So we create a `while` loop that will continue until it encounters a
// So we create a `while` loop that will continue until it encounters a
// token with a `type` of `'paren'` and a `value` of a closing
// token with a `type` of `'paren'` and a `value` of a closing
@ -601,16 +676,16 @@ function parser(tokens) {
// Now, we're going to create our AST which will have a root which is a
// Now, we're going to create our AST which will have a root which is a
// `Program` node.
// `Program` node.
var ast = {
let ast = {
type : 'Program' ,
type : 'Program' ,
body : [ ]
body : [ ] ,
} ;
} ;
// And we're going to kickstart our `walk` function, pushing nodes to our
// And we're going to kickstart our `walk` function, pushing nodes to our
// `ast.body` array.
// `ast.body` array.
//
//
// The reason we are doing this inside a loop is because our program can have
// The reason we are doing this inside a loop is because our program can have
// `CallExpression s ` after one another instead of being nested.
// `CallExpression ` after one another instead of being nested.
//
//
// (add 2 2)
// (add 2 2)
// (subtract 4 2)
// (subtract 4 2)
@ -646,7 +721,7 @@ function parser(tokens) {
*
*
* NumberLiteral ( node , parent ) {
* NumberLiteral ( node , parent ) {
* // ...
* // ...
* }
* } ,
* } ) ;
* } ) ;
* /
* /
@ -657,7 +732,7 @@ function traverser(ast, visitor) {
// A `traverseArray` function that will allow us to iterate over an array and
// A `traverseArray` function that will allow us to iterate over an array and
// call the next function that we will define: `traverseNode`.
// call the next function that we will define: `traverseNode`.
function traverseArray ( array , parent ) {
function traverseArray ( array , parent ) {
array . forEach ( function ( child ) {
array . forEach ( child => {
traverseNode ( child , parent ) ;
traverseNode ( child , parent ) ;
} ) ;
} ) ;
}
}
@ -668,11 +743,12 @@ function traverser(ast, visitor) {
// We start by testing for the existence of a method on the visitor with a
// We start by testing for the existence of a method on the visitor with a
// matching `type`.
// matching `type`.
var method = visitor [ node . type ] ;
let methods = visitor [ node . type ] ;
// If it exists we'll call it with the `node` and its `parent`.
// If there is an `enter` method for this node type we'll call it with the
if ( method ) {
// `node` and its `parent`.
method ( node , parent ) ;
if ( methods && methods . enter ) {
methods . enter ( node , parent ) ;
}
}
// Next we are going to split things up by the current node type.
// Next we are going to split things up by the current node type.
@ -688,14 +764,15 @@ function traverser(ast, visitor) {
traverseArray ( node . body , node ) ;
traverseArray ( node . body , node ) ;
break ;
break ;
// Next we do the same with `CallExpression s ` and traverse their `params`.
// Next we do the same with `CallExpression ` and traverse their `params`.
case 'CallExpression' :
case 'CallExpression' :
traverseArray ( node . params , node ) ;
traverseArray ( node . params , node ) ;
break ;
break ;
// In the case of `NumberLiterals` we don't have any child nodes to visit,
// In the cases of `NumberLiteral` and `StringLiteral` we don't have any
// so we'll just break.
// child nodes to visit, so we'll just break.
case 'NumberLiteral' :
case 'NumberLiteral' :
case 'StringLiteral' :
break ;
break ;
// And again, if we haven't recognized the node type then we'll throw an
// And again, if we haven't recognized the node type then we'll throw an
@ -703,6 +780,12 @@ function traverser(ast, visitor) {
default :
default :
throw new TypeError ( node . type ) ;
throw new TypeError ( node . type ) ;
}
}
// If there is an `exit` method for this node type we'll call it with the
// `node` and its `parent`.
if ( methods && methods . exit ) {
methods . exit ( node , parent ) ;
}
}
}
// Finally we kickstart the traverser by calling `traverseNode` with our ast
// Finally we kickstart the traverser by calling `traverseNode` with our ast
@ -763,9 +846,9 @@ function transformer(ast) {
// We'll create a `newAst` which like our previous AST will have a program
// We'll create a `newAst` which like our previous AST will have a program
// node.
// node.
var newAst = {
let newAst = {
type : 'Program' ,
type : 'Program' ,
body : [ ]
body : [ ] ,
} ;
} ;
// Next I'm going to cheat a little and create a bit of a hack. We're going to
// Next I'm going to cheat a little and create a bit of a hack. We're going to
@ -780,51 +863,66 @@ function transformer(ast) {
// We'll start by calling the traverser function with our ast and a visitor.
// We'll start by calling the traverser function with our ast and a visitor.
traverser ( ast , {
traverser ( ast , {
// The first visitor method accepts `NumberLiterals`
// The first visitor method accepts any `NumberLiteral`
NumberLiteral : function ( node , parent ) {
NumberLiteral : {
// We'll create a new node also named `NumberLiteral` that we will push to
// We'll visit them on enter.
// the parent context.
enter ( node , parent ) {
parent . _context . push ( {
// We'll create a new node also named `NumberLiteral` that we will push to
type : 'NumberLiteral' ,
// the parent context.
value : node . value
parent . _context . push ( {
} ) ;
type : 'NumberLiteral' ,
value : node . value ,
} ) ;
} ,
} ,
} ,
// Next up, `CallExpressions`.
// Next we have `StringLiteral`
CallExpression : function ( node , parent ) {
StringLiteral : {
enter ( node , parent ) {
// We start creating a new node `CallExpression` with a nested
parent . _context . push ( {
// `Identifier`.
type : 'StringLiteral' ,
var expression = {
value : node . value ,
type : 'CallExpression' ,
} ) ;
callee : {
} ,
type : 'Identifier' ,
} ,
name : node . name
} ,
arguments : [ ]
} ;
// Next we're going to define a new context on the original
// Next up, `CallExpression`.
// `CallExpression` node that will reference the `expression`'s arguments
CallExpression : {
// so that we can push arguments.
enter ( node , parent ) {
node . _context = expression . arguments ;
// We start creating a new node `CallExpression` with a nested
// Then we're going to check if the parent node is a `CallExpression`.
// `Identifier`.
// If it is not...
let expression = {
if ( parent . type !== 'CallExpression' ) {
type : 'CallExpression' ,
callee : {
// We're going to wrap our `CallExpression` node with an
type : 'Identifier' ,
// `ExpressionStatement`. We do this because the top level
name : node . name ,
// `CallExpressions` in JavaScript are actually statements.
} ,
expression = {
arguments : [ ] ,
type : 'ExpressionStatement' ,
expression : expression
} ;
} ;
}
// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
// Next we're going to define a new context on the original
// `context`.
// `CallExpression` node that will reference the `expression`'s arguments
parent . _context . push ( expression ) ;
// so that we can push arguments.
node . _context = expression . arguments ;
// Then we're going to check if the parent node is a `CallExpression`.
// If it is not...
if ( parent . type !== 'CallExpression' ) {
// We're going to wrap our `CallExpression` node with an
// `ExpressionStatement`. We do this because the top level
// `CallExpression` nodes in JavaScript are actually statements.
expression = {
type : 'ExpressionStatement' ,
expression : expression ,
} ;
}
// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
// `context`.
parent . _context . push ( expression ) ;
} ,
}
}
} ) ;
} ) ;
@ -858,7 +956,7 @@ function codeGenerator(node) {
return node . body . map ( codeGenerator )
return node . body . map ( codeGenerator )
. join ( '\n' ) ;
. join ( '\n' ) ;
// For `ExpressionStatement s ` we'll call the code generator on the nested
// For `ExpressionStatement ` we'll call the code generator on the nested
// expression and we'll add a semicolon...
// expression and we'll add a semicolon...
case 'ExpressionStatement' :
case 'ExpressionStatement' :
return (
return (
@ -866,7 +964,7 @@ function codeGenerator(node) {
';' // << (...because we like to code the *correct* way)
';' // << (...because we like to code the *correct* way)
) ;
) ;
// For `CallExpression s ` we will print the `callee`, add an open
// For `CallExpression ` we will print the `callee`, add an open
// parenthesis, we'll map through each node in the `arguments` array and run
// parenthesis, we'll map through each node in the `arguments` array and run
// them through the code generator, joining them with a comma, and then
// them through the code generator, joining them with a comma, and then
// we'll add a closing parenthesis.
// we'll add a closing parenthesis.
@ -879,14 +977,18 @@ function codeGenerator(node) {
')'
')'
) ;
) ;
// For `Identifier s ` we'll just return the `node`'s name.
// For `Identifier ` we'll just return the `node`'s name.
case 'Identifier' :
case 'Identifier' :
return node . name ;
return node . name ;
// For `NumberLiteral s ` we'll just return the `node`'s value.
// For `NumberLiteral ` we'll just return the `node`'s value.
case 'NumberLiteral' :
case 'NumberLiteral' :
return node . value ;
return node . value ;
// For `StringLiteral` we'll add quotations around the `node`'s value.
case 'StringLiteral' :
return '"' + node . value + '"' ;
// And if we haven't recognized the node, we'll throw an error.
// And if we haven't recognized the node, we'll throw an error.
default :
default :
throw new TypeError ( node . type ) ;
throw new TypeError ( node . type ) ;
@ -911,10 +1013,10 @@ function codeGenerator(node) {
* /
* /
function compiler ( input ) {
function compiler ( input ) {
var tokens = tokenizer ( input ) ;
let tokens = tokenizer ( input ) ;
var ast = parser ( tokens ) ;
let ast = parser ( tokens ) ;
var newAst = transformer ( ast ) ;
let newAst = transformer ( ast ) ;
var output = codeGenerator ( newAst ) ;
let output = codeGenerator ( newAst ) ;
// and simply return the output!
// and simply return the output!
return output ;
return output ;
@ -929,9 +1031,10 @@ function compiler(input) {
// Now I'm just exporting everything...
// Now I'm just exporting everything...
module . exports = {
module . exports = {
tokenizer : tokenizer ,
tokenizer ,
parser : parser ,
parser ,
transformer : transformer ,
traverser ,
codeGenerator : codeGenerator ,
transformer ,
compiler : compiler
codeGenerator ,
compiler ,
} ;
} ;