2016-03-30 22:22:59 +00:00
|
|
|
|
/**
|
|
|
|
|
* TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
|
|
|
|
|
* T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
|
|
|
|
|
* T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E
|
|
|
|
|
* T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E
|
|
|
|
|
* TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE
|
|
|
|
|
* T:::::T H:::::H H:::::H E:::::E
|
|
|
|
|
* T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE
|
|
|
|
|
* T:::::T H:::::::::::::::::H E:::::::::::::::E
|
|
|
|
|
* T:::::T H:::::::::::::::::H E:::::::::::::::E
|
|
|
|
|
* T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE
|
|
|
|
|
* T:::::T H:::::H H:::::H E:::::E
|
|
|
|
|
* T:::::T H:::::H H:::::H E:::::E EEEEEE
|
|
|
|
|
* TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E
|
|
|
|
|
* T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E
|
|
|
|
|
* T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E
|
|
|
|
|
* TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
|
|
|
|
|
*
|
|
|
|
|
* SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
|
|
|
|
|
* SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R
|
|
|
|
|
* S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R
|
|
|
|
|
* S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R
|
|
|
|
|
* S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R
|
2016-03-30 23:34:24 +00:00
|
|
|
|
* S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R
|
|
|
|
|
* S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R
|
|
|
|
|
* SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR
|
|
|
|
|
* SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R
|
|
|
|
|
* SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R
|
|
|
|
|
* S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R
|
2016-03-30 22:22:59 +00:00
|
|
|
|
* S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R
|
|
|
|
|
* SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R
|
|
|
|
|
* S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R
|
|
|
|
|
* S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R
|
|
|
|
|
* SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
|
|
|
|
|
*
|
|
|
|
|
* TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY
|
|
|
|
|
* T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y
|
|
|
|
|
* T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y
|
|
|
|
|
* T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y
|
|
|
|
|
* TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY
|
|
|
|
|
* T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y
|
|
|
|
|
* T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y
|
|
|
|
|
* T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y
|
|
|
|
|
* T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y
|
|
|
|
|
* T:::::T I::::I N::::::N N:::::::::::N Y:::::Y
|
|
|
|
|
* T:::::T I::::I N::::::N N::::::::::N Y:::::Y
|
|
|
|
|
* T:::::T I::::I N::::::N N:::::::::N Y:::::Y
|
|
|
|
|
* TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y
|
|
|
|
|
* T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY
|
|
|
|
|
* T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y
|
|
|
|
|
* TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY
|
|
|
|
|
*
|
|
|
|
|
* CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
|
|
|
|
|
* CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R
|
|
|
|
|
* CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R
|
|
|
|
|
* C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R
|
|
|
|
|
* C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R
|
|
|
|
|
* C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R
|
|
|
|
|
* C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R
|
|
|
|
|
* C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR
|
|
|
|
|
* C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R
|
|
|
|
|
* C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R
|
|
|
|
|
* C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R
|
|
|
|
|
* C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R
|
|
|
|
|
* C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R
|
|
|
|
|
* CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R
|
|
|
|
|
* CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R
|
|
|
|
|
* CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
|
|
|
|
|
*
|
|
|
|
|
* =======================================================================================================================================================================
|
|
|
|
|
* =======================================================================================================================================================================
|
|
|
|
|
* =======================================================================================================================================================================
|
|
|
|
|
* =======================================================================================================================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
2016-03-31 00:39:16 +00:00
|
|
|
|
/**
|
|
|
|
|
* Today we're going to write a compiler together. But not just any compiler... A
|
|
|
|
|
* super duper tiny teeny compiler! A compiler that is so small that if you
|
|
|
|
|
* remove all the comments this file would only be ~200 lines of actual code.
|
|
|
|
|
*
|
|
|
|
|
* We're going to compile some lisp-like function calls into some C-like
|
|
|
|
|
* function calls.
|
|
|
|
|
*
|
|
|
|
|
* If you are not familiar with one or the other, I'll just give you a quick intro.
|
|
|
|
|
*
|
|
|
|
|
* If we had two functions `add` and `subtract` they would be written like this:
|
|
|
|
|
*
|
|
|
|
|
* LISP C
|
|
|
|
|
*
|
|
|
|
|
* 2 + 2 (add 2 2) add(2, 2)
|
|
|
|
|
* 4 - 2 (subtract 4 2) subtract(4, 2)
|
|
|
|
|
* 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2))
|
|
|
|
|
*
|
|
|
|
|
* Easy peezy right?
|
|
|
|
|
*
|
|
|
|
|
* Well good, because this is exactly what we are going to compile. While this
|
|
|
|
|
* is neither a complete LISP or C syntax, it will be enough of the syntax to
|
|
|
|
|
* demonstrate many of the major pieces of a modern compiler.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Most compilers break down into three primary stages: Parsing, Transformation,
|
|
|
|
|
* and Code Generation
|
|
|
|
|
*
|
|
|
|
|
* 1. *Parsing* is taking raw code and turning it into a more abstract
|
|
|
|
|
* representation of the code.
|
|
|
|
|
*
|
|
|
|
|
* 2. *Transformation* takes this abstract representation and manipulates it to do
|
|
|
|
|
* whatever the compiler wants it to.
|
|
|
|
|
*
|
|
|
|
|
* 3. *Code Generation* takes the transformed representation of the code and
|
|
|
|
|
* turns it into new code.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Parsing
|
|
|
|
|
* -------
|
|
|
|
|
*
|
|
|
|
|
* Parsing typically gets broken down into two phases: Lexical Analysis and
|
|
|
|
|
* Syntactic Analysis.
|
|
|
|
|
*
|
|
|
|
|
* 1. *Lexical Analysis* takes the raw code and splits it apart into these things
|
|
|
|
|
* called tokens by a thing called a tokenizer (or lexer).
|
|
|
|
|
*
|
|
|
|
|
* Tokens are an array of tiny little objects that describe an isolated piece
|
|
|
|
|
* of the syntax. They could be numbers, labels, punctuation, operators,
|
|
|
|
|
* whatever.
|
|
|
|
|
*
|
|
|
|
|
* 2. *Syntactic Analysis* takes the tokens and reformats them into a
|
|
|
|
|
* representation that describes each part of the syntax and their relation
|
|
|
|
|
* to one another. This is known as an intermediate representation or
|
|
|
|
|
* Abstract Syntax Tree.
|
|
|
|
|
*
|
|
|
|
|
* An Abstract Syntax Tree or AST for short is a deeply nested object that
|
|
|
|
|
* represents code in a way that is both easy to work with and tells us a lot
|
|
|
|
|
* of information.
|
|
|
|
|
*
|
|
|
|
|
* For the following syntax:
|
|
|
|
|
*
|
|
|
|
|
* (add 2 (subtract 4 2))
|
|
|
|
|
*
|
|
|
|
|
* Tokens might look something like this:
|
|
|
|
|
*
|
|
|
|
|
* [
|
|
|
|
|
* { type: 'paren', value: '(' },
|
|
|
|
|
* { type: 'name', value: 'add' },
|
|
|
|
|
* { type: 'number', value: '2' },
|
|
|
|
|
* { type: 'paren', value: '(' },
|
|
|
|
|
* { type: 'name', value: 'subtract' },
|
|
|
|
|
* { type: 'number', value: '4' },
|
|
|
|
|
* { type: 'number', value: '2' },
|
|
|
|
|
* { type: 'paren', value: ')' },
|
|
|
|
|
* { type: 'paren', value: ')' }
|
|
|
|
|
* ]
|
|
|
|
|
*
|
|
|
|
|
* And an Abstract Syntax Tree (AST) might look like this:
|
|
|
|
|
*
|
|
|
|
|
* {
|
|
|
|
|
* type: 'Program',
|
|
|
|
|
* body: [{
|
|
|
|
|
* type: 'CallExpression',
|
|
|
|
|
* name: 'add',
|
|
|
|
|
* params: [{
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '2'
|
|
|
|
|
* }, {
|
|
|
|
|
* type: 'CallExpression',
|
|
|
|
|
* name: 'subtract',
|
|
|
|
|
* params: [{
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '4'
|
|
|
|
|
* }, {
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '2'
|
|
|
|
|
* }]
|
|
|
|
|
* }]
|
|
|
|
|
* }]
|
|
|
|
|
* }
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Transformation
|
|
|
|
|
* --------------
|
|
|
|
|
*
|
|
|
|
|
* The next stage of a compiler is transformation. Again, this just
|
|
|
|
|
* takes the AST from the last step and makes changes to it. It can manipulate
|
|
|
|
|
* the AST in the same language or it can translate it into an entirely new
|
|
|
|
|
* language.
|
|
|
|
|
*
|
|
|
|
|
* Let’s look at how we would transform an AST.
|
|
|
|
|
*
|
|
|
|
|
* You might notice that our AST has elements within it that look very similar.
|
|
|
|
|
* There are these objects with a type property. Each of these is known as an
|
|
|
|
|
* AST Node. These nodes have defined properties on them that describe one
|
|
|
|
|
* isolated part of the tree.
|
|
|
|
|
*
|
|
|
|
|
* We can have a node for a "NumberLiteral":
|
|
|
|
|
*
|
|
|
|
|
* {
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '2'
|
|
|
|
|
* }
|
|
|
|
|
*
|
|
|
|
|
* Or maybe a node for a "CallExpression":
|
|
|
|
|
*
|
|
|
|
|
* {
|
|
|
|
|
* type: 'CallExpression',
|
|
|
|
|
* name: 'subtract',
|
|
|
|
|
* params: [...nested nodes go here...]
|
|
|
|
|
* }
|
|
|
|
|
*
|
|
|
|
|
* When transforming the AST we can manipulate nodes by
|
|
|
|
|
* adding/removing/replacing properties, we can add new nodes, remove nodes, or
|
|
|
|
|
* we could leave the existing AST alone and create an entirely new one based
|
|
|
|
|
* on it.
|
|
|
|
|
*
|
|
|
|
|
* Since we’re targeting a new language, we’re going to focus on creating an
|
|
|
|
|
* entirely new AST that is specific to the target language.
|
|
|
|
|
*
|
|
|
|
|
* Traversal
|
|
|
|
|
* ---------
|
|
|
|
|
*
|
|
|
|
|
* In order to navigate through all of these nodes, we need to be able to
|
|
|
|
|
* traverse through them. This traversal process goes to each node in the AST
|
|
|
|
|
* depth-first.
|
|
|
|
|
*
|
|
|
|
|
* {
|
|
|
|
|
* type: 'Program',
|
|
|
|
|
* body: [{
|
|
|
|
|
* type: 'CallExpression',
|
|
|
|
|
* name: 'add',
|
|
|
|
|
* params: [{
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '2'
|
|
|
|
|
* }, {
|
|
|
|
|
* type: 'CallExpression',
|
|
|
|
|
* name: 'subtract',
|
|
|
|
|
* params: [{
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '4'
|
|
|
|
|
* }, {
|
|
|
|
|
* type: 'NumberLiteral',
|
|
|
|
|
* value: '2'
|
|
|
|
|
* }]
|
|
|
|
|
* }]
|
|
|
|
|
* }]
|
|
|
|
|
* }
|
|
|
|
|
*
|
|
|
|
|
* So for the above AST we would go:
|
|
|
|
|
*
|
|
|
|
|
* 1. Program - Starting at the top level of the AST
|
|
|
|
|
* 2. CallExpression (add) - Moving to the first element of the Program's body
|
|
|
|
|
* 3. NumberLiteral (2) - Moving to the first element of CallExpression's params
|
|
|
|
|
* 4. CallExpression (subtract) - Moving to the second element of CallExpression's params
|
|
|
|
|
* 5. NumberLiteral (4) - Moving to the first element of CallExpression's params
|
|
|
|
|
* 6. NumberLiteral (2) - Moving to the second element of CallExpression's params
|
|
|
|
|
*
|
|
|
|
|
* If we were manipulating this AST directly instead of creating a separate AST
|
|
|
|
|
* we would likely introduce all sorts of abstractions here. But just visiting
|
|
|
|
|
* each node in the tree is enough.
|
|
|
|
|
*
|
|
|
|
|
* The reason I use the word “visiting” is because there is this pattern of how
|
|
|
|
|
* to represent operations on elements of an object structure.
|
|
|
|
|
*
|
|
|
|
|
* Visitors
|
|
|
|
|
* --------
|
|
|
|
|
*
|
|
|
|
|
* The basic idea here is that we are going to create a “visitor” object that
|
|
|
|
|
* has methods that will accept different node types.
|
|
|
|
|
*
|
|
|
|
|
* var visitor = {
|
|
|
|
|
* NumberLiteral() {},
|
|
|
|
|
* CallExpression() {}
|
|
|
|
|
* };
|
|
|
|
|
*
|
|
|
|
|
* When we traverse our AST we will call the methods on this visitor whenever we
|
|
|
|
|
* encounter a node of a matching type.
|
|
|
|
|
*
|
|
|
|
|
* In order to make this useful we will also pass the node and a reference to
|
|
|
|
|
* the parent node.
|
|
|
|
|
*
|
|
|
|
|
* var visitor = {
|
|
|
|
|
* NumberLiteral(node, parent) {},
|
|
|
|
|
* CallExpression(node, parent) {}
|
|
|
|
|
* };
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Code Generation
|
|
|
|
|
* ---------------
|
|
|
|
|
*
|
|
|
|
|
* The final phase of a compiler is code generation. Sometimes compilers will do
|
|
|
|
|
* things that overlap with transformation, but for the most part code
|
|
|
|
|
* generation just means take our AST and string-ify code back out.
|
|
|
|
|
*
|
|
|
|
|
* Code generators work several different ways, some compilers will reuse the
|
|
|
|
|
* tokens from earlier, others will have created a separate representation of
|
|
|
|
|
* the code so that they can print nodes linearly, but from what I can tell most
|
|
|
|
|
* will use the same AST we just created which is what we’re going to focus on.
|
|
|
|
|
*
|
|
|
|
|
* Effectively our code generator will know how to “print” all of the different
|
|
|
|
|
* node types of the AST, and it will recursively call itself to print nested
|
|
|
|
|
* nodes until everything is printed into one long string of code.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* And that's it! That's all the different pieces of a compiler.
|
|
|
|
|
*
|
|
|
|
|
* Now that isn’t to say every compiler looks exactly like I described here.
|
|
|
|
|
* Compilers serve many different purposes, and they might need more steps than
|
|
|
|
|
* I have detailed.
|
|
|
|
|
*
|
|
|
|
|
* But now you should have a general high-level idea of what most compilers look
|
|
|
|
|
* like.
|
|
|
|
|
*
|
|
|
|
|
* Now that I’ve explained all of this, you’re all good to go write your own
|
|
|
|
|
* compilers right?
|
|
|
|
|
*
|
|
|
|
|
* Just kidding, that's what I'm here to help with :P
|
|
|
|
|
*
|
|
|
|
|
* So let's begin...
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* -----------------------------------------------------------------------------
|
|
|
|
|
* *Note:* This is all I've written so far, so the code below isn't annotated
|
|
|
|
|
* yet. You can still read it all and it totally works, but I plan on improving
|
|
|
|
|
* this in the near future.
|
|
|
|
|
* -----------------------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
|
2016-03-30 22:22:59 +00:00
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* (/^▽^)/
|
|
|
|
|
* THE TOKENIZER!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
/**
 * Lexical analysis: splits a string of lisp-like source code into tokens.
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * Token types produced:
 *   - 'paren'  : a single '(' or ')'
 *   - 'number' : a run of one or more digits 0-9 (value stays a string)
 *   - 'name'   : a run of one or more ASCII letters a-z / A-Z
 * Whitespace separates tokens and is otherwise discarded.
 *
 * @param {string} input - The source code to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 * @throws {TypeError} When a character is not a paren, whitespace, digit or
 *   letter.
 */
function tokenizer(input) {
  // The character classes never change, so create them once instead of on
  // every pass through the main loop.
  var WHITESPACE = /\s/;
  var NUMBERS = /[0-9]/;
  var LETTERS = /[a-zA-Z]/;

  // `current` is our cursor into `input`; `tokens` collects the output.
  var current = 0;
  var tokens = [];

  // Consumes consecutive characters matching `pattern`, starting at the
  // cursor, and returns them as one string.
  //
  // The explicit bounds check matters: reading past the end of the input
  // yields `undefined`, and `RegExp.prototype.test` coerces that to the
  // string "undefined" -- which matches LETTERS and would loop forever.
  function readWhile(pattern) {
    var value = '';

    while (current < input.length && pattern.test(input[current])) {
      value += input[current];
      current++;
    }

    return value;
  }

  while (current < input.length) {
    var char = input[current];

    // Parentheses are always single-character tokens.
    if (char === '(' || char === ')') {
      tokens.push({
        type: 'paren',
        value: char
      });
      current++;
      continue;
    }

    // Whitespace only separates tokens; it is never emitted.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // A number token is the longest run of digits starting here.
    if (NUMBERS.test(char)) {
      tokens.push({
        type: 'number',
        value: readWhile(NUMBERS)
      });
      continue;
    }

    // A name token is the longest run of letters starting here.
    if (LETTERS.test(char)) {
      tokens.push({
        type: 'name',
        value: readWhile(LETTERS)
      });
      continue;
    }

    // Anything else is not part of the language.
    throw new TypeError('I dont know what this character is: ' + char);
  }

  return tokens;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* ヽ/❀o ل͜ o\ノ
|
|
|
|
|
* THE PARSER!!!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Syntactic analysis: turns the flat token array into an AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * Node shapes produced:
 *   - { type: 'Program', body: [nodes] }
 *   - { type: 'CallExpression', name: string, params: [nodes] }
 *   - { type: 'NumberLiteral', value: string }
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens in the shape
 *   produced by the tokenizer.
 * @returns {{type: string, body: Array<Object>}} The root `Program` node.
 * @throws {TypeError} When a token cannot start an expression (the message is
 *   the token's type) or when the tokens end in the middle of an expression.
 */
function parser(tokens) {
  // Cursor into `tokens`, shared by every recursive call to `walk`.
  var current = 0;

  // Parses exactly one expression starting at the cursor and leaves the
  // cursor just past it.
  function walk() {
    var token = tokens[current];

    // Running off the end means a '(' was never closed.
    if (!token) {
      throw new TypeError('Unexpected end of input');
    }

    if (token.type === 'number') {
      current++;

      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

    if (
      token.type === 'paren' &&
      token.value === '('
    ) {
      // Skip the '('; the token right after it names the function.
      token = tokens[++current];

      if (!token) {
        throw new TypeError('Unexpected end of input');
      }

      var node = {
        type: 'CallExpression',
        name: token.value,
        params: []
      };

      // Move past the name BEFORE testing for ')'. Testing the stale '('
      // token would always enter the loop once and make a zero-argument call
      // such as `(add)` fail.
      token = tokens[++current];

      // Every expression up to the matching ')' is a parameter. A missing
      // token is sent into `walk()` so it reports the truncated input.
      while (
        !token ||
        token.type !== 'paren' ||
        token.value !== ')'
      ) {
        node.params.push(walk());
        token = tokens[current];
      }

      // Skip the closing ')'.
      current++;

      return node;
    }

    // Neither a number nor '(' -- nothing else can start an expression.
    throw new TypeError(token.type);
  }

  var program = {
    type: 'Program',
    body: []
  };

  // A program is a sequence of top-level expressions.
  while (current < tokens.length) {
    program.body.push(walk());
  }

  return program;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* ⌒(❀>◞౪◟<❀)⌒
|
|
|
|
|
* THE TRAVERSER!!!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Walks an AST depth-first and calls the matching visitor method for every
 * node it enters.
 *
 * The visitor method is looked up by `node.type` and called as
 * `method(node, parent)` before the node's children are traversed. The root
 * node is visited with `parent === null`.
 *
 * @param {Object} program - Root node of the AST (normally a `Program`).
 * @param {Object} visitor - Object whose keys are node type names and whose
 *   values are `function(node, parent)` callbacks. Missing keys are skipped.
 * @returns {undefined}
 * @throws {TypeError} When a node's type is not `Program`, `CallExpression`
 *   or `NumberLiteral`; the message is the unknown type.
 */
function traverser(program, visitor) {

  // Visits every element of a child list, giving each the same parent.
  function traverseArray(array, parent) {
    array.forEach(function(child) {
      traverseNode(child, parent);
    });
  }

  // Visits one node, then descends into whatever children its type has.
  function traverseNode(node, parent) {
    var method = visitor[node.type];

    // Visitors are optional per node type.
    if (method) {
      method(node, parent);
    }

    switch (node.type) {
      case 'Program':
        // Top-level expressions live in `body`.
        traverseArray(node.body, node);
        break;

      case 'CallExpression':
        // Arguments of the call live in `params`.
        traverseArray(node.params, node);
        break;

      case 'NumberLiteral':
        // Leaf node: nothing to descend into.
        break;

      default:
        throw new TypeError(node.type);
    }
  }

  // Kick things off at the root, which has no parent.
  traverseNode(program, null);
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
|
|
|
|
|
* THE TRANSFORMER!!!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Builds a new, C-like AST from the lisp-like AST produced by the parser.
 *
 * Mapping performed:
 *   - `NumberLiteral`  -> `NumberLiteral` with the same `value`
 *   - `CallExpression` -> `CallExpression` with an `Identifier` callee and an
 *     `arguments` array; wrapped in an `ExpressionStatement` unless its
 *     parent is itself a `CallExpression`
 *
 * Side effect: a `_context` property is written onto the input `Program`
 * node and onto every input `CallExpression` node. It points at the array in
 * the NEW tree that the node's children should be pushed into.
 *
 * @param {Object} program - Root `Program` node of the source AST.
 * @returns {{type: string, body: Array<Object>}} Root `Program` node of the
 *   new AST.
 */
function transformer(program) {
  var ast = {
    type: 'Program',
    body: []
  };

  // Children of the old root get pushed into the new root's body.
  program._context = ast.body;

  traverser(program, {
    NumberLiteral: function(node, parent) {
      parent._context.push({
        type: 'NumberLiteral',
        value: node.value
      });
    },

    CallExpression: function(node, parent) {
      var expression = {
        type: 'CallExpression',
        callee: {
          type: 'Identifier',
          name: node.name
        },
        arguments: []
      };

      // Set before the traverser descends into this node's params, so the
      // children can find where to push themselves.
      node._context = expression.arguments;

      // A call that is not nested inside another call is a statement of its
      // own, so it gets wrapped in an ExpressionStatement.
      if (parent.type !== 'CallExpression') {
        expression = {
          type: 'ExpressionStatement',
          expression: expression
        };
      }

      parent._context.push(expression);
    }
  });

  return ast;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* ヾ(〃^∇^)ノ♪
|
|
|
|
|
* THE CODE GENERATOR!!!!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Recursively prints a node of the transformed AST as a string of code.
 *
 *   - `Program`             : each body node printed, joined with newlines
 *   - `ExpressionStatement` : the wrapped expression followed by ';'
 *   - `CallExpression`      : callee, then '(' args joined by ', ' then ')'
 *   - `Identifier`          : its `name`
 *   - `NumberLiteral`       : its `value`
 *
 * @param {Object} node - A node of the transformed AST.
 * @returns {string} The generated code for `node` and everything below it.
 * @throws {TypeError} When `node.type` is none of the types listed above;
 *   the message is the unknown type.
 */
function codeGenerator(node) {
  switch (node.type) {
    case 'Program':
      // One top-level statement per line.
      return node.body.map(codeGenerator)
        .join('\n');

    case 'ExpressionStatement':
      return (
        codeGenerator(node.expression) +
        ';'
      );

    case 'CallExpression':
      return (
        codeGenerator(node.callee) +
        '(' +
        node.arguments.map(codeGenerator)
          .join(', ') +
        ')'
      );

    case 'Identifier':
      return node.name;

    case 'NumberLiteral':
      return node.value;

    default:
      throw new TypeError(node.type);
  }
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* (۶* ‘ヮ’)۶”
|
|
|
|
|
* !!!!!!!!THE COMPILER!!!!!!!!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Runs the whole pipeline on a string of source code:
 *
 *   1. input  => tokenizer     => tokens
 *   2. tokens => parser        => ast
 *   3. ast    => transformer   => newAst
 *   4. newAst => codeGenerator => output
 *
 * @param {string} input - Source code passed to the tokenizer.
 * @returns {string} The code produced by the code generator.
 */
function compiler(input) {
  var tokens = tokenizer(input);
  var ast = parser(tokens);
  var newAst = transformer(ast);
  var output = codeGenerator(newAst);

  return output;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ============================================================================
|
|
|
|
|
* (๑˃̵ᴗ˂̵)و
|
|
|
|
|
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
|
|
|
* ============================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// Now I'm just exporting everything...
|
|
|
|
|
module.exports = {
|
|
|
|
|
tokenizer: tokenizer,
|
|
|
|
|
parser: parser,
|
|
|
|
|
transformer: transformer,
|
|
|
|
|
codeGenerator: codeGenerator,
|
|
|
|
|
compiler: compiler
|
|
|
|
|
};
|