From 2173d6062a94b6b03d4c190788ad515d7768f1fb Mon Sep 17 00:00:00 2001 From: James Kyle Date: Wed, 30 Mar 2016 15:22:59 -0700 Subject: [PATCH 01/31] Give Super Tiny Compiler --- LICENSE | 393 +++++++++++++++++++++++++++++++++++++++++ README.md | 34 ++++ super-tiny-compiler.js | 375 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 802 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 super-tiny-compiler.js diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..40f19b9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,393 @@ +Creative Commons Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. 
Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). 
To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. 
Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. 
produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. 
Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. 
You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. 
THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. 
For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public licenses. 
+Notwithstanding, Creative Commons may elect to apply one of its public +licenses to material it publishes and in those instances will be +considered the "Licensor." Except for the limited purpose of indicating +that material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the public +licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/README.md b/README.md new file mode 100644 index 0000000..85f11c4 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# THE SUPER TINY COMPILER + +Welcome to The Super Tiny Compiler. It's an ultra simplified example of all the +major pieces of a modern compiler written in easy to read JavaScript. Reading +through it can help you learn about how most compilers work end to end. + +### Why should I care? + +That's fair, most people don't really have to think about compilers in their day +jobs. However, compilers are all around you, tons of the tools you use are based +on concepts borrowed from compilers. + +### But compilers are scary! + +Yes, they are. But that's our fault (the people who write compilers), we've +taken something that is reasonably straightforward and made it so scary that +most think of it as this totally unapproachable thing that only the nerdiest of +the nerds are able to understand. + +### Okay so where do I begin? + +Awesome! Head on over to the [super-tiny-compiler.js](super-tiny-compiler.js) +file. 
+ +### I'm back, that didn't make sense + +Ouch, I'm really sorry. I'm planning on doing a lot more work on this to add +inline annotations. If you want to come back when that's done, you can either +watch/star this repo or follow me on +[twitter](https://twitter.com/thejameskyle) for updates. + +--- + +[![cc-by-4.0](https://licensebuttons.net/l/by/4.0/80x15.png)](http://creativecommons.org/licenses/by/4.0/) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js new file mode 100644 index 0000000..1b60002 --- /dev/null +++ b/super-tiny-compiler.js @@ -0,0 +1,375 @@ +/** + * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE + * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E + * T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E + * T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E + * TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE + * T:::::T H:::::H H:::::H E:::::E + * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE + * T:::::T H:::::::::::::::::H E:::::::::::::::E + * T:::::T H:::::::::::::::::H E:::::::::::::::E + * T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE + * T:::::T H:::::H H:::::H E:::::E + * T:::::T H:::::H H:::::H E:::::E EEEEEE + * TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E + * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E + * T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E + * TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE + * + * SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR + * SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R + * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R + * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R + * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R + * S:::::S U:::::D D:::::U P::::P P:::::P E:::::E R::::R R:::::R + * 
S::::SSSS U:::::D D:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R + * SS::::::SSSSS U:::::D D:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR + * SSS::::::::SS U:::::D D:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R + * SSSSSS::::S U:::::D D:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R + * S:::::S U:::::D D:::::U P::::P E:::::E R::::R R:::::R + * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R + * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R + * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R + * S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R + * SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR + * + * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY + * T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y + * T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y + * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y + * TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY + * T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y + * T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y + * T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y + * T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y + * T:::::T I::::I N::::::N N:::::::::::N Y:::::Y + * T:::::T I::::I N::::::N N::::::::::N Y:::::Y + * T:::::T I::::I N::::::N N:::::::::N Y:::::Y + * TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y + * T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY + * T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y + * TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY + * + * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR + * CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L 
E::::::::::::::::::::ER::::::::::::::::R + * CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R + * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R + * C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R + * C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R + * C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R + * C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR + * C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R + * C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R + * C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R + * C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R + * C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R + * CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R + * CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R + * CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR + * + * ======================================================================================================================================================================= + * 
======================================================================================================================================================================= + * ======================================================================================================================================================================= + * ======================================================================================================================================================================= + */ + + /** + * ============================================================================ + * (/^▽^)/ + * THE TOKENIZER! + * ============================================================================ + */ +function tokenizer(input) { + var current = 0; + var tokens = []; + + while (current < input.length) { + var char = input[current]; + + if (char === '(') { + tokens.push({ + type: 'paren', + value: '(' + }); + current++; + continue; + } + + if (char === ')') { + tokens.push({ + type: 'paren', + value: ')' + }); + current++; + continue; + } + + var WHITESPACE = /\s/; + if (WHITESPACE.test(char)) { + current++; + continue; + } + + var NUMBERS = /[0-9]/; + if (NUMBERS.test(char)) { + var value = ''; + + while (NUMBERS.test(char)) { + value += char; + char = input[++current]; + } + + tokens.push({ + type: 'number', + value: value + }); + + continue; + } + + var LETTERS = /[a-zA-Z]/; + if (LETTERS.test(char)) { + var value = ''; + + while (LETTERS.test(char)) { + value += char; + char = input[++current]; + } + + tokens.push({ + type: 'name', + value: value + }); + + continue; + } + + throw new TypeError('I dont know what this character is: ' + char); + } + + return tokens; +} + +/** + * ============================================================================ + * ヽ/❀o ل͜ o\ノ + * THE PARSER!!! 
+ * ============================================================================ + */ + +function parser(tokens) { /* Recursive-descent parser: turns the token array into { type: 'Program', body: [...] } whose entries are NumberLiteral or CallExpression nodes. Throws TypeError(token.type) for a token that cannot start an expression. */ + var current = 0; /* index of the next unconsumed token, shared by every walk() call */ + + function walk() { + var token = tokens[current]; + + if (token.type === 'number') { + current++; + + return { + type: 'NumberLiteral', + value: token.value + }; + } + + if ( + token.type === 'paren' && + token.value === '(' + ) { + current++; + + var node = { + type: 'CallExpression', + name: tokens[current].value, + params: [] + }; + + token = tokens[++current]; /* skip the name AND re-read the token: the loop test below must look at the token after the name, not at the '(' captured on entry, otherwise a call with no arguments such as (foo) tries to parse ')' as an argument and throws */ + + while ( + token.type !== 'paren' || + token.value !== ')' + ) { + node.params.push(walk()); + token = tokens[current]; + } + + current++; /* step over the closing ')' */ + + return node; + } + + throw new TypeError(token.type); + } + + var program = { + type: 'Program', + body: [] + }; + + while (current < tokens.length) { + program.body.push(walk()); + } + + return program; +} + +/** + * ============================================================================ + * ⌒(❀>◞౪◟<❀)⌒ + * THE TRAVERSER!!! + * ============================================================================ + */ + +function traverser(program, visitor) { + + function traverseArray(array, parent) { + array.forEach(function(child) { + traverseNode(child, parent); + }); + } + + function traverseNode(node, parent) { + var method = visitor[node.type]; + + if (method) { + method(node, parent); + } + + switch (node.type) { + case 'Program': + traverseArray(node.body, node); + break; + case 'CallExpression': + traverseArray(node.params, node); + break; + case 'NumberLiteral': + break; + default: + throw new TypeError(node.type); + } + } + + traverseNode(program, null); +} + +/** + * ============================================================================ + * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ + * THE TRANSFORMER!!! 
+ * ============================================================================ + */ + +function transformer(program) { /* Builds and returns a new 'Program' AST from the parser's AST by visiting it with traverser(). Note: it writes a `_context` property onto the input Program and CallExpression nodes. */ + var ast = { + type: 'Program', + body: [] + }; + + program._context = ast.body; /* _context on an old node is the array in the new tree that its children are pushed into */ + + traverser(program, { + NumberLiteral: function(node, parent) { + parent._context.push({ + type: 'NumberLiteral', + value: node.value + }); + }, + + CallExpression: function(node, parent) { + var expression = { + type: 'CallExpression', + callee: { + type: 'Identifier', + name: node.name + }, + arguments: [] + }; + + node._context = expression.arguments; /* this call's params will land in the new node's arguments */ + + if (parent.type !== 'CallExpression') { /* a call that is not nested inside another call is wrapped in an ExpressionStatement */ + expression = { + type: 'ExpressionStatement', + expression: expression + }; + } + + parent._context.push(expression); + } + }); + + return ast; +} + +/** + * ============================================================================ + * ヾ(〃^∇^)ノ♪ + * THE CODE GENERATOR!!!! + * ============================================================================ + */ + +function codeGenerator(node) { /* Recursively renders a transformed AST node to a string, dispatching on node.type; throws TypeError(node.type) for an unrecognised type. */ + switch (node.type) { + case 'Program': + return node.body.map(codeGenerator) + .join('\n'); + + case 'ExpressionStatement': + return ( + codeGenerator(node.expression) + + ';' + ); + + case 'CallExpression': + return ( + codeGenerator(node.callee) + + '(' + + node.arguments.map(codeGenerator) + .join(', ') + + ')' + ); + + case 'Identifier': + return node.name; + + case 'NumberLiteral': + return node.value; + + default: + throw new TypeError(node.type); + } +} + +/** + * ============================================================================ + * (۶* ‘ヮ’)۶” + * !!!!!!!!THE COMPILER!!!!!!!! 
+ * ============================================================================ + */ + +function compiler(input) { /* Runs the whole pipeline on a source string: tokenizer -> parser -> transformer -> codeGenerator, and returns the generated output. */ + var tokens = tokenizer(input); + var ast = parser(tokens); + var newAst = transformer(ast); + var output = codeGenerator(newAst); + + return output; +} + + +/** + * ============================================================================ + * (๑˃̵ᴗ˂̵)و + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * ============================================================================ + */ + +// Now I'm just exporting everything... +module.exports = { /* traverser is used internally by transformer and is not part of the exported object */ + tokenizer: tokenizer, + parser: parser, + transformer: transformer, + codeGenerator: codeGenerator, + compiler: compiler +}; From bdc62b6542181b51d0c63f2c00629368f294d2d4 Mon Sep 17 00:00:00 2001 From: Ben Lesh Date: Wed, 30 Mar 2016 16:34:24 -0700 Subject: [PATCH 02/31] fix(super-tiny-compiler): Ds that should have been Us are now Us and not Ds fixes #1 --- super-tiny-compiler.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 1b60002..41d9e90 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -21,12 +21,12 @@ * S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R * S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R - * S:::::S U:::::D D:::::U P::::P P:::::P E:::::E R::::R R:::::R - * S::::SSSS U:::::D D:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R - * SS::::::SSSSS U:::::D D:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR - * SSS::::::::SS U:::::D D:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R - * SSSSSS::::S U:::::D D:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R - * S:::::S U:::::D D:::::U P::::P E:::::E R::::R R:::::R + * S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R 
R:::::R + * S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R + * SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR + * SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R + * SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R + * S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R * S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R * SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R * S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R From ed6d5b5eb29103dc8339b1233a29f41d94b8a429 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Wed, 30 Mar 2016 17:39:16 -0700 Subject: [PATCH 03/31] Init code comments --- super-tiny-compiler.js | 256 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 41d9e90..1181a6c 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -73,6 +73,262 @@ * ======================================================================================================================================================================= */ +/** + * Today we're going write a compiler together. But not just any compiler... A + * super duper tiny teeny compiler! A compiler that is so small that if you + * remove all the comments this file would only be ~200 lines of actual code. + * + * We're going to compile some lisp-like function calls into some C-like + * function calls. + * + * If you are familiar with one or the other. I'll just give you a quick intro. + * + * If we had two functions `add` and `subtract` they would be written like this: + * + * LISP C + * + * 2 + 2 (add 2 2) add(2, 2) + * 4 - 2 (subtract 4 2) subtract(4, 2) + * 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2)) + * + * Easy peezy right? 
+ * + * Well good, because this is exactly what we are going to compile. While this + * is neither a complete LISP or C syntax, it will be enough of the syntax to + * demonstrate many of major pieces of a modern compiler. + */ + +/** + * Most compiler break down into three primary stages: Parsing, Transformation, + * and Code Generation + * + * 1. *Parsing* is taking raw code and turning it into a more abstract + * representation of the code. + * + * 2. *Transformation* takes this abstract representation and manipulates to do + * whatever the compiler wants it to. + * + * 3. *Code Generation* takes the transformed representation of the code and + * turns it into new code. + */ + +/** + * Parsing + * ------- + * + * Parsing typically gets broken down into two phases: Lexical Analysis and + * Syntactic Analysis. + * + * 1. *Lexical Analysis* takes the raw code and splits it apart into these things + * called tokens by a thing called a tokenizer (or lexer). + * + * Tokens are an array of tiny little objects that describe an isolated piece + * of the syntax. They could be numbers, labels, punctuation, operators, + * whatever. + * + * 2. *Syntactic Analysis* takes the tokens and reformats them into a + * representation that describes each part of the syntax and their relation + * to one another. This is known as an intermediate representation or + * Abstract Syntax Tree. + * + * An Abstract Syntax Tree or AST for short is a deeply nested object that + * represents code in a way that is both easy to work with and tells us a lot + * of information. 
+ * + * For the following syntax: + * + * (add 2 (subtract 4 2)) + * + * Tokens might look something like this: + * + * [ + * { type: 'paren', value: '(' }, + * { type: 'name', value: 'add' }, + * { type: 'number', value: '2' }, + * { type: 'paren', value: '(' }, + * { type: 'name', value: 'subtract' }, + * { type: 'number', value: '4' }, + * { type: 'number', value: '2' }, + * { type: 'paren', value: ')' }, + * { type: 'paren', value: ')' } + * ] + * + * And an Abstract Syntax Tree (AST) might look like this: + * + * { + * type: 'Program', + * body: [{ + * type: 'CallExpression', + * name: 'add', + * params: [{ + * type: 'NumberLiteral', + * value: '2' + * }, { + * type: 'CallExpression', + * name: 'subtract', + * params: [{ + * type: 'NumberLiteral', + * value: '4' + * }, { + * type: 'NumberLiteral', + * value: '2' + * }] + * }] + * }] + * } + */ + +/** + * Transformation + * -------------- + * + * The next type of stage of a compiler is transformation. Again, this just + * takes the AST from the last step and makes changes to it. It can manipulate + * the AST in the same language or it can translate it into an entirely new + * language. + * + * Let’s look at how we would transform an AST. + * + * You might notice that our AST has elements within it that look very similar. + * There are these objects with a type property. Each of these are known as an + * AST Node. These nodes have defined properties on them that describe one + * isolated part of the tree. + * + * We can have a node for a "NumberLiteral": + * + * { + * type: 'NumberLiteral', + * value: '2' + * } + * + * Or maybe a node for a "CallExpression": + * + * { + * type: 'CallExpression', + * name: 'subtract', + * params: [...nested nodes go here...] + * } + * + * When transforming the AST we can manipulate nodes by + * adding/removing/replacing properties, we can add new nodes, remove nodes, or + * we could leave the existing AST alone and create and entirely new one based + * on it. 
+ * + * Since we’re targeting a new language, we’re going to focus on creating an + * entirely new AST that is specific to the target language. + * + * Traversal + * --------- + * + * In order to navigate through all of these nodes, we need to be able to + * traverse through them. This traversal process goes to each node in the AST + * depth-first. + * + * { + * type: 'Program', + * body: [{ + * type: 'CallExpression', + * name: 'add', + * params: [{ + * type: 'NumberLiteral', + * value: '2' + * }, { + * type: 'CallExpression', + * name: 'subtract', + * params: [{ + * type: 'NumberLiteral', + * value: '4' + * }, { + * type: 'NumberLiteral', + * value: '2' + * }] + * }] + * }] + * } + * + * So for the above AST we would go: + * + * 1. Program - Starting at the top level of the AST + * 2. CallExpression (add) - Moving to the first element of the Program's body + * 3. NumberLiteral (2) - Moving to the first element of CallExpression's params + * 4. CallExpression (subtract) - Moving to the second element of CallExpression's params + * 5. NumberLiteral (4) - Moving to the first element of CallExpression's params + * 6. NumberLiteral (2) - Moving to the second element of CallExpression's params + * + * If we were manipulating this AST directly instead of creating a separate AST + * we would likely introduce all sorts of abstractions here. But just visiting + * each node in the tree is enough. + * + * The reason I use the word “visiting” is because there is this pattern of how + * to represent operations on elements of an object structure. + * + * Visitors + * -------- + * + * The basic idea here is that we are going to create a “visitor” object that + * has methods that will accept different node types. + * + * var visitor = { + * NumberLiteral() {}, + * CallExpression() {} + * }; + * + * When we traverse our AST we will call the methods on this visitor whenever we + * encounter a node of a matching type. 
+ * + * In order to make this useful we will also pass the node and a reference to + * the parent node. + * + * var visitor = { + * NumberLiteral(node, parent) {}, + * CallExpression(node, parent) {} + * }; + */ + +/** + * Code Generation + * --------------- + * + * The final phase of a compiler is code generation. Sometimes compilers will do + * things that overlap with transformation, but for the most part code + * generation just means take our AST and string-ify code back out. + * + * Code generators work several different ways, some compilers will reuse the + * tokens from earlier, others will have created a separate representation of + * the code so that they can print node linearly, but from what I can tell most + * will use the same AST we just created which is what we’re going to focus on. + * + * Effectively our code generator will know how to “print” all of the different + * node types of the AST, and it will recursively call itself to print nested + * nodes until everything is printed into one long string of code. + */ + +/** + * And that's it! That's all the different pieces of a compiler. + * + * Now that isn’t to say every compiler looks exactly like I described here. + * Compilers serve many different purposes, and they might need more steps than + * I have detailed. + * + * But now you should have a general high-level idea of what most compilers look + * like. + * + * Now that I’ve explained all of this, you’re all good to go write your own + * compilers right? + * + * Just kidding, that's what I'm here to help with :P + * + * So let's begin... + */ + +/** + * ----------------------------------------------------------------------------- + * *Note:* This is all I've written so far, so the code below isn't annnotated + * yet. 
You can still read it all and it totally works, but I plan on improving + * this in the near future + * ----------------------------------------------------------------------------- + */ + /** * ============================================================================ * (/^▽^)/ From 9bee727f773fdb340655e310d7638418f1c695c2 Mon Sep 17 00:00:00 2001 From: Henry Zhu Date: Wed, 30 Mar 2016 21:25:27 -0400 Subject: [PATCH 04/31] Add a test --- README.md | 4 ++ test/test.js | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 test/test.js diff --git a/README.md b/README.md index 85f11c4..3389501 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,10 @@ inline annotations. If you want to come back when that's done, you can either watch/star this repo or follow me on [twitter](https://twitter.com/thejameskyle) for updates. +### Tests + +Run with `node ./test/test.js` + --- [![cc-by-4.0](https://licensebuttons.net/l/by/4.0/80x15.png)](http://creativecommons.org/licenses/by/4.0/) diff --git a/test/test.js b/test/test.js new file mode 100644 index 0000000..7fcfdda --- /dev/null +++ b/test/test.js @@ -0,0 +1,101 @@ +var assert = require('assert'); +var superTinyCompiler = require('../super-tiny-compiler'); + +var tokenizer = superTinyCompiler.tokenizer; +var parser = superTinyCompiler.parser; +var transformer = superTinyCompiler.transformer; +var codeGenerator = superTinyCompiler.codeGenerator; +var compiler = superTinyCompiler.compiler; + +var input = '(add 2 (subtract 4 2))'; + +var tokens = [ + { type: 'paren', value: '(' }, + { type: 'name', value: 'add' }, + { type: 'number', value: '2' }, + { type: 'paren', value: '(' }, + { type: 'name', value: 'subtract' }, + { type: 'number', value: '4' }, + { type: 'number', value: '2' }, + { type: 'paren', value: ')' }, + { type: 'paren', value: ')' } +]; + +//test tokenizer +assert.deepStrictEqual(tokenizer(input), tokens); + +var ast = { + type: 'Program', + body: [{ + 
type: 'CallExpression', + name: 'add', + params: [{ + type: 'NumberLiteral', + value: '2' + }, { + type: 'CallExpression', + name: 'subtract', + params: [{ + type: 'NumberLiteral', + value: '4' + }, { + type: 'NumberLiteral', + value: '2' + }] + }] + }] +}; + +// test parser/ast +assert.deepStrictEqual(parser(tokens), ast); + +var newAst = { + "type": "Program", + "body": [ + { + "type": "ExpressionStatement", + "expression": { + "type": "CallExpression", + "callee": { + "type": "Identifier", + "name": "add" + }, + "arguments": [ + { + "type": "NumberLiteral", + "value": "2" + }, + { + "type": "CallExpression", + "callee": { + "type": "Identifier", + "name": "subtract" + }, + "arguments": [ + { + "type": "NumberLiteral", + "value": "4" + }, + { + "type": "NumberLiteral", + "value": "2" + } + ] + } + ] + } + } + ] +}; + +assert.deepStrictEqual(transformer(ast), newAst); + +var output = 'add(2, subtract(4, 2));'; + +// test generator +assert.deepStrictEqual(codeGenerator(newAst), output); + +// test whole compiler +assert.deepStrictEqual(compiler(input), output); + +console.log('All Passed!'); From 6c2eedab23a646bfca928287e4d7add1cabf83d4 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Wed, 30 Mar 2016 18:45:44 -0700 Subject: [PATCH 05/31] Clean up test file --- test.js | 84 ++++++++++++++++++++++++++++++++++++++++++ test/test.js | 101 --------------------------------------------------- 2 files changed, 84 insertions(+), 101 deletions(-) create mode 100644 test.js delete mode 100644 test/test.js diff --git a/test.js b/test.js new file mode 100644 index 0000000..f8a2793 --- /dev/null +++ b/test.js @@ -0,0 +1,84 @@ +var superTinyCompiler = require('./super-tiny-compiler'); +var assert = require('assert'); + +var tokenizer = superTinyCompiler.tokenizer; +var parser = superTinyCompiler.parser; +var transformer = superTinyCompiler.transformer; +var codeGenerator = superTinyCompiler.codeGenerator; +var compiler = superTinyCompiler.compiler; + +var input = '(add 2 (subtract 
4 2))'; +var output = 'add(2, subtract(4, 2));'; + +var tokens = [ + { type: 'paren', value: '(' }, + { type: 'name', value: 'add' }, + { type: 'number', value: '2' }, + { type: 'paren', value: '(' }, + { type: 'name', value: 'subtract' }, + { type: 'number', value: '4' }, + { type: 'number', value: '2' }, + { type: 'paren', value: ')' }, + { type: 'paren', value: ')' } +]; + +var ast = { + type: 'Program', + body: [{ + type: 'CallExpression', + name: 'add', + params: [{ + type: 'NumberLiteral', + value: '2' + }, { + type: 'CallExpression', + name: 'subtract', + params: [{ + type: 'NumberLiteral', + value: '4' + }, { + type: 'NumberLiteral', + value: '2' + }] + }] + }] +}; + +var newAst = { + type: 'Program', + body: [{ + type: 'ExpressionStatement', + expression: { + type: 'CallExpression', + callee: { + type: 'Identifier', + name: 'add' + }, + arguments: [{ + type: 'NumberLiteral', + value: '2' + }, { + type: 'CallExpression', + callee: { + type: 'Identifier', + name: 'subtract' + }, + arguments: [{ + type: 'NumberLiteral', + value: '4' + }, { + type: 'NumberLiteral', + value: '2' + }] + }] + } + }] +}; + +assert.deepStrictEqual(tokenizer(input), tokens, 'Tokeizer should turn `input` string into `tokens` array'); +assert.deepStrictEqual(parser(tokens), ast, 'Parser should turn `tokens` array into `ast`'); +assert.deepStrictEqual(transformer(ast), newAst, 'Transformer should turn `ast` into a `newAst`'); +assert.deepStrictEqual(codeGenerator(newAst), output, 'Code Generator should turn `newAst` into `output` string'); +assert.deepStrictEqual(compiler(input), output, 'Compiler should turn `input` into `output`'); + +console.log('All Passed!'); diff --git a/test/test.js b/test/test.js deleted file mode 100644 index 7fcfdda..0000000 --- a/test/test.js +++ /dev/null @@ -1,101 +0,0 @@ -var assert = require('assert'); -var superTinyCompiler = require('../super-tiny-compiler'); - -var tokenizer = superTinyCompiler.tokenizer; -var parser = superTinyCompiler.parser; -var 
transformer = superTinyCompiler.transformer; -var codeGenerator = superTinyCompiler.codeGenerator; -var compiler = superTinyCompiler.compiler; - -var input = '(add 2 (subtract 4 2))'; - -var tokens = [ - { type: 'paren', value: '(' }, - { type: 'name', value: 'add' }, - { type: 'number', value: '2' }, - { type: 'paren', value: '(' }, - { type: 'name', value: 'subtract' }, - { type: 'number', value: '4' }, - { type: 'number', value: '2' }, - { type: 'paren', value: ')' }, - { type: 'paren', value: ')' } -]; - -//test tokenizer -assert.deepStrictEqual(tokenizer(input), tokens); - -var ast = { - type: 'Program', - body: [{ - type: 'CallExpression', - name: 'add', - params: [{ - type: 'NumberLiteral', - value: '2' - }, { - type: 'CallExpression', - name: 'subtract', - params: [{ - type: 'NumberLiteral', - value: '4' - }, { - type: 'NumberLiteral', - value: '2' - }] - }] - }] -}; - -// test parser/ast -assert.deepStrictEqual(parser(tokens), ast); - -var newAst = { - "type": "Program", - "body": [ - { - "type": "ExpressionStatement", - "expression": { - "type": "CallExpression", - "callee": { - "type": "Identifier", - "name": "add" - }, - "arguments": [ - { - "type": "NumberLiteral", - "value": "2" - }, - { - "type": "CallExpression", - "callee": { - "type": "Identifier", - "name": "subtract" - }, - "arguments": [ - { - "type": "NumberLiteral", - "value": "4" - }, - { - "type": "NumberLiteral", - "value": "2" - } - ] - } - ] - } - } - ] -}; - -assert.deepStrictEqual(transformer(ast), newAst); - -var output = 'add(2, subtract(4, 2));'; - -// test generator -assert.deepStrictEqual(codeGenerator(newAst), output); - -// test whole compiler -assert.deepStrictEqual(compiler(input), output); - -console.log('All Passed!'); From 992362ba9cb37bc16fc1419e4ae3c38fd00a18f2 Mon Sep 17 00:00:00 2001 From: Sarbbottam Bandyopadhyay Date: Wed, 30 Mar 2016 18:51:28 -0700 Subject: [PATCH 06/31] fixed test instruction --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/README.md b/README.md index 3389501..f556545 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ watch/star this repo or follow me on ### Tests -Run with `node ./test/test.js` +Run with `node test.js` --- From 8466b6a67df9d815f1b7a8b5d4eec5abca0aea44 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Wed, 30 Mar 2016 19:27:29 -0700 Subject: [PATCH 07/31] Add tokenizer inline annotations --- super-tiny-compiler.js | 91 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 8 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 1181a6c..6be8b64 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -321,36 +321,65 @@ * So let's begin... */ -/** - * ----------------------------------------------------------------------------- - * *Note:* This is all I've written so far, so the code below isn't annnotated - * yet. You can still read it all and it totally works, but I plan on improving - * this in the near future - * ----------------------------------------------------------------------------- - */ - /** * ============================================================================ * (/^▽^)/ * THE TOKENIZER! * ============================================================================ */ + +/** + * We're gonna start of with our first phase of parsing, lexical analysis, with the tokenizer. + * + * We're just going to take our string of code and break it down into an array of tokens. + * + * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...] + */ + +// We start by accepting an input string of code, and we're gonna set up two +// things... function tokenizer(input) { + + // A `current` variable for tracking our position in the code like a cursor. var current = 0; + + // And a `tokens` array for pushing our tokens to. var tokens = []; + // We start by creating a `while` loop where we are setting up our `current` + // variable to be incremented as much as we want `inside` the loop. 
+ // + // We do this because we may want to increment `current` many times within a + // single loop because our tokens can be any length. while (current < input.length) { + + // We're also going to store the `current` character in the `input`. var char = input[current]; + // The first thing we want to check for is an open parenthesis. This will + // later be used for `CallExpressions` but for now we only care about the + // character. + // + // We check to see if we have an open parenthesis: if (char === '(') { + + // If we do, we push a new token with the type `paren` and set the value + // to an open parenthesis. tokens.push({ type: 'paren', value: '(' }); + + // Then we increment `current` current++; + + // And we `continue` onto the next cycle of the loop. continue; } + // Next we're going to check for a closing parenthesis. We do the same exact + // thing as before: Check for a closing parenthesis, add a new token, + // increment current, and `continue`. if (char === ')') { tokens.push({ type: 'paren', @@ -360,38 +389,73 @@ function tokenizer(input) { continue; } + // Moving on we're now going to check for whitespace. This is interesting + // because we care that whitespace exists to separate characters, but it + // isn't actually important for us to store as a token. We would only throw + // it out later. + // + // So here we're just going to test for existance and if it does exist we're + // going to just `continue` on. var WHITESPACE = /\s/; if (WHITESPACE.test(char)) { current++; continue; } + // The next type of token is a number. This is different than what we have + // seen before because a number could many any number of characters and we + // want to capture the entire sequence of characters as one token. + // + // (add 123 456) + // ^^^ ^^^ + // Only two separate tokens + // + // So we start this off when we encounter the first number in a sequence. 
var NUMBERS = /[0-9]/; if (NUMBERS.test(char)) { + + // We're going to create a `value` string that we are going to push + // characters to. var value = ''; + // Then we're going to loop through each character in the sequence until + // we encounter a character that is not a number, pushing each character + // that is a number to our `value` and incrementing `current` as we go. while (NUMBERS.test(char)) { value += char; char = input[++current]; } + // After that we push our `number` token to the `tokens` array. tokens.push({ type: 'number', value: value }); + // And we continue on. continue; } + // The last type of token will be a `name` token. This is a sequence of + // letters instead of numbers, that are the names of functions in our lisp + // syntax. + // + // (add 2 4) + // ^^^ + // Name token + // var LETTERS = /[a-zA-Z]/; if (LETTERS.test(char)) { var value = ''; + // Again we're just going to loop through all the letters pushing them to + // a value. while (LETTERS.test(char)) { value += char; char = input[++current]; } + // And pushing that value as a token with the type `name` and continuing. tokens.push({ type: 'name', value: value @@ -400,12 +464,23 @@ function tokenizer(input) { continue; } + // Finally if we have not matched a character by now, we're going to throw + // an error and completely exit. throw new TypeError('I dont know what this character is: ' + char); } + // Then at the end of our `tokenizer` we simply return the tokens array. return tokens; } +/** + * ----------------------------------------------------------------------------- + * *Note:* This is all I've written so far, so the code below isn't annnotated + * yet. 
You can still read it all and it totally works, but I plan on improving + * this in the near future + * ----------------------------------------------------------------------------- + */ + /** * ============================================================================ * ヽ/❀o ل͜ o\ノ From 0baea5b6ad229e8b24d6d8a3a5c9154d5fd55ae1 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Wed, 30 Mar 2016 19:48:37 -0700 Subject: [PATCH 08/31] Add parser inline annotations --- super-tiny-compiler.js | 126 ++++++++++++++++++++++++++++++++++------- 1 file changed, 105 insertions(+), 21 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 6be8b64..3df9936 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -473,14 +473,6 @@ function tokenizer(input) { return tokens; } -/** - * ----------------------------------------------------------------------------- - * *Note:* This is all I've written so far, so the code below isn't annnotated - * yet. You can still read it all and it totally works, but I plan on improving - * this in the near future - * ----------------------------------------------------------------------------- - */ - /** * ============================================================================ * ヽ/❀o ل͜ o\ノ @@ -488,63 +480,155 @@ function tokenizer(input) { * ============================================================================ */ +/** + * For our parser we're going to take our array of tokens and turn it into an + * AST. + * + * [{ type: 'paren', value: '(' }, ...] => { type: 'Program', body: [...] } + */ + +// Okay, so we define a `parser` function that accepts our array of `tokens`. function parser(tokens) { + + // Again we keep a `current` variable that we will use as a cursor. var current = 0; + // But this time we're going to use recursion instead of a `while` loop. So we + // define a `walk` function. function walk() { + + // Inside the walk function we start by grabbing the `current` token. 
var token = tokens[current]; + // We're going to split each type of token off into a different code path, + // starting off with `number` tokens. + // + // We test to see if we have a `number` token. if (token.type === 'number') { + + // If we have one, we'll increment `current`. current++; + // And we'll return a new AST node called `NumberLiteral` and setting its + // value to the value of our token. return { type: 'NumberLiteral', value: token.value }; } + // Next we're going to look for CallExpressions. We start this off when we + // encounter an open parenthesis. if ( token.type === 'paren' && token.value === '(' ) { - current++; + // We'll increment `current` to skip the parenthesis since we don't care + // about it in our AST. + token = tokens[++current]; + + // We create an base node with the type `CallExpression`, and we're going + // to set the name as the current token's value since the next token after + // the open parenthesis is the name of the function. var node = { type: 'CallExpression', - name: tokens[current].value, + name: token.value, params: [] }; - current++; + // We increment `current` *again* to skip the name token. + token = tokens[++current]; + // And now we want to loop through each token that will be the `params` of + // our `CallExpression` until we encounter a closing parenthesis. + // + // Now this is where recursion comes in. Instead of trying to parse a + // potentially infinitely nested set of nodes we're going to rely on + // recursion to resolve things. + // + // To explain this, let's take our Lisp code. You can see that the + // parameters of the `add` are a number and a nested `CallExpression` that + // includes its own numbers. + // + // (add 2 (subtract 4 2)) + // + // You'll also notice that in our tokens array we have multiple closing + // parenthesis. 
+ // + // [ + // { type: 'paren', value: '(' }, + // { type: 'name', value: 'add' }, + // { type: 'number', value: '2' }, + // { type: 'paren', value: '(' }, + // { type: 'name', value: 'subtract' }, + // { type: 'number', value: '4' }, + // { type: 'number', value: '2' }, + // { type: 'paren', value: ')' }, <<< Closing parenthesis + // { type: 'paren', value: ')' } <<< Closing parenthesis + // ] + // + // We're going to rely on the nested `walk` function to increment our + // `current` variable past any nested `CallExpressions`. + + // So we create a `while` loop that will continue until it encounters a + // token with a `type` of `'paren'` and a `value` of a closing + // parenthesis. while ( token.type !== 'paren' || token.value !== ')' ) { + // we'll call the `walk` function which will return a `node` and we'll + // push it into our `node.params`. node.params.push(walk()); token = tokens[current]; } + // Finally we will increment `current` one last time to skip the closing + // parenthesis. current++; + // And return the node. return node; } + // Again, if we haven't recognized the token type by now we're going to + // throw an error. throw new TypeError(token.type); } - var program = { + // Now, we're going to create our AST which will have a root which is a + // `Program` node. + var ast = { type: 'Program', body: [] }; + // And we're going to kickstart our `walk` function, pushing nodes to our + // `ast.body` array. + // + // The reason we are doing this inside a loop is because our program can have + // `CallExpressions` after one another instead of being nested. + // + // (add 2 2) + // (subtract 4 2) + // while (current < tokens.length) { - program.body.push(walk()); + ast.body.push(walk()); } - return program; + // At the end of our parser we'll return the AST. + return ast; } +/** + * ---------------------------------------------------------------------------- + * *Note:* This is all I've written so far, so the code below isn't annnotated + * yet. 
You can still read it all and it totally works, but I plan on improving + * this in the near future + * ---------------------------------------------------------------------------- + */ + /** * ============================================================================ * ⌒(❀>◞౪◟<❀)⌒ @@ -552,7 +636,7 @@ function parser(tokens) { * ============================================================================ */ -function traverser(program, visitor) { +function traverser(ast, visitor) { function traverseArray(array, parent) { array.forEach(function(child) { @@ -581,7 +665,7 @@ function traverser(program, visitor) { } } - traverseNode(program, null); + traverseNode(ast, null); } /** @@ -591,15 +675,15 @@ function traverser(program, visitor) { * ============================================================================ */ -function transformer(program) { - var ast = { +function transformer(ast) { + var newAst = { type: 'Program', body: [] }; - program._context = ast.body; + ast._context = newAst.body; - traverser(program, { + traverser(ast, { NumberLiteral: function(node, parent) { parent._context.push({ type: 'NumberLiteral', @@ -630,7 +714,7 @@ function transformer(program) { } }); - return ast; + return newAst; } /** From 30c6f1c9a5b4c9c79e856209900321cd4568f2ec Mon Sep 17 00:00:00 2001 From: Stephen Margheim Date: Wed, 30 Mar 2016 23:28:19 -0400 Subject: [PATCH 09/31] Fix typos, grammar errors, etc --- super-tiny-compiler.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 3df9936..1b8f81e 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -95,11 +95,11 @@ * * Well good, because this is exactly what we are going to compile. While this * is neither a complete LISP or C syntax, it will be enough of the syntax to - * demonstrate many of major pieces of a modern compiler. + * demonstrate many of the major pieces of a modern compiler. 
*/ /** - * Most compiler break down into three primary stages: Parsing, Transformation, + * Most compilers break down into three primary stages: Parsing, Transformation, * and Code Generation * * 1. *Parsing* is taking raw code and turning it into a more abstract @@ -131,7 +131,7 @@ * to one another. This is known as an intermediate representation or * Abstract Syntax Tree. * - * An Abstract Syntax Tree or AST for short is a deeply nested object that + * An Abstract Syntax Tree, or AST for short, is a deeply nested object that * represents code in a way that is both easy to work with and tells us a lot * of information. * @@ -182,7 +182,7 @@ * Transformation * -------------- * - * The next type of stage of a compiler is transformation. Again, this just + * The next type of stage for a compiler is transformation. Again, this just * takes the AST from the last step and makes changes to it. It can manipulate * the AST in the same language or it can translate it into an entirely new * language. @@ -255,7 +255,7 @@ * 5. NumberLiteral (4) - Moving to the first element of CallExpression's params * 6. NumberLiteral (2) - Moving to the second element of CallExpression's params * - * If we were manipulating this AST directly instead of creating a separate AST + * If we were manipulating this AST directly, instead of creating a separate AST, * we would likely introduce all sorts of abstractions here. But just visiting * each node in the tree is enough. * @@ -296,7 +296,7 @@ * Code generators work several different ways, some compilers will reuse the * tokens from earlier, others will have created a separate representation of * the code so that they can print node linearly, but from what I can tell most - * will use the same AST we just created which is what we’re going to focus on. + * will use the same AST we just created, which is what we’re going to focus on. 
* * Effectively our code generator will know how to “print” all of the different * node types of the AST, and it will recursively call itself to print nested @@ -329,7 +329,7 @@ */ /** - * We're gonna start of with our first phase of parsing, lexical analysis, with the tokenizer. + * We're gonna start off with our first phase of parsing--lexical analysis--with the tokenizer. * * We're just going to take our string of code and break it down into an array of tokens. * @@ -379,7 +379,7 @@ function tokenizer(input) { // Next we're going to check for a closing parenthesis. We do the same exact // thing as before: Check for a closing parenthesis, add a new token, - // increment current, and `continue`. + // increment `current`, and `continue`. if (char === ')') { tokens.push({ type: 'paren', @@ -389,7 +389,7 @@ function tokenizer(input) { continue; } - // Moving on we're now going to check for whitespace. This is interesting + // Moving on, we're now going to check for whitespace. This is interesting // because we care that whitespace exists to separate characters, but it // isn't actually important for us to store as a token. We would only throw // it out later. @@ -403,7 +403,7 @@ function tokenizer(input) { } // The next type of token is a number. This is different than what we have - // seen before because a number could many any number of characters and we + // seen before because a number could be any number of characters and we // want to capture the entire sequence of characters as one token. // // (add 123 456) @@ -528,7 +528,7 @@ function parser(tokens) { // about it in our AST. token = tokens[++current]; - // We create an base node with the type `CallExpression`, and we're going + // We create a base node with the type `CallExpression`, and we're going // to set the name as the current token's value since the next token after // the open parenthesis is the name of the function. 
var node = { From 7fa8ab691fa5433272867c12cf0287bab20e1b5f Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 01:11:16 -0700 Subject: [PATCH 10/31] Add traverser inline annotations --- super-tiny-compiler.js | 70 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 1b8f81e..c414c2c 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -329,9 +329,11 @@ */ /** - * We're gonna start off with our first phase of parsing--lexical analysis--with the tokenizer. + * We're gonna start off with our first phase of parsing, lexical analysis, with + * the tokenizer. * - * We're just going to take our string of code and break it down into an array of tokens. + * We're just going to take our string of code and break it down into an array + * of tokens. * * (add 2 (subtract 4 2)) => [{ type: 'paren', value: '(' }, ...] */ @@ -621,14 +623,6 @@ function parser(tokens) { return ast; } -/** - * ---------------------------------------------------------------------------- - * *Note:* This is all I've written so far, so the code below isn't annnotated - * yet. You can still read it all and it totally works, but I plan on improving - * this in the near future - * ---------------------------------------------------------------------------- - */ - /** * ============================================================================ * ⌒(❀>◞౪◟<❀)⌒ @@ -636,38 +630,94 @@ function parser(tokens) { * ============================================================================ */ +/** + * So now we have our AST, and we want to be able to visit different nodes with + * a visitor. We need to be able to call the methods on the visitor whenever we + * encounter a node with a matching type. + * + * traverse(ast, { + * Program(node, parent) { + * // ... + * }, + * + * CallExpression(node, parent) { + * // ... + * }, + * + * NumberLiteral(node, parent) { + * // ... 
+ * } + * }); + */ + +// So we define a traverser function which accepts an AST and a +// visitor. Inside we're going to define two functions... function traverser(ast, visitor) { + // A `traverseArray` function that will allow us to iterate over an array and + // call the next function that we will define: `traverseNode`. function traverseArray(array, parent) { array.forEach(function(child) { traverseNode(child, parent); }); } + // `traverseNode` will accept a `node` and its `parent` node. So that it can + // pass both to our visitor methods. function traverseNode(node, parent) { + + // We start by testing for the existance of a method on the visitor with a + // matching `type`. var method = visitor[node.type]; + // If it exists we'll call it with the `node` and its `parent`. if (method) { method(node, parent); } + // Next we are going to split things up by the current node type. switch (node.type) { + + // We'll start with our top level `Program`. Since Program nodes have a + // property named body that has an array of nodes, we will call + // `traverseArray` to traverse down into them. + // + // (Remember that `traverseArray` will in turn call `traverseNode` so we + // are causing the tree to be traversed recursively) case 'Program': traverseArray(node.body, node); break; + + // Next we do the same will `CallExpressions` and traverse their `params`. case 'CallExpression': traverseArray(node.params, node); break; + + // In the case of `NumberLiterals` we don't have any child nodes to visit, + // so we'll just break. case 'NumberLiteral': break; + + // And again, if we haven't recognized the node type then we'll throw an + // error. default: throw new TypeError(node.type); } } + // Finally we kickstart the traverser by calling `traverseNode` with our ast + // with no `parent` because the top level of the AST doesn't have a parent. 
traverseNode(ast, null); } +/** + * ---------------------------------------------------------------------------- + * *Note:* This is all I've written so far, so the code below isn't annnotated + * yet. You can still read it all and it totally works, but I plan on improving + * this in the near future + * ---------------------------------------------------------------------------- + */ + /** * ============================================================================ * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ From 7d5c7592bb3242e685b8ee3f20ba443621396a78 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:13:54 -0700 Subject: [PATCH 11/31] Add transformer/code generator/compiler inline annotations --- super-tiny-compiler.js | 117 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index c414c2c..95b7fd6 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -710,14 +710,6 @@ function traverser(ast, visitor) { traverseNode(ast, null); } -/** - * ---------------------------------------------------------------------------- - * *Note:* This is all I've written so far, so the code below isn't annnotated - * yet. You can still read it all and it totally works, but I plan on improving - * this in the near future - * ---------------------------------------------------------------------------- - */ - /** * ============================================================================ * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ @@ -725,23 +717,84 @@ function traverser(ast, visitor) { * ============================================================================ */ +/** + * Next up, the transformer. Our transformer is going to take the AST that we + * have built and pass it to our traverser function with a visitor and will + * create a new ast. 
+ * + * ---------------------------------------------------------------------------- + * Original AST | Transformed AST + * ---------------------------------------------------------------------------- + * { | { + * type: 'Program', | type: 'Program', + * body: [{ | body: [{ + * type: 'CallExpression', | type: 'ExpressionStatement', + * name: 'add', | expression: { + * params: [{ | type: 'CallExpression', + * type: 'NumberLiteral', | callee: { + * value: '2' | type: 'Identifier', + * }, { | name: 'add' + * type: 'CallExpression', | }, + * name: 'subtract', | arguments: [{ + * params: [{ | type: 'NumberLiteral', + * type: 'NumberLiteral', | value: '2' + * value: '4' | }, { + * }, { | type: 'CallExpression', + * type: 'NumberLiteral', | callee: { + * value: '2' | type: 'Identifier', + * }] | name: 'subtract' + * }] | }, + * }] | arguments: [{ + * } | type: 'NumberLiteral', + * | value: '4' + * ---------------------------------- | }, { + * | type: 'NumberLiteral', + * | value: '2' + * | }] + * (sorry the other one is longer.) | } + * | } + * | }] + * | } + * ---------------------------------------------------------------------------- + */ + +// So we have our transformer function which will accept the lisp ast. function transformer(ast) { + + // We'll create a `newAst` which like our previous AST will have a program + // node. var newAst = { type: 'Program', body: [] }; + // Next I'm going to cheat a little and create a bit of a hack. We're going to + // use a property named `context` on our parent nodes that we're going to push + // nodes to their parent's `context`. Normally you would have a better + // abstraction than this, but for our purposes this keeps things simple. + // + // Just take note that the context is a reference *from* the old ast *to* the + // new ast. ast._context = newAst.body; + // We'll start by calling the traverser function with our ast and a visitor. 
traverser(ast, { + + // The first visitor method accepts `NumberLiterals` NumberLiteral: function(node, parent) { + // We'll create a new node also named `NumberLiteral` that we will push to + // the parent context. parent._context.push({ type: 'NumberLiteral', value: node.value }); }, + // Next up, `CallExpressions`. CallExpression: function(node, parent) { + + // We start creating a new node `CallExpression` with a nested + // `Identifier`. var expression = { type: 'CallExpression', callee: { @@ -751,19 +804,32 @@ function transformer(ast) { arguments: [] }; + // Next we're going to define a new context on the original + // `CallExpression` node that will reference the `expression`'s arguments + // so that we can push arguments. node._context = expression.arguments; + // Then we're going to check if the parent node is a `CallExpression`. + // If it is not... if (parent.type !== 'CallExpression') { + + // We're going to wrap our `CallExpression` node with an + // `ExpressionStatement`. We do this because the top level + // `CallExpressions` in JavaScript are actually statements. expression = { type: 'ExpressionStatement', expression: expression }; } + // Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s + // `context`. parent._context.push(expression); } }); + // At the end of our transformer function we'll return the new ast that we + // just created. return newAst; } @@ -774,18 +840,36 @@ function transformer(ast) { * ============================================================================ */ +/** + * Now let's move onto our last phase: The Code Generator. + * + * Our code generator is going to recursively call itself to print each node in + * the tree into one giant string. + */ + function codeGenerator(node) { + + // We'll break things down by the `type` of the `node`. switch (node.type) { + + // If we have a `Program` node. 
We will map through each node in the `body` + // and run them through the code generator and join them with a newline. case 'Program': return node.body.map(codeGenerator) .join('\n'); + // For `ExpressionStatements` we'll call the code generator on the nested + // expression and we'll add a semicolon... case 'ExpressionStatement': return ( codeGenerator(node.expression) + - ';' + ';' // << (...because we like to code the *correct* way) ); + // For `CallExpressions` we will print the `callee`, add an open + // parenthesis, we'll map through each node in the `arguments` array and run + // them through the code generator, joining them with a comma, and then + // we'll add a closing parenthesis. case 'CallExpression': return ( codeGenerator(node.callee) + @@ -795,12 +879,15 @@ function codeGenerator(node) { ')' ); + // For `Identifiers` we'll just return the `node`'s name. case 'Identifier': return node.name; + // For `NumberLiterals` we'll just return the `node`'s value. case 'NumberLiteral': return node.value; + // And if we haven't recognized the node, we'll throw an error. default: throw new TypeError(node.type); } @@ -813,16 +900,26 @@ function codeGenerator(node) { * ============================================================================ */ +/** + * FINALLY! We'll create our `compiler` function. Here we will link together + * every part of the pipeline. + * + * 1. input => tokenizer => tokens + * 2. tokens => parser => ast + * 3. ast => transformer => newAst + * 4. newAst => generator => output + */ + function compiler(input) { var tokens = tokenizer(input); var ast = parser(tokens); var newAst = transformer(ast); var output = codeGenerator(newAst); + // and simply return the output! 
return output; } - /** * ============================================================================ * (๑˃̵ᴗ˂̵)و From f5cbcf7aff1a62c19a6949a77c211d442bba823e Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:20:13 -0700 Subject: [PATCH 12/31] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f556545..43f3998 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# THE SUPER TINY COMPILER +THE SUPER TINY COMPILER Welcome to The Super Tiny Compiler. It's an ultra simplified example of all the major pieces of a modern compiler written in easy to read JavaScript. Reading From 84dde7b9061314483efc26e09cef83cdb71564da Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:25:28 -0700 Subject: [PATCH 13/31] Improve intro --- README.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 43f3998..cbd8c47 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,14 @@ THE SUPER TINY COMPILER -Welcome to The Super Tiny Compiler. It's an ultra simplified example of all the -major pieces of a modern compiler written in easy to read JavaScript. Reading -through it can help you learn about how most compilers work end to end. +***Welcome to The Super Tiny Compiler!*** + +This is an ultra-simplified example of all the major pieces of a modern compiler +written in easy to read JavaScript. + +Reading through the guided code will help you learn about how *most* compilers +work from end to end. + +# [Want to jump into the code? Click here](super-tiny-compiler.js) ### Why should I care? 
From 5292d35dc03918a1656b2f50d71750c5faf92eab Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:26:00 -0700 Subject: [PATCH 14/31] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cbd8c47..c280bc8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ written in easy to read JavaScript. Reading through the guided code will help you learn about how *most* compilers work from end to end. -# [Want to jump into the code? Click here](super-tiny-compiler.js) +## [Want to jump into the code? Click here](super-tiny-compiler.js) ### Why should I care? From 13be31ebfcf948129c1c3e7fc2a15bf0be4d9d98 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:26:18 -0700 Subject: [PATCH 15/31] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c280bc8..ef6e9d8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,9 @@ written in easy to read JavaScript. Reading through the guided code will help you learn about how *most* compilers work from end to end. -## [Want to jump into the code? Click here](super-tiny-compiler.js) +### [Want to jump into the code? Click here](super-tiny-compiler.js) + +--- ### Why should I care? 
From ee573d73ff8dd6ad3c7754552eb5cf548b4231e3 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:29:28 -0700 Subject: [PATCH 16/31] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ef6e9d8..27cdc65 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -THE SUPER TINY COMPILER +THE SUPER TINY COMPILER ***Welcome to The Super Tiny Compiler!*** From 256489ebc044c6e641bf85b48ce79c2ffbaeca65 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 02:37:16 -0700 Subject: [PATCH 17/31] Add unannotated file --- README.md | 2 + super-tiny-compiler-unannotated.js | 252 +++++++++++++++++++++++++++++ 2 files changed, 254 insertions(+) create mode 100644 super-tiny-compiler-unannotated.js diff --git a/README.md b/README.md index 27cdc65..7958f5d 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ work from end to end. ### [Want to jump into the code? Click here](super-tiny-compiler.js) +[(Or if you would prefer to read it without annotations, click here.)](super-tiny-compiler-unannotated.js) + --- ### Why should I care? 
diff --git a/super-tiny-compiler-unannotated.js b/super-tiny-compiler-unannotated.js new file mode 100644 index 0000000..6b98eb8 --- /dev/null +++ b/super-tiny-compiler-unannotated.js @@ -0,0 +1,252 @@ +function tokenizer(input) { + var current = 0; + var tokens = []; + + while (current < input.length) { + var char = input[current]; + + if (char === '(') { + tokens.push({ + type: 'paren', + value: '(' + }); + current++; + continue; + } + + if (char === ')') { + tokens.push({ + type: 'paren', + value: ')' + }); + current++; + continue; + } + + var WHITESPACE = /\s/; + if (WHITESPACE.test(char)) { + current++; + continue; + } + + var NUMBERS = /[0-9]/; + if (NUMBERS.test(char)) { + var value = ''; + + while (NUMBERS.test(char)) { + value += char; + char = input[++current]; + } + + tokens.push({ + type: 'number', + value: value + }); + + continue; + } + + var LETTERS = /[a-zA-Z]/; + if (LETTERS.test(char)) { + var value = ''; + + while (LETTERS.test(char)) { + value += char; + char = input[++current]; + } + + tokens.push({ + type: 'name', + value: value + }); + + continue; + } + + throw new TypeError('I dont know what this character is: ' + char); + } + + return tokens; +} + +function parser(tokens) { + var current = 0; + + function walk() { + var token = tokens[current]; + + if (token.type === 'number') { + current++; + + return { + type: 'NumberLiteral', + value: token.value + }; + } + + if ( + token.type === 'paren' && + token.value === '(' + ) { + token = tokens[++current]; + + var node = { + type: 'CallExpression', + name: token.value, + params: [] + }; + + token = tokens[++current]; + + while ( + token.type !== 'paren' || + token.value !== ')' + ) { + node.params.push(walk()); + token = tokens[current]; + } + + current++; + + return node; + } + + throw new TypeError(token.type); + } + + var ast = { + type: 'Program', + body: [] + }; + + while (current < tokens.length) { + ast.body.push(walk()); + } + + return ast; +} + +function traverser(ast, visitor) { + 
function traverseArray(array, parent) { + array.forEach(function(child) { + traverseNode(child, parent); + }); + } + + function traverseNode(node, parent) { + var method = visitor[node.type]; + + if (method) { + method(node, parent); + } + + switch (node.type) { + case 'Program': + traverseArray(node.body, node); + break; + + case 'CallExpression': + traverseArray(node.params, node); + break; + + case 'NumberLiteral': + break; + + default: + throw new TypeError(node.type); + } + } + + traverseNode(ast, null); +} + +function transformer(ast) { + var newAst = { + type: 'Program', + body: [] + }; + + ast._context = newAst.body; + + traverser(ast, { + NumberLiteral: function(node, parent) { + parent._context.push({ + type: 'NumberLiteral', + value: node.value + }); + }, + + CallExpression: function(node, parent) { + var expression = { + type: 'CallExpression', + callee: { + type: 'Identifier', + name: node.name + }, + arguments: [] + }; + + node._context = expression.arguments; + + if (parent.type !== 'CallExpression') { + expression = { + type: 'ExpressionStatement', + expression: expression + }; + } + + parent._context.push(expression); + } + }); + + return newAst; +} + +function codeGenerator(node) { + switch (node.type) { + case 'Program': + return node.body.map(codeGenerator) + .join('\n'); + + case 'ExpressionStatement': + return ( + codeGenerator(node.expression) + + ';' + ); + + case 'CallExpression': + return ( + codeGenerator(node.callee) + + '(' + + node.arguments.map(codeGenerator) + .join(', ') + + ')' + ); + + case 'Identifier': + return node.name; + + case 'NumberLiteral': + return node.value; + + default: + throw new TypeError(node.type); + } +} + +function compiler(input) { + var tokens = tokenizer(input); + var ast = parser(tokens); + var newAst = transformer(ast); + var output = codeGenerator(newAst); + + return output; +} + +module.exports = { + tokenizer: tokenizer, + parser: parser, + transformer: transformer, + codeGenerator: codeGenerator, + 
compiler: compiler +}; From 5469752621da7fe0ab8a6e7a882f8764ed9f86e4 Mon Sep 17 00:00:00 2001 From: nino-porcino Date: Thu, 31 Mar 2016 13:18:50 +0200 Subject: [PATCH 18/31] typo --- test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.js b/test.js index f8a2793..ef351a8 100644 --- a/test.js +++ b/test.js @@ -75,7 +75,7 @@ var newAst = { }] }; -assert.deepStrictEqual(tokenizer(input), tokens, 'Tokeizer should turn `input` string into `tokens` array'); +assert.deepStrictEqual(tokenizer(input), tokens, 'Tokenizer should turn `input` string into `tokens` array'); assert.deepStrictEqual(parser(tokens), ast, 'Parser should turn `tokens` array into `ast`'); assert.deepStrictEqual(transformer(ast), newAst, 'Transformer should turn `ast` into a `newAst`'); assert.deepStrictEqual(codeGenerator(newAst), output, 'Code Generator should turn `newAst` into `output` string'); From 2636d7d135cd9a9848599f2a3fe00333715f5fdc Mon Sep 17 00:00:00 2001 From: Lucas Kent Date: Thu, 31 Mar 2016 23:35:05 +1100 Subject: [PATCH 19/31] Fix typo in comment --- super-tiny-compiler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 95b7fd6..1b2ecde 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -688,7 +688,7 @@ function traverser(ast, visitor) { traverseArray(node.body, node); break; - // Next we do the same will `CallExpressions` and traverse their `params`. + // Next we do the same with `CallExpressions` and traverse their `params`. 
case 'CallExpression': traverseArray(node.params, node); break; From b4b87208496b617fee25edcd75c2925da714c6d7 Mon Sep 17 00:00:00 2001 From: Gilles Castel Date: Thu, 31 Mar 2016 15:14:19 +0200 Subject: [PATCH 20/31] Center headings --- super-tiny-compiler.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 95b7fd6..f3d47cf 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -321,12 +321,12 @@ * So let's begin... */ - /** - * ============================================================================ - * (/^▽^)/ - * THE TOKENIZER! - * ============================================================================ - */ +/** + * ============================================================================ + * (/^▽^)/ + * THE TOKENIZER! + * ============================================================================ + */ /** * We're gonna start off with our first phase of parsing, lexical analysis, with @@ -478,7 +478,7 @@ function tokenizer(input) { /** * ============================================================================ * ヽ/❀o ل͜ o\ノ - * THE PARSER!!! + * THE PARSER!!! * ============================================================================ */ @@ -712,7 +712,7 @@ function traverser(ast, visitor) { /** * ============================================================================ - * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ + * ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽ * THE TRANSFORMER!!! * ============================================================================ */ @@ -835,7 +835,7 @@ function transformer(ast) { /** * ============================================================================ - * ヾ(〃^∇^)ノ♪ + * ヾ(〃^∇^)ノ♪ * THE CODE GENERATOR!!!! 
* ============================================================================ */ @@ -895,8 +895,8 @@ function codeGenerator(node) { /** * ============================================================================ - * (۶* ‘ヮ’)۶” - * !!!!!!!!THE COMPILER!!!!!!!! + * (۶* ‘ヮ’)۶” + * !!!!!!!!THE COMPILER!!!!!!!! * ============================================================================ */ From 8a18ac0a3cb6e679a0d0dcce3e820ce2c954ddfc Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 08:41:54 -0700 Subject: [PATCH 21/31] Fix closing paren parsing logic --- super-tiny-compiler-unannotated.js | 4 ++-- super-tiny-compiler.js | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/super-tiny-compiler-unannotated.js b/super-tiny-compiler-unannotated.js index 6b98eb8..6fb0a9c 100644 --- a/super-tiny-compiler-unannotated.js +++ b/super-tiny-compiler-unannotated.js @@ -99,8 +99,8 @@ function parser(tokens) { token = tokens[++current]; while ( - token.type !== 'paren' || - token.value !== ')' + (token.type !== 'paren') || + (token.type === 'paren' && token.value !== ')') ) { node.params.push(walk()); token = tokens[current]; diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 8ceefe9..16effec 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -577,8 +577,8 @@ function parser(tokens) { // token with a `type` of `'paren'` and a `value` of a closing // parenthesis. while ( - token.type !== 'paren' || - token.value !== ')' + (token.type !== 'paren') || + (token.type === 'paren' && token.value !== ')') ) { // we'll call the `walk` function which will return a `node` and we'll // push it into our `node.params`. 
From 22670a21f695959d367ec136e0f821c99de07b5b Mon Sep 17 00:00:00 2001 From: miltonsegura Date: Thu, 31 Mar 2016 12:50:14 -0400 Subject: [PATCH 22/31] Update super-tiny-compiler.js :: wording, grammar 78 tiny teeny --> teeny tiny 214 create and entirely --> create an entirely 654 traverser --> traverser --- super-tiny-compiler.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 16effec..214842e 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -75,7 +75,7 @@ /** * Today we're going write a compiler together. But not just any compiler... A - * super duper tiny teeny compiler! A compiler that is so small that if you + * super duper teeny tiny compiler! A compiler that is so small that if you * remove all the comments this file would only be ~200 lines of actual code. * * We're going to compile some lisp-like function calls into some C-like @@ -211,7 +211,7 @@ * * When transforming the AST we can manipulate nodes by * adding/removing/replacing properties, we can add new nodes, remove nodes, or - * we could leave the existing AST alone and create and entirely new one based + * we could leave the existing AST alone and create an entirely new one based * on it. * * Since we’re targeting a new language, we’re going to focus on creating an @@ -650,7 +650,7 @@ function parser(tokens) { * }); */ -// So we define a traverser function which accepts an AST and a +// So we define a traverser function which accepts an AST and a // visitor. Inside we're going to define two functions... 
function traverser(ast, visitor) { From d264d92b9bc6d338cbcbdc852d6a14c4a89e3e69 Mon Sep 17 00:00:00 2001 From: Andrew Clark Date: Thu, 31 Mar 2016 10:51:37 -0700 Subject: [PATCH 23/31] "existance" -> "existence" --- super-tiny-compiler.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 214842e..c9e5c0b 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -396,7 +396,7 @@ function tokenizer(input) { // isn't actually important for us to store as a token. We would only throw // it out later. // - // So here we're just going to test for existance and if it does exist we're + // So here we're just going to test for existence and if it does exist we're // going to just `continue` on. var WHITESPACE = /\s/; if (WHITESPACE.test(char)) { @@ -666,7 +666,7 @@ function traverser(ast, visitor) { // pass both to our visitor methods. function traverseNode(node, parent) { - // We start by testing for the existance of a method on the visitor with a + // We start by testing for the existence of a method on the visitor with a // matching `type`. var method = visitor[node.type]; From 6eb15466705ac943cbd508c788036b7bdbfb2a06 Mon Sep 17 00:00:00 2001 From: Sarbbottam Bandyopadhyay Date: Thu, 31 Mar 2016 14:31:39 -0700 Subject: [PATCH 24/31] not familiar instead of familiar --- super-tiny-compiler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 1181a6c..a4d23d7 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -81,7 +81,7 @@ * We're going to compile some lisp-like function calls into some C-like * function calls. * - * If you are familiar with one or the other. I'll just give you a quick intro. + * If you are not familiar with one or the other. I'll just give you a quick intro. 
* * If we had two functions `add` and `subtract` they would be written like this: * From 4efa9f85227f9f8d9c39c9503c01f9eac0d92f8a Mon Sep 17 00:00:00 2001 From: Sarbbottam Bandyopadhyay Date: Thu, 31 Mar 2016 16:35:31 -0700 Subject: [PATCH 25/31] updated LETTERS regex --- super-tiny-compiler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index e37f1a4..953b83d 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -446,7 +446,7 @@ function tokenizer(input) { // ^^^ // Name token // - var LETTERS = /[a-zA-Z]/; + var LETTERS = /[a-z]i/; if (LETTERS.test(char)) { var value = ''; From 8ab8a42fc05b24fd5578c0bb6c21cc605960636f Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 16:44:54 -0700 Subject: [PATCH 26/31] Update super-tiny-compiler-unannotated.js --- super-tiny-compiler-unannotated.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/super-tiny-compiler-unannotated.js b/super-tiny-compiler-unannotated.js index 6fb0a9c..af26cc9 100644 --- a/super-tiny-compiler-unannotated.js +++ b/super-tiny-compiler-unannotated.js @@ -46,7 +46,7 @@ function tokenizer(input) { continue; } - var LETTERS = /[a-zA-Z]/; + var LETTERS = /[a-z]i/; if (LETTERS.test(char)) { var value = ''; From 974ca6ad29c4fa049e0f70474babbc74ebe514ed Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 16:54:04 -0700 Subject: [PATCH 27/31] Delete super-tiny-compiler-unannotated.js --- super-tiny-compiler-unannotated.js | 252 ----------------------------- 1 file changed, 252 deletions(-) delete mode 100644 super-tiny-compiler-unannotated.js diff --git a/super-tiny-compiler-unannotated.js b/super-tiny-compiler-unannotated.js deleted file mode 100644 index af26cc9..0000000 --- a/super-tiny-compiler-unannotated.js +++ /dev/null @@ -1,252 +0,0 @@ -function tokenizer(input) { - var current = 0; - var tokens = []; - - while (current < input.length) { - var char = input[current]; - - if 
(char === '(') { - tokens.push({ - type: 'paren', - value: '(' - }); - current++; - continue; - } - - if (char === ')') { - tokens.push({ - type: 'paren', - value: ')' - }); - current++; - continue; - } - - var WHITESPACE = /\s/; - if (WHITESPACE.test(char)) { - current++; - continue; - } - - var NUMBERS = /[0-9]/; - if (NUMBERS.test(char)) { - var value = ''; - - while (NUMBERS.test(char)) { - value += char; - char = input[++current]; - } - - tokens.push({ - type: 'number', - value: value - }); - - continue; - } - - var LETTERS = /[a-z]i/; - if (LETTERS.test(char)) { - var value = ''; - - while (LETTERS.test(char)) { - value += char; - char = input[++current]; - } - - tokens.push({ - type: 'name', - value: value - }); - - continue; - } - - throw new TypeError('I dont know what this character is: ' + char); - } - - return tokens; -} - -function parser(tokens) { - var current = 0; - - function walk() { - var token = tokens[current]; - - if (token.type === 'number') { - current++; - - return { - type: 'NumberLiteral', - value: token.value - }; - } - - if ( - token.type === 'paren' && - token.value === '(' - ) { - token = tokens[++current]; - - var node = { - type: 'CallExpression', - name: token.value, - params: [] - }; - - token = tokens[++current]; - - while ( - (token.type !== 'paren') || - (token.type === 'paren' && token.value !== ')') - ) { - node.params.push(walk()); - token = tokens[current]; - } - - current++; - - return node; - } - - throw new TypeError(token.type); - } - - var ast = { - type: 'Program', - body: [] - }; - - while (current < tokens.length) { - ast.body.push(walk()); - } - - return ast; -} - -function traverser(ast, visitor) { - function traverseArray(array, parent) { - array.forEach(function(child) { - traverseNode(child, parent); - }); - } - - function traverseNode(node, parent) { - var method = visitor[node.type]; - - if (method) { - method(node, parent); - } - - switch (node.type) { - case 'Program': - traverseArray(node.body, node); - 
break; - - case 'CallExpression': - traverseArray(node.params, node); - break; - - case 'NumberLiteral': - break; - - default: - throw new TypeError(node.type); - } - } - - traverseNode(ast, null); -} - -function transformer(ast) { - var newAst = { - type: 'Program', - body: [] - }; - - ast._context = newAst.body; - - traverser(ast, { - NumberLiteral: function(node, parent) { - parent._context.push({ - type: 'NumberLiteral', - value: node.value - }); - }, - - CallExpression: function(node, parent) { - var expression = { - type: 'CallExpression', - callee: { - type: 'Identifier', - name: node.name - }, - arguments: [] - }; - - node._context = expression.arguments; - - if (parent.type !== 'CallExpression') { - expression = { - type: 'ExpressionStatement', - expression: expression - }; - } - - parent._context.push(expression); - } - }); - - return newAst; -} - -function codeGenerator(node) { - switch (node.type) { - case 'Program': - return node.body.map(codeGenerator) - .join('\n'); - - case 'ExpressionStatement': - return ( - codeGenerator(node.expression) + - ';' - ); - - case 'CallExpression': - return ( - codeGenerator(node.callee) + - '(' + - node.arguments.map(codeGenerator) - .join(', ') + - ')' - ); - - case 'Identifier': - return node.name; - - case 'NumberLiteral': - return node.value; - - default: - throw new TypeError(node.type); - } -} - -function compiler(input) { - var tokens = tokenizer(input); - var ast = parser(tokens); - var newAst = transformer(ast); - var output = codeGenerator(newAst); - - return output; -} - -module.exports = { - tokenizer: tokenizer, - parser: parser, - transformer: transformer, - codeGenerator: codeGenerator, - compiler: compiler -}; From 93572febb7ba6fbcfdc5c186f36cd0ddcd0fbc54 Mon Sep 17 00:00:00 2001 From: James Kyle Date: Thu, 31 Mar 2016 17:06:15 -0700 Subject: [PATCH 28/31] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 7958f5d..27cdc65 100644 --- 
a/README.md +++ b/README.md @@ -10,8 +10,6 @@ work from end to end. ### [Want to jump into the code? Click here](super-tiny-compiler.js) -[(Or if you would prefer to read it without annotations, click here.)](super-tiny-compiler-unannotated.js) - --- ### Why should I care? From d8cafdb00216c4b6231baddd2c6b53d4e2a9ef32 Mon Sep 17 00:00:00 2001 From: Sarbbottam Bandyopadhyay Date: Thu, 31 Mar 2016 18:10:54 -0700 Subject: [PATCH 29/31] fixed regex `/[a-z]i/` to `/[a-z]/i` fixed #25 --- super-tiny-compiler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index 953b83d..d7bd38a 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -446,7 +446,7 @@ function tokenizer(input) { // ^^^ // Name token // - var LETTERS = /[a-z]i/; + var LETTERS = /[a-z]/i; if (LETTERS.test(char)) { var value = ''; From bd11625f5cfeed3e36eb190b26f40a19b2949d9a Mon Sep 17 00:00:00 2001 From: Vishal Telangre Date: Fri, 1 Apr 2016 11:56:51 +0530 Subject: [PATCH 30/31] Fix grammar --- super-tiny-compiler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/super-tiny-compiler.js b/super-tiny-compiler.js index d7bd38a..e4e214d 100644 --- a/super-tiny-compiler.js +++ b/super-tiny-compiler.js @@ -74,7 +74,7 @@ */ /** - * Today we're going write a compiler together. But not just any compiler... A + * Today we're going to write a compiler together. But not just any compiler... A * super duper teeny tiny compiler! A compiler that is so small that if you * remove all the comments this file would only be ~200 lines of actual code. 
// Provenance: this PHP port was added as `super-tiny-compiler.php` by commit
// cd24a4d2e6a17ce21b59f7b3905a79416c6a5f08 ("[PATCH 31/31] Add PHP version",
// BafS, 2016-04-01).

/**
 * The tokenizer performs lexical analysis: it takes our string of code and
 * breaks it down into an array of tokens.
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 *
 * @param string $input Source code in our lisp-like syntax.
 * @return array List of tokens; each token is
 *               `['type' => 'paren'|'number'|'name', 'value' => string]`.
 * @throws Exception When a character is not a parenthesis, whitespace, a digit
 *                   or an ASCII letter.
 */

// We start by accepting an input string of code, and we're gonna set up a few
// things...
function tokenizer($input) {

  // A `current` variable for tracking our position in the code like a cursor.
  $current = 0;

  // The length of the input never changes, so we measure it once. We also need
  // it below to avoid reading past the end of the string.
  $length = strlen($input);

  // And a `tokens` array for pushing our tokens to.
  $tokens = [];

  // We start by creating a `while` loop where we are setting up our `current`
  // variable to be incremented as much as we want `inside` the loop.
  //
  // We do this because we may want to increment `current` many times within a
  // single loop because our tokens can be any length.
  while ($current < $length) {

    // We're also going to store the `current` character in the `input`.
    $char = $input[$current];

    // The first thing we want to check for is an open parenthesis. This will
    // later be used for `CallExpressions` but for now we only care about the
    // character.
    //
    // We check to see if we have an open parenthesis:
    if ($char === '(') {

      // If we do, we push a new token with the type `paren` and set the value
      // to an open parenthesis.
      $tokens[] = [
        'type' => 'paren',
        'value' => '('
      ];

      // Then we increment `current`
      $current++;

      // And we `continue` onto the next cycle of the loop.
      continue;
    }

    // Next we're going to check for a closing parenthesis. We do the same exact
    // thing as before: Check for a closing parenthesis, add a new token,
    // increment `current`, and `continue`.
    if ($char === ')') {
      $tokens[] = [
        'type' => 'paren',
        'value' => ')'
      ];
      $current++;
      continue;
    }

    // Moving on, we're now going to check for whitespace. This is interesting
    // because we care that whitespace exists to separate characters, but it
    // isn't actually important for us to store as a token. We would only throw
    // it out later.
    //
    // So here we're just going to test for existence and if it does exist we're
    // going to just `continue` on.
    $WHITESPACE = '/\s/';
    if (preg_match($WHITESPACE, $char)) {
      $current++;
      continue;
    }

    // The next type of token is a number. This is different than what we have
    // seen before because a number could be any number of characters and we
    // want to capture the entire sequence of characters as one token.
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        Only two separate tokens
    //
    // So we start this off when we encounter the first number in a sequence.
    $NUMBERS = '/[0-9]/';
    if (preg_match($NUMBERS, $char)) {

      // We're going to create a `value` string that we are going to push
      // characters to.
      $value = '';

      // Then we're going to loop through each character in the sequence until
      // we encounter a character that is not a number, pushing each character
      // that is a number to our `value` and incrementing `current` as we go.
      //
      // The bounds check comes first: the sequence may run right up to the end
      // of the input, and reading `$input[$length]` is an error in PHP.
      while ($current < $length && preg_match($NUMBERS, $input[$current])) {
        $value .= $input[$current];
        $current++;
      }

      // After that we push our `number` token to the `tokens` array.
      $tokens[] = [
        'type' => 'number',
        'value' => $value
      ];

      // And we continue on.
      continue;
    }

    // The last type of token will be a `name` token. This is a sequence of
    // letters instead of numbers, that are the names of functions in our lisp
    // syntax.
    //
    //   (add 2 4)
    //    ^^^
    //    Name token
    //
    $LETTERS = '/[a-zA-Z]/';
    if (preg_match($LETTERS, $char)) {
      $value = '';

      // Again we're just going to loop through all the letters pushing them to
      // a value (with the same end-of-input guard as for numbers).
      while ($current < $length && preg_match($LETTERS, $input[$current])) {
        $value .= $input[$current];
        $current++;
      }

      // And pushing that value as a token with the type `name` and continuing.
      $tokens[] = [
        'type' => 'name',
        'value' => $value
      ];

      continue;
    }

    // Finally if we have not matched a character by now, we're going to throw
    // an error and completely exit.
    throw new Exception('I dont know what this character is: ' . $char);
  }

  // Then at the end of our `tokenizer` we simply return the tokens array.
  return $tokens;
}

/**
 * ============================================================================
 *                                 ヽ/❀o ل͜ o\ノ
 *                                THE PARSER!!!
 * ============================================================================
 */

/**
 * For our parser we're going to take our array of tokens and turn it into an
 * AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 *
 * @param array $tokens Tokens as produced by `tokenizer()`.
 * @return array The AST root: `['type' => 'Program', 'body' => [...]]`, whose
 *               nodes are `NumberLiteral` (`value`) or `CallExpression`
 *               (`name`, `params`).
 * @throws Exception When a token has no place in the grammar (the message is
 *                   the token type), or when the tokens end before a call
 *                   expression is closed.
 */

// Okay, so we define a `parser` function that accepts our array of `tokens`.
function parser($tokens) {

  // Again we keep a `current` variable that we will use as a cursor.
  $current = 0;

  // We look at the number of tokens a lot, so we count them once.
  $total = count($tokens);

  // But this time we're going to use recursion instead of a `while` loop. So we
  // define a `walk` function.
  //
  // It is a closure and not a named function on purpose: PHP registers a named
  // function declared inside another function globally the first time the
  // outer function runs, so calling `parser()` a second time would fail with
  // "Cannot redeclare walk()". The closure captures itself (to recurse) and
  // the cursor by reference, so nested calls advance the same `current`.
  $walk = function () use (&$walk, &$current, $tokens, $total) {

    // If we have run out of tokens, the input stopped in the middle of an
    // expression.
    if ($current >= $total) {
      throw new Exception('Unexpected end of input');
    }

    // Inside the walk function we start by grabbing the `current` token.
    $token = $tokens[$current];

    // We're going to split each type of token off into a different code path,
    // starting off with `number` tokens.
    //
    // We test to see if we have a `number` token.
    if ($token['type'] === 'number') {

      // If we have one, we'll increment `current`.
      $current++;

      // And we'll return a new AST node called `NumberLiteral` and setting its
      // value to the value of our token.
      return [
        'type' => 'NumberLiteral',
        'value' => $token['value']
      ];
    }

    // Next we're going to look for CallExpressions. We start this off when we
    // encounter an open parenthesis.
    if (
      $token['type'] === 'paren' &&
      $token['value'] === '('
    ) {

      // We'll increment `current` to skip the parenthesis since we don't care
      // about it in our AST.
      $current++;
      if ($current >= $total) {
        throw new Exception('Unexpected end of input: expected a function name');
      }
      $token = $tokens[$current];

      // We create a base node with the type `CallExpression`, and we're going
      // to set the name as the current token's value since the next token after
      // the open parenthesis is the name of the function.
      $node = [
        'type' => 'CallExpression',
        'name' => $token['value'],
        'params' => []
      ];

      // We increment `current` *again* to skip the name token.
      $current++;

      // And now we want to loop through each token that will be the `params` of
      // our `CallExpression` until we encounter a closing parenthesis.
      //
      // Now this is where recursion comes in. Instead of trying to parse a
      // potentially infinitely nested set of nodes we're going to rely on
      // recursion to resolve things.
      //
      // To explain this, let's take our Lisp code. You can see that the
      // parameters of the `add` are a number and a nested `CallExpression` that
      // includes its own numbers.
      //
      //   (add 2 (subtract 4 2))
      //
      // You'll also notice that in our tokens array we have multiple closing
      // parentheses.
      //
      //   [
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'add'      },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'subtract' },
      //     { type: 'number', value: '4'        },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: ')'        }, <<< Closing parenthesis
      //     { type: 'paren',  value: ')'        }  <<< Closing parenthesis
      //   ]
      //
      // We're going to rely on the nested `walk` function to increment our
      // `current` variable past any nested `CallExpressions`.

      // So we create a loop that will continue until it encounters a token
      // with a `type` of `'paren'` and a `value` of a closing parenthesis.
      while (true) {

        // Running out of tokens here means the closing parenthesis is missing.
        if ($current >= $total) {
          throw new Exception('Unexpected end of input: missing closing parenthesis');
        }

        $token = $tokens[$current];

        if ($token['type'] === 'paren' && $token['value'] === ')') {
          break;
        }

        // we'll call the `walk` function which will return a `node` and we'll
        // push it into our `node.params`.
        $node['params'][] = $walk();
      }

      // Finally we will increment `current` one last time to skip the closing
      // parenthesis.
      $current++;

      // And return the node.
      return $node;
    }

    // Again, if we haven't recognized the token type by now we're going to
    // throw an error.
    throw new Exception($token['type']);
  };

  // Now, we're going to create our AST which will have a root which is a
  // `Program` node.
  $ast = [
    'type' => 'Program',
    'body' => []
  ];

  // And we're going to kickstart our `walk` function, pushing nodes to our
  // `ast.body` array.
  //
  // The reason we are doing this inside a loop is because our program can have
  // `CallExpressions` after one another instead of being nested.
  //
  //   (add 2 2)
  //   (subtract 4 2)
  //
  while ($current < $total) {
    $ast['body'][] = $walk();
  }

  // At the end of our parser we'll return the AST.
  return $ast;
}

/**
 * ============================================================================
 *                                 ⌒(❀>◞౪◟<❀)⌒
 *                               THE TRAVERSER!!!
 * ============================================================================
 */

/**
 * So now we have our AST, and we want to be able to visit different nodes with
 * a visitor. We need to be able to call the methods on the visitor whenever we
 * encounter a node with a matching type.
 *
 *   traverse(ast, {
 *     Program(node, parent) {
 *       // ...
 *     },
 *
 *     CallExpression(node, parent) {
 *       // ...
 *     },
 *
 *     NumberLiteral(node, parent) {
 *       // ...
 *     }
 *   });
 */

// So we define a traverser function which accepts an AST and a
// visitor. Inside we're going to define two functions...
/**
 * Walks the AST depth-first and calls the matching visitor method, if any, for
 * every node before descending into its children.
 *
 * @param array $ast     Root node as produced by `parser()`.
 * @param array $visitor Map of node type => callable `($node, $parent)`. A
 *                       callable may declare `$node` by reference; whatever it
 *                       stores on the node is then visible to that node's
 *                       children as their `$parent`.
 * @return void
 * @throws Exception When a node has a type the traverser does not know (the
 *                   message is the node type).
 */
function traverser($ast, $visitor) {

  // Both helpers are closures and not named functions on purpose: PHP registers
  // a named function declared inside another function globally the first time
  // the outer function runs, so a second call to `traverser()` would fail with
  // "Cannot redeclare traverseArray()".
  //
  // The two helpers call each other, so `traverseArray` captures the variable
  // that will hold `traverseNode` by reference before it is assigned.
  $traverseNode = null;

  // A `traverseArray` function that will allow us to iterate over an array and
  // call the next function that we will define: `traverseNode`.
  $traverseArray = function ($array, $parent) use (&$traverseNode) {
    foreach ($array as $child) {
      $traverseNode($child, $parent);
    }
  };

  // `traverseNode` will accept a `node` and its `parent` node. So that it can
  // pass both to our visitor methods.
  $traverseNode = function ($node, $parent) use ($traverseArray, $visitor) {

    // We start by testing for the existence of a method on the visitor with a
    // matching `type`.
    if (!empty($visitor[$node['type']])) {
      $method = $visitor[$node['type']];

      // If it exists we'll call it with the `node` and its `parent`.
      //
      // `$node` is passed as a plain variable because a visitor may take it by
      // reference and attach data to it. Our local `$node` is what the
      // children below receive as their parent.
      $method($node, $parent);
    }

    // Next we are going to split things up by the current node type.
    switch ($node['type']) {

      // We'll start with our top level `Program`. Since Program nodes have a
      // property named body that has an array of nodes, we will call
      // `traverseArray` to traverse down into them.
      //
      // (Remember that `traverseArray` will in turn call `traverseNode` so we
      // are causing the tree to be traversed recursively)
      case 'Program':
        $traverseArray($node['body'], $node);
        break;

      // Next we do the same with `CallExpressions` and traverse their `params`.
      case 'CallExpression':
        $traverseArray($node['params'], $node);
        break;

      // In the case of `NumberLiterals` we don't have any child nodes to visit,
      // so we'll just break.
      case 'NumberLiteral':
        break;

      // And again, if we haven't recognized the node type then we'll throw an
      // error.
      default:
        throw new Exception($node['type']);
    }
  };

  // Finally we kickstart the traverser by calling `traverseNode` with our ast
  // with no `parent` because the top level of the AST doesn't have a parent.
  $traverseNode($ast, null);
}


/**
 * ============================================================================
 *                                   ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
 *                              THE TRANSFORMER!!!
 * ============================================================================
 */

/**
 * Next up, the transformer. Our transformer is going to take the AST that we
 * have built and pass it to our traverser function with a visitor and will
 * create a new ast.
 *
 * ----------------------------------------------------------------------------
 *   Original AST                     |   Transformed AST
 * ----------------------------------------------------------------------------
 *   {                                |   {
 *     type: 'Program',               |     type: 'Program',
 *     body: [{                       |     body: [{
 *       type: 'CallExpression',      |       type: 'ExpressionStatement',
 *       name: 'add',                 |       expression: {
 *       params: [{                   |         type: 'CallExpression',
 *         type: 'NumberLiteral',     |         callee: {
 *         value: '2'                 |           type: 'Identifier',
 *       }, {                         |           name: 'add'
 *         type: 'CallExpression',    |         },
 *         name: 'subtract',          |         arguments: [{
 *         params: [{                 |           type: 'NumberLiteral',
 *           type: 'NumberLiteral',   |           value: '2'
 *           value: '4'               |         }, {
 *         }, {                       |           type: 'CallExpression',
 *           type: 'NumberLiteral',   |           callee: {
 *           value: '2'               |             type: 'Identifier',
 *         }]                         |             name: 'subtract'
 *       }]                           |           },
 *     }]                             |           arguments: [{
 *   }                                |             type: 'NumberLiteral',
 *                                    |             value: '4'
 * ---------------------------------- |           }, {
 *                                    |             type: 'NumberLiteral',
 *                                    |             value: '2'
 *                                    |           }]
 *  (sorry the other one is longer.)  |         }
 *                                    |       }
 *                                    |     }]
 *                                    |   }
 * ----------------------------------------------------------------------------
 *
 * @param array $ast The lisp AST as produced by `parser()`.
 * @return array The new AST: a `Program` node whose body holds
 *               `ExpressionStatement` nodes wrapping `CallExpression` nodes
 *               (`callee`, `arguments`), or bare `NumberLiteral` nodes.
 * @throws Exception When the AST contains a node type the traverser does not
 *                   know.
 */

// So we have our transformer function which will accept the lisp ast.
function transformer($ast) {

  // We'll create a `newAst` which like our previous AST will have a program
  // node.
  $newAst = [
    'type' => 'Program',
    'body' => []
  ];

  // Next I'm going to cheat a little and create a bit of a hack. We give every
  // parent node a `_context` entry, and each child pushes its transformed node
  // onto its parent's `_context`. Normally you would have a better abstraction
  // than this, but for our purposes this keeps things simple.
  //
  // Just take note that the context is a reference *from* the old ast *to* the
  // new ast. It has to be a PHP reference (`&`): nodes are arrays, and arrays
  // are copied when they are passed around, but a reference stored inside an
  // array keeps pointing at the same target in every copy. That is what lets a
  // visitor that only received a copy of its parent still write into the new
  // ast.
  $ast['_context'] = &$newAst['body'];

  // We'll start by calling the traverser function with our ast and a visitor.
  traverser($ast, [

    // The first visitor method accepts `NumberLiterals`
    'NumberLiteral' => function ($node, $parent) {
      // We'll create a new node also named `NumberLiteral` that we will push to
      // the parent context.
      $parent['_context'][] = [
        'type' => 'NumberLiteral',
        'value' => $node['value']
      ];
    },

    // Next up, `CallExpressions`. The node is taken by reference so that the
    // `_context` we attach below is still there when the traverser hands this
    // node to its children as their parent.
    'CallExpression' => function (&$node, $parent) {

      // We start creating a new node `CallExpression` with a nested
      // `Identifier`.
      $expression = [
        'type' => 'CallExpression',
        'callee' => [
          'type' => 'Identifier',
          'name' => $node['name']
        ],
        'arguments' => []
      ];

      // Next we're going to define a new context on the original
      // `CallExpression` node that will reference the `expression`'s arguments
      // so that we can push arguments.
      $node['_context'] = &$expression['arguments'];

      // Then we're going to check if the parent node is a `CallExpression`.
      // If it is not...
      if ($parent['type'] !== 'CallExpression') {

        // We're going to wrap our `CallExpression` node with an
        // `ExpressionStatement`. We do this because the top level
        // `CallExpressions` in JavaScript are actually statements.
        $expression = [
          'type' => 'ExpressionStatement',
          'expression' => $expression
        ];
      }

      // Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
      // `context`.
      $parent['_context'][] = $expression;
    }
  ]);

  // At the end of our transformer function we'll return the new ast that we
  // just created.
  return $newAst;
}


/**
 * ============================================================================
 *                               ヾ(〃^∇^)ノ♪
 *                            THE CODE GENERATOR!!!!
 * ============================================================================
 */

/**
 * Now let's move onto our last phase: The Code Generator.
 *
 * Our code generator is going to recursively call itself to print each node in
 * the tree into one giant string.
 *
 * @param array $node A node of the transformed AST.
 * @return string The generated code for that node.
 * @throws Exception When the node type is not recognized (the message is the
 *                   node type).
 */

function codeGenerator($node) {

  // We'll break things down by the `type` of the `node`.
  switch ($node['type']) {

    // If we have a `Program` node. We will map through each node in the `body`
    // and run them through the code generator and join them with a newline.
    case 'Program':
      return implode(PHP_EOL, array_map('codeGenerator', $node['body']));

    // For `ExpressionStatements` we'll call the code generator on the nested
    // expression and we'll add a semicolon...
    case 'ExpressionStatement':
      return (
        codeGenerator($node['expression']) .
        ';' // << (...because we like to code the *correct* way)
      );

    // For `CallExpressions` we will print the `callee`, add an open
    // parenthesis, we'll map through each node in the `arguments` array and run
    // them through the code generator, joining them with a comma, and then
    // we'll add a closing parenthesis.
    case 'CallExpression':
      return (
        codeGenerator($node['callee']) .
        '(' .
        implode(', ', array_map('codeGenerator', $node['arguments'])) .
        ')'
      );

    // For `Identifiers` we'll just return the `node`'s name.
    case 'Identifier':
      return $node['name'];

    // For `NumberLiterals` we'll just return the `node`'s value.
    case 'NumberLiteral':
      return $node['value'];

    // And if we haven't recognized the node, we'll throw an error.
    default:
      throw new Exception($node['type']);
  }
}


/**
 * ============================================================================
 *                                  (۶* ‘ヮ’)۶”
 *                         !!!!!!!!THE COMPILER!!!!!!!!
 * ============================================================================
 */

/**
 * FINALLY! We'll create our `compiler` function. Here we will link together
 * every part of the pipeline.
 *
 *   1. input  => tokenizer   => tokens
 *   2. tokens => parser      => ast
 *   3. ast    => transformer => newAst
 *   4. newAst => generator   => output
 *
 * @param string $input Source code in our lisp-like syntax.
 * @return string The generated C-like code.
 */

function compiler($input) {
  $tokens = tokenizer($input);
  $ast    = parser($tokens);
  $newAst = transformer($ast);
  $output = codeGenerator($newAst);

  // and simply return the output!
  return $output;
}