jamiebuilds_the-super-tiny-.../the-super-thiny-compiler-net.cs

733 lines
24 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace TheSuperThinyCompiler
{
public class TheSuperThinyCompiler
{
/**
* =============================================================================
* The Compiler: turns lisp-like function calls into c-like function calls
* =============================================================================
*/
#region Compiler
/**
* ============================================================================
* (/^^)/
* THE TOKENIZER!
* ============================================================================
*/
/// <summary>
/// Splits a lisp-like source string into a flat list of tokens.
/// </summary>
/// <param name="input">Source text, for example <c>(add 2 (subtract 4 2))</c>.</param>
/// <returns>
/// Tokens in source order: one <c>paren</c> token per '(' or ')', one <c>number</c>
/// token per run of digits 0-9, and one <c>name</c> token per run of letters.
/// Whitespace is dropped.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="Exception">A character is not a paren, whitespace, digit or letter.</exception>
public List<Token> tokenizer(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    // The patterns never change, so build them once rather than once per character.
    Regex whitespace = new Regex(@"\s");
    Regex numbers = new Regex(@"[0-9]");
    Regex letters = new Regex(@"[a-z]", RegexOptions.IgnoreCase);

    var current = 0;
    var tokens = new List<Token>();
    while (current < input.Length)
    {
        var @char = input[current];

        if (@char == '(' || @char == ')')
        {
            tokens.Push(new Token()
            {
                Type = TokenTypeEnum.paren,
                Value = @char.ToString()
            });
            current++;
            continue;
        }

        if (whitespace.IsMatch(@char.ToString()))
        {
            current++;
            continue;
        }

        if (numbers.IsMatch(@char.ToString()))
        {
            // Consume the whole run of digits. The bounds check matters: a number
            // may be the very last thing in the input.
            int start = current;
            while (current < input.Length && numbers.IsMatch(input[current].ToString()))
            {
                current++;
            }
            tokens.Push(new Token()
            {
                Type = TokenTypeEnum.number,
                Value = input.Substring(start, current - start)
            });
            continue;
        }

        if (letters.IsMatch(@char.ToString()))
        {
            // Same scan as for numbers, and the same end-of-input guard.
            int start = current;
            while (current < input.Length && letters.IsMatch(input[current].ToString()))
            {
                current++;
            }
            tokens.Push(new Token()
            {
                Type = TokenTypeEnum.name,
                Value = input.Substring(start, current - start)
            });
            continue;
        }

        throw new Exception($"I dont know what this character is: '{@char}'");
    }
    return tokens;
}
/**
* ============================================================================
* /o ل͜ o\
* THE PARSER!!!
* ============================================================================
*/
/// <summary>
/// Builds a lisp-style AST from a token list.
/// </summary>
/// <param name="tokens">Tokens to consume, in source order.</param>
/// <returns>
/// A <c>Program</c> node whose <c>Body</c> holds one node per top-level expression.
/// </returns>
public LispAstNode parser(List<Token> tokens)
{
    var program = new LispAstNode();
    program.Type = LispAstTypeEnum.Program;
    program.Body = new List<LispAstNode>();

    // walk() advances the cursor itself, so the loop has no increment clause.
    for (int cursor = 0; cursor < tokens.Count;)
    {
        program.Body.Push(walk(tokens, ref cursor));
    }

    return program;
}
/// <summary>
/// Parses one expression starting at <paramref name="current"/> and advances
/// <paramref name="current"/> past it.
/// </summary>
/// <param name="tokens">The full token list.</param>
/// <param name="current">Index of the first token of the expression; updated in place.</param>
/// <returns>
/// A <c>NumberLiteral</c> node for a number token, or a <c>CallExpression</c> node
/// (with recursively parsed <c>Params</c>) for a parenthesised form.
/// </returns>
/// <exception cref="Exception">
/// The token at <paramref name="current"/> is neither a number nor an opening paren.
/// </exception>
protected LispAstNode walk(List<Token> tokens, ref int current)
{
    Token head = tokens[current];

    if (head.Type == TokenTypeEnum.number)
    {
        current++;
        var literal = new LispAstNode();
        literal.Type = LispAstTypeEnum.NumberLiteral;
        literal.Value = head.Value;
        return literal;
    }

    bool opensCall = head.Type == TokenTypeEnum.paren && head.Value == "(";
    if (!opensCall)
    {
        throw new Exception($"{head.Type}");
    }

    // The token right after '(' supplies the call's name.
    Token nameToken = tokens[++current];
    var call = new LispAstNode();
    call.Type = LispAstTypeEnum.CallExpression;
    call.Name = nameToken.Value;
    call.Params = new List<LispAstNode>();

    // Everything up to the matching ')' is a parameter; nested calls recurse.
    Token next = tokens[++current];
    while (!(next.Type == TokenTypeEnum.paren && next.Value == ")"))
    {
        call.Params.Push(walk(tokens, ref current));
        next = tokens[current];
    }

    // Step over the closing ')'.
    current++;
    return call;
}
/**
* ============================================================================
* (><)
* THE TRAVERSER!!!
* ============================================================================
*/
/// <summary>
/// Visits every node of a lisp-style AST, starting at <paramref name="lispAst"/>
/// with no parent.
/// </summary>
/// <param name="lispAst">Root node of the tree to visit.</param>
/// <param name="lispVisitor">Callbacks keyed by node type.</param>
public void traverser(LispAstNode lispAst, LispVisitorType lispVisitor) =>
    traverseNode(lispAst, null, lispVisitor);
/// <summary>
/// Visits each node of <paramref name="array"/> in order, reporting
/// <paramref name="parent"/> as its parent.
/// </summary>
/// <param name="array">Child nodes to visit.</param>
/// <param name="parent">Node that owns the children.</param>
/// <param name="lispVisitor">Callbacks keyed by node type.</param>
protected void traverseArray(List<LispAstNode> array, LispAstNode parent, LispVisitorType lispVisitor)
{
    foreach (LispAstNode child in array)
    {
        traverseNode(child, parent, lispVisitor);
    }
}
/// <summary>
/// Invokes the visitor callback registered for the node's type (if any), then
/// descends into the node's children.
/// </summary>
/// <param name="node">Node being visited.</param>
/// <param name="parent">Parent of <paramref name="node"/>; null for the root.</param>
/// <param name="lispVisitor">Callbacks keyed by node type.</param>
/// <exception cref="Exception">The node's type is not one of the known lisp AST types.</exception>
protected void traverseNode(LispAstNode node, LispAstNode parent, LispVisitorType lispVisitor)
{
    Action<LispAstNode, LispAstNode> handler;
    if (lispVisitor.TryGetValue(node.Type, out handler) && handler != null)
    {
        handler(node, parent);
    }

    // Read the type after the callback has run, as the children are chosen from it.
    LispAstTypeEnum kind = node.Type;

    if (kind == LispAstTypeEnum.Program)
    {
        traverseArray(node.Body, node, lispVisitor);
        return;
    }

    if (kind == LispAstTypeEnum.CallExpression)
    {
        traverseArray(node.Params, node, lispVisitor);
        return;
    }

    if (kind == LispAstTypeEnum.NumberLiteral)
    {
        // Leaf node: nothing below it to visit.
        return;
    }

    throw new Exception($"{node.Type}");
}
/**
* ============================================================================
* (˃̵͈̑˂̵͈̑)
* THE TRANSFORMER!!!
* ============================================================================
*/
/// <summary>
/// Converts a lisp-style AST into a c-style AST.
/// </summary>
/// <param name="lispAst">
/// Root of the lisp AST. Its <c>Context</c> (and that of every call node visited)
/// is overwritten to point at the list new c-style nodes are appended to.
/// </param>
/// <returns>A c-style <c>Program</c> node.</returns>
public CAstNode transformer(LispAstNode lispAst)
{
    var program = new CAstNode();
    program.Type = CAstTypeEnum.Program;
    program.Body = new List<CAstNode>();

    // Nodes directly under the lisp root emit into the new program body.
    lispAst.Context = program.Body;

    var visitor = new LispVisitorType();

    // Number literals are copied across unchanged.
    visitor[LispAstTypeEnum.NumberLiteral] = (LispAstNode literal, LispAstNode owner) =>
    {
        var copy = new CAstNode();
        copy.Type = CAstTypeEnum.NumberLiteral;
        copy.Value = literal.Value;
        owner.Context.Push(copy);
    };

    // Calls become a CallExpression with an Identifier callee.
    visitor[LispAstTypeEnum.CallExpression] = (LispAstNode call, LispAstNode owner) =>
    {
        var callee = new CAstNode();
        callee.Type = CAstTypeEnum.Identifier;
        callee.Name = call.Name;

        var converted = new CAstNode();
        converted.Type = CAstTypeEnum.CallExpression;
        converted.Callee = callee;
        converted.arguments = new List<CAstNode>();

        // Children of this call will append themselves to its argument list.
        call.Context = converted.arguments;

        // A call that is not itself an argument of another call is wrapped in
        // an ExpressionStatement.
        CAstNode emitted = converted;
        if (owner.Type != LispAstTypeEnum.CallExpression)
        {
            emitted = new CAstNode();
            emitted.Type = CAstTypeEnum.ExpressionStatement;
            emitted.Expression = converted;
        }

        owner.Context.Push(emitted);
    };

    traverser(lispAst, visitor);
    return program;
}
/**
* ============================================================================
*
* THE CODE GENERATOR!!!!
* ============================================================================
*/
/// <summary>
/// Renders a c-style AST node (and everything below it) as c-like source text.
/// </summary>
/// <param name="node">Node to render.</param>
/// <returns>
/// Program: its statements joined by newlines. ExpressionStatement: the expression
/// followed by ';'. CallExpression: <c>callee(arg,arg,...)</c>. Identifier: its name.
/// NumberLiteral: its value.
/// </returns>
/// <exception cref="Exception">The node's type is not one of the known c AST types.</exception>
public string codeGenerator(CAstNode node)
{
    CAstTypeEnum kind = node.Type;

    if (kind == CAstTypeEnum.Program)
    {
        string[] statements = node.Body.Map(codeGenerator);
        return statements.Join("\n");
    }

    if (kind == CAstTypeEnum.ExpressionStatement)
    {
        return codeGenerator(node.Expression) + ";";
    }

    if (kind == CAstTypeEnum.CallExpression)
    {
        string callee = codeGenerator(node.Callee);
        string argumentList = node.arguments.Map(codeGenerator).Join(",");
        return string.Concat(callee, "(", argumentList, ")");
    }

    if (kind == CAstTypeEnum.Identifier)
    {
        return node.Name;
    }

    if (kind == CAstTypeEnum.NumberLiteral)
    {
        return node.Value;
    }

    throw new Exception($"{node.Type}");
}
/**
* ============================================================================
* (۶* )۶
* !!!!!!!!THE COMPILER!!!!!!!!
* ============================================================================
*/
/// <summary>
/// Runs the full pipeline: tokenizer, parser, transformer, code generator.
/// </summary>
/// <param name="input">Lisp-like source text.</param>
/// <returns>The equivalent c-like source text.</returns>
public string compiler(string input) =>
    codeGenerator(transformer(parser(tokenizer(input))));
#endregion
/**
* =============================================================================
* The Decompiler: turns c-like function calls back into lisp-like function calls
* =============================================================================
*/
#region DeCompiler
/**
* ============================================================================
* (/^^)/
* THE TOKENIZER!
* ============================================================================
*/
/// <summary>
/// Splits a c-like source string (for example <c>add(2, subtract(4, 2));</c>)
/// into a flat list of tokens.
/// </summary>
/// <param name="input">Source text to scan.</param>
/// <returns>
/// Tokens in source order: one <c>name</c> token per run of letters, one
/// <c>number</c> token per run of digits 0-9, and one <c>paren</c> token for each
/// of '(', ')', ',' and ';'. Whitespace is dropped.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="Exception">A character is not recognised by any rule above.</exception>
public List<Token> detokenizer(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    // The patterns never change, so build them once rather than once per character.
    Regex whitespace = new Regex(@"\s");
    Regex letter = new Regex(@"[a-z]", RegexOptions.IgnoreCase);
    Regex number = new Regex(@"[0-9]");

    var tokens = new List<Token>();
    var current = 0;
    while (current < input.Length)
    {
        var szChar = input[current].ToString();

        if (whitespace.IsMatch(szChar))
        {
            current++;
            continue;
        }

        if (letter.IsMatch(szChar))
        {
            // Consume the whole run of letters. The bounds check matters: a name
            // may be the very last thing in the input.
            int start = current;
            while (current < input.Length && letter.IsMatch(input[current].ToString()))
            {
                current++;
            }
            tokens.Push(new Token()
            {
                Type = TokenTypeEnum.name,
                Value = input.Substring(start, current - start)
            });
            continue;
        }

        if (number.IsMatch(szChar))
        {
            // Same scan as for names, and the same end-of-input guard.
            int start = current;
            while (current < input.Length && number.IsMatch(input[current].ToString()))
            {
                current++;
            }
            tokens.Push(new Token()
            {
                Type = TokenTypeEnum.number,
                Value = input.Substring(start, current - start)
            });
            continue;
        }

        // All punctuation shares the paren token type; consumers tell the
        // characters apart by Value.
        if (szChar == "(" || szChar == ")" || szChar == "," || szChar == ";")
        {
            tokens.Push(new Token()
            {
                Type = TokenTypeEnum.paren,
                Value = szChar,
            });
            current++;
            continue;
        }

        throw new Exception($"I dont know what this character is: {szChar}");
    }
    return tokens;
}
/**
* ============================================================================
* /o ل͜ o\
* THE PARSER!!!
* ============================================================================
*/
/// <summary>
/// Builds a c-style AST from a token list.
/// </summary>
/// <param name="tokens">Tokens to consume, in source order.</param>
/// <returns>
/// A <c>Program</c> node whose <c>Body</c> holds one node per top-level expression.
/// </returns>
public CAstNode deparser(List<Token> tokens)
{
    var program = new CAstNode();
    program.Type = CAstTypeEnum.Program;
    program.Body = new List<CAstNode>();

    // dewalk() advances the cursor itself, so the loop has no increment clause.
    for (int cursor = 0; cursor < tokens.Count;)
    {
        program.Body.Push(dewalk(tokens, ref cursor));
    }

    return program;
}
/// <summary>
/// Parses one c-like expression starting at <paramref name="current"/> and
/// advances <paramref name="current"/> past it.
/// </summary>
/// <param name="tokens">The full token list.</param>
/// <param name="current">Index of the first token of the expression; updated in place.</param>
/// <returns>
/// A <c>NumberLiteral</c> node for a number token, or a <c>CallExpression</c> node
/// (with an <c>Identifier</c> callee and recursively parsed arguments) for a name token.
/// </returns>
/// <exception cref="Exception">
/// The token at <paramref name="current"/> is neither a number nor a name.
/// </exception>
protected CAstNode dewalk(List<Token> tokens, ref int current)
{
    var token = tokens[current];

    // A number is a complete expression on its own.
    if (token.Type == TokenTypeEnum.number)
    {
        current++;
        return new CAstNode()
        {
            Type = CAstTypeEnum.NumberLiteral,
            Value = token.Value
        };
    }

    // A name starts a call: name '(' args ')' with an optional trailing ';'.
    if (token.Type == TokenTypeEnum.name)
    {
        var expression = new CAstNode()
        {
            Type = CAstTypeEnum.CallExpression,
            Callee = new CAstNode()
            {
                Type = CAstTypeEnum.Identifier,
                Name = token.Value,
            },
            arguments = new List<CAstNode>()
        };

        // The token after the name is taken to be the opening paren and is skipped.
        ++current;
        token = tokens[++current];

        while ((token.Type != TokenTypeEnum.paren) || (token.Type == TokenTypeEnum.paren && token.Value != ")"))
        {
            // Separators between arguments carry no information; step over them.
            if ((token.Type == TokenTypeEnum.paren) && (token.Value == "," || token.Value == ";"))
            {
                token = tokens[++current];
                continue;
            }
            expression.arguments.Push(dewalk(tokens, ref current));
            token = tokens[current];
        }

        // Skip the closing paren.
        current++;

        // Skip a trailing semicolon if there is one. The call may be the last
        // thing in the input, so check the bounds before looking.
        if (current < tokens.Count)
        {
            token = tokens[current];
            if (token.Type == TokenTypeEnum.paren && token.Value == ";")
            {
                current++;
            }
        }
        return expression;
    }

    throw new Exception($"{token.Type}");
}
/**
* ============================================================================
* (><)
* THE TRAVERSER!!!
* ============================================================================
*/
/// <summary>
/// Visits every node of a c-style AST, starting at <paramref name="ast"/>
/// with no parent.
/// </summary>
/// <param name="ast">Root node of the tree to visit.</param>
/// <param name="visitor">Callbacks keyed by node type.</param>
public void detraverser(CAstNode ast, CVisitorType visitor) =>
    detraverseNode(ast, null, visitor);
/// <summary>
/// Visits each node of <paramref name="array"/> in order, reporting
/// <paramref name="parent"/> as its parent.
/// </summary>
/// <param name="array">Child nodes to visit.</param>
/// <param name="parent">Node that owns the children.</param>
/// <param name="visitor">Callbacks keyed by node type.</param>
protected void detraverseArray(List<CAstNode> array, CAstNode parent, CVisitorType visitor)
{
    foreach (CAstNode child in array)
    {
        detraverseNode(child, parent, visitor);
    }
}
/// <summary>
/// Invokes the visitor callback registered for the node's type (if any), then
/// descends into the node's children.
/// </summary>
/// <param name="node">Node being visited.</param>
/// <param name="parent">Parent of <paramref name="node"/>; null for the root.</param>
/// <param name="visitor">Callbacks keyed by node type.</param>
protected void detraverseNode(CAstNode node, CAstNode parent, CVisitorType visitor)
{
    Action<CAstNode, CAstNode> handler;
    if (visitor.TryGetValue(node.Type, out handler) && handler != null)
    {
        handler(node, parent);
    }

    // Read the type after the callback has run, as the children are chosen from it.
    CAstTypeEnum kind = node.Type;

    if (kind == CAstTypeEnum.Program)
    {
        detraverseArray(node.Body, node, visitor);
    }
    else if (kind == CAstTypeEnum.ExpressionStatement)
    {
        detraverseNode(node.Expression, node, visitor);
    }
    else if (kind == CAstTypeEnum.CallExpression)
    {
        // Callee first, then the arguments in order.
        detraverseNode(node.Callee, node, visitor);
        detraverseArray(node.arguments, node, visitor);
    }
    // Identifier and NumberLiteral have no children; any other value is ignored.
}
/**
* ============================================================================
* (˃̵͈̑˂̵͈̑)
* THE TRANSFORMER!!!
* ============================================================================
*/
/// <summary>
/// Converts a c-style AST into a lisp-style AST.
/// </summary>
/// <param name="ast">
/// Root of the c AST. Its <c>Context</c> (and that of every call node visited)
/// is overwritten to point at the list new lisp-style nodes are appended to.
/// </param>
/// <returns>A lisp-style <c>Program</c> node.</returns>
public LispAstNode detransformer(CAstNode ast)
{
    var program = new LispAstNode();
    program.Type = LispAstTypeEnum.Program;
    program.Body = new List<LispAstNode>();

    // Nodes directly under the c root emit into the new program body.
    ast.Context = program.Body;

    var visitor = new CVisitorType();

    // Number literals are copied across unchanged.
    visitor[CAstTypeEnum.NumberLiteral] = (CAstNode literal, CAstNode owner) =>
    {
        var copy = new LispAstNode();
        copy.Type = LispAstTypeEnum.NumberLiteral;
        copy.Value = literal.Value;
        owner.Context.Push(copy);
    };

    // Calls take their name from the callee identifier.
    visitor[CAstTypeEnum.CallExpression] = (CAstNode call, CAstNode owner) =>
    {
        var converted = new LispAstNode();
        converted.Type = LispAstTypeEnum.CallExpression;
        converted.Name = call.Callee.Name;
        converted.Params = new List<LispAstNode>();

        // Arguments of this call will append themselves to its parameter list.
        call.Context = converted.Params;
        owner.Context.Push(converted);
    };

    detraverser(ast, visitor);
    return program;
}
/**
* ============================================================================
*
* THE CODE GENERATOR!!!!
* ============================================================================
*/
/// <summary>
/// Renders a lisp-style AST node (and everything below it) as lisp-like source text.
/// </summary>
/// <param name="node">Node to render.</param>
/// <returns>
/// Program: its expressions joined by newlines. NumberLiteral: its value.
/// CallExpression: <c>(name param param ...)</c>.
/// </returns>
/// <exception cref="Exception">The node's type is not one of the known lisp AST types.</exception>
public string decodeGenerator(LispAstNode node)
{
    LispAstTypeEnum kind = node.Type;

    if (kind == LispAstTypeEnum.Program)
    {
        string[] expressions = node.Body.Map(decodeGenerator);
        return expressions.Join("\n");
    }

    if (kind == LispAstTypeEnum.NumberLiteral)
    {
        return node.Value;
    }

    if (kind == LispAstTypeEnum.CallExpression)
    {
        string parameterList = node.Params.Map(decodeGenerator).Join(" ");
        return string.Concat("(", node.Name, " ", parameterList, ")");
    }

    throw new Exception($"{node.Type}");
}
/**
* ============================================================================
* (۶* )۶
* !!!!!!!!THE DECOMPILER!!!!!!!!
* ============================================================================
*/
/// <summary>
/// Runs the full reverse pipeline: detokenizer, deparser, detransformer,
/// lisp code generator.
/// </summary>
/// <param name="input">C-like source text.</param>
/// <returns>The equivalent lisp-like source text.</returns>
public string decompiler(string input) =>
    decodeGenerator(detransformer(deparser(detokenizer(input))));
#endregion
}
#region Models & Extenses
/// <summary>
/// JavaScript-flavoured helpers (Push / Join / Map) layered over the standard collections.
/// </summary>
public static class TypeFunctionWrapperExtenses
{
    /// <summary>Appends <paramref name="value"/> to the end of <paramref name="source"/>.</summary>
    public static void Push<T>(this List<T> source, T value)
    {
        source.Add(value);
    }

    /// <summary>Concatenates <paramref name="strs"/>, placing <paramref name="s"/> between elements.</summary>
    public static string Join(this string[] strs, string s)
    {
        return string.Join(s, strs);
    }

    /// <summary>
    /// Projects every element of <paramref name="source"/> through
    /// <paramref name="codeGenerator"/> into a new array.
    /// </summary>
    /// <returns>The projected array; an empty array when the list is null or empty.</returns>
    public static V[] Map<T, V>(this List<T> source, Func<T, V> codeGenerator)
    {
        if (source == null || source.Count == 0)
        {
            return new V[0];
        }

        return source.Select(codeGenerator).ToArray();
    }
}
#region Token Model
/// <summary>
/// One lexical unit produced by the tokenizers: a category plus the matched text.
/// </summary>
public class Token
{
/// <summary>Category of the token.</summary>
public TokenTypeEnum Type { get; set; }
/// <summary>Source text the token was built from.</summary>
public string Value { get; set; }
}
/// <summary>
/// Categories a <see cref="Token"/> can belong to.
/// </summary>
public enum TokenTypeEnum
{
// Punctuation. The lisp tokenizer emits it for '(' and ')'; the c tokenizer
// also uses it for ',' and ';'.
paren,
// A run of letters.
name,
// A run of digits 0-9.
number,
}
#endregion
#region Lisp Style AST Model
/// <summary>
/// Node of the lisp-style AST. One class serves every node type; which
/// properties are meaningful depends on <see cref="Type"/>.
/// </summary>
public class LispAstNode
{
/// <summary>Kind of node.</summary>
public LispAstTypeEnum Type { get; set; }
/// <summary>Function name; set on CallExpression nodes.</summary>
public string Name { get; set; }
/// <summary>Literal text; set on NumberLiteral nodes.</summary>
public string Value { get; set; }
/// <summary>Call parameters; traversed for CallExpression nodes.</summary>
public List<LispAstNode> Params { get; set; } = new List<LispAstNode>();
/// <summary>Top-level expressions; traversed for Program nodes.</summary>
public List<LispAstNode> Body { get; set; } = new List<LispAstNode>();
/// <summary>
/// List of c-style nodes that the transformer appends to while visiting this
/// node's children.
/// </summary>
public List<CAstNode> Context { get; set; } = new List<CAstNode>();
}
/// <summary>
/// Node kinds of the lisp-style AST.
/// </summary>
public enum LispAstTypeEnum
{
// Root node; children live in Body.
Program,
// Function call; children live in Params.
CallExpression,
// Numeric literal; a leaf.
NumberLiteral
}
#endregion
#region C Style AST Model
/// <summary>
/// Node of the c-style AST. One class serves every node type; which
/// properties are meaningful depends on <see cref="Type"/>.
/// </summary>
public class CAstNode
{
/// <summary>Kind of node.</summary>
public CAstTypeEnum Type { get; set; }
/// <summary>Identifier text; set on Identifier nodes.</summary>
public string Name { get; set; }
/// <summary>Literal text; set on NumberLiteral nodes.</summary>
public string Value { get; set; }
/// <summary>Wrapped expression; set on ExpressionStatement nodes.</summary>
public CAstNode Expression { get; set; }
/// <summary>Function being called; set on CallExpression nodes.</summary>
public CAstNode Callee { get; set; }
/// <summary>Call arguments; traversed for CallExpression nodes.</summary>
public List<CAstNode> arguments { get; set; } = new List<CAstNode>();
/// <summary>Top-level statements; traversed for Program nodes.</summary>
public List<CAstNode> Body { get; set; } = new List<CAstNode>();
/// <summary>
/// List of lisp-style nodes that the detransformer appends to while visiting
/// this node's children.
/// </summary>
public List<LispAstNode> Context { get; set; } = new List<LispAstNode>();
}
/// <summary>
/// Node kinds of the c-style AST.
/// </summary>
public enum CAstTypeEnum
{
// Root node; children live in Body.
Program,
// Statement wrapper around a single Expression; rendered with a trailing ';'.
ExpressionStatement,
// Function call; has a Callee and arguments.
CallExpression,
// Name of a function; a leaf.
Identifier,
// Numeric literal; a leaf.
NumberLiteral,
}
#endregion
#region VisitorWrap
// I hate writing long types over and over……
/// <summary>
/// Visitor table for the lisp-style AST: maps a node type to a callback that
/// receives (node, parent).
/// </summary>
public class LispVisitorType : Dictionary<LispAstTypeEnum, Action<LispAstNode, LispAstNode>>
{
// Intentionally empty: this type only gives the long dictionary type a short name.
}
/// <summary>
/// Visitor table for the c-style AST: maps a node type to a callback that
/// receives (node, parent).
/// </summary>
public class CVisitorType : Dictionary<CAstTypeEnum, Action<CAstNode, CAstNode>>
{
// Intentionally empty: this type only gives the long dictionary type a short name.
}
#endregion
#endregion
}