// Toy interpreter lexer.
//
// Copyright (C) 2014-2015 Iain Buclaw.
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

// Written by Iain Buclaw <ibuclaw@gdcproject.org>

module toy.lex;

import std.array;
import std.regex;
import std.string;

///// Tokenizer /////

// Classification attached to every token (and to every pattern in
// `token_exprs`).  `lex` discards matches of patterns tagged `None`;
// every other tag is copied onto the emitted token.
enum Tag
{
    None,
    Reserved,
    Integer,
    Identifier,
}

// A (text, tag) pair.  The same type is used for two purposes in this
// module: in `token_exprs` the `value` field holds a regular expression,
// while in the array returned by `lex` it holds the matched source text.
struct Token
{
    string value;
    Tag tag;
}

// Pattern table consulted by `lex`.  Every pattern is anchored with `^`
// because it is matched against the remaining input starting at the
// current position.  Patterns are tried in order and the first match wins,
// so a longer operator must precede any operator that is a prefix of it
// (`<=` before `<`) and keywords must precede the identifier pattern.
//
// NOTE(review): `=>` is tokenized as a single operator but `>=` is not
// (it lexes as `>` followed by `=`) -- confirm against the parser whether
// `>=` was intended here.
Token[] token_exprs = [
    Token(`^[ \n\t]+`, Tag.None),
    Token(`^#[^\n]*`, Tag.None),
    Token(`^\:=`, Tag.Reserved),
    Token(`^\(`, Tag.Reserved),
    Token(`^\)`, Tag.Reserved),
    Token(`^;`, Tag.Reserved),
    Token(`^\+`, Tag.Reserved),
    Token(`^-`, Tag.Reserved),
    Token(`^\*`, Tag.Reserved),
    Token(`^/`, Tag.Reserved),
    Token(`^<=`, Tag.Reserved),
    Token(`^<`, Tag.Reserved),
    Token(`^=>`, Tag.Reserved),
    Token(`^>`, Tag.Reserved),
    Token(`^=`, Tag.Reserved),
    Token(`^!=`, Tag.Reserved),
    Token(`^and\b`, Tag.Reserved),
    Token(`^or\b`, Tag.Reserved),
    Token(`^not\b`, Tag.Reserved),
    Token(`^if\b`, Tag.Reserved),
    Token(`^then\b`, Tag.Reserved),
    Token(`^else\b`, Tag.Reserved),
    Token(`^while\b`, Tag.Reserved),
    Token(`^do\b`, Tag.Reserved),
    Token(`^end\b`, Tag.Reserved),
    Token(`^print\b`, Tag.Reserved),
    Token(`^[0-9]+\b`, Tag.Integer),
    Token(`^[A-Za-z][A-Za-z0-9_]*\b`, Tag.Identifier),
];

// Split `input` into tokens according to `token_exprs`.
//
// Params:
//   input = source text to tokenize.
//
// Returns: the tokens in source order.  Text matched by a pattern tagged
//   `Tag.None` is consumed but not emitted.  Empty input yields an empty
//   array.
//
// Throws: Exception ("Illegal character: <c>") when no pattern matches at
//   the current position.
Token[] lex(string input)
{
    // Compile each pattern once per call rather than once per pattern per
    // input position.  The table is re-read on every call because
    // `token_exprs` is mutable.
    auto patterns = new Regex!char[](token_exprs.length);
    foreach (i, ref expr; token_exprs)
        patterns[i] = regex(expr.value);

    // size_t matches the type of `input.length` and of match lengths.
    size_t pos = 0;
    Appender!(Token[]) tokens;

Lnext:
    while (pos < input.length)
    {
        foreach (i, ref expr; token_exprs)
        {
            auto match = matchFirst(input[pos .. $], patterns[i]);

            // A zero-length match would not advance `pos`; treat it as
            // "no match" so the scan can never spin in place.
            if (!match || match[0].length == 0)
                continue;

            if (expr.tag != Tag.None)
                tokens.put(Token(match[0], expr.tag));
            pos += match[0].length;
            continue Lnext;
        }
        throw new Exception(format("Illegal character: %s", input[pos]));
    }
    return tokens.data;
}