1 //  Toy interpreter lexer.
2 //
3 // Copyright (C) 2014-2015 Iain Buclaw.
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 
14 // You should have received a copy of the GNU General Public License
15 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 
17 // Written by Iain  Buclaw <ibuclaw@gdcproject.org>
18 
19 module toy.lex;
20 
import std.array;
import std.regex;
import std.string;
24 
25 ///// Tokenizer /////
26 enum Tag
27 {
28     None,
29     Reserved,
30     Integer,
31     Identifier,
32 }
33 
34 struct Token
35 {
36     string value;
37     Tag tag;
38 }
39 
40 Token[] token_exprs = [
41     Token(`^[ \n\t]+`, Tag.None),
42     Token(`^#[^\n]*`,  Tag.None),
43     Token(`^\:=`,      Tag.Reserved),
44     Token(`^\(`,       Tag.Reserved),
45     Token(`^\)`,       Tag.Reserved),
46     Token(`^;`,        Tag.Reserved),
47     Token(`^\+`,       Tag.Reserved),
48     Token(`^-`,        Tag.Reserved),
49     Token(`^\*`,       Tag.Reserved),
50     Token(`^/`,        Tag.Reserved),
51     Token(`^<=`,       Tag.Reserved),
52     Token(`^<`,        Tag.Reserved),
53     Token(`^=>`,       Tag.Reserved),
54     Token(`^>`,        Tag.Reserved),
55     Token(`^=`,        Tag.Reserved),
56     Token(`^!=`,       Tag.Reserved),
57     Token(`^and\b`,    Tag.Reserved),
58     Token(`^or\b`,     Tag.Reserved),
59     Token(`^not\b`,    Tag.Reserved),
60     Token(`^if\b`,     Tag.Reserved),
61     Token(`^then\b`,   Tag.Reserved),
62     Token(`^else\b`,   Tag.Reserved),
63     Token(`^while\b`,  Tag.Reserved),
64     Token(`^do\b`,     Tag.Reserved),
65     Token(`^end\b`,    Tag.Reserved),
66     Token(`^print\b`,  Tag.Reserved),
67     Token(`^[0-9]+\b`, Tag.Integer),
68     Token(`^[A-Za-z][A-Za-z0-9_]*\b`, Tag.Identifier),
69 ];
70 
71 Token[] lex(string input)
72 {
73     int pos = 0;
74     Appender!(Token[]) tokens;
75 
76 Lnext:
77     while (pos < input.length)
78     {
79         foreach (token; token_exprs)
80         {
81             auto match = matchFirst(input[pos .. $], regex(token.value));
82             if (match)
83             {
84                 if (token.tag != Tag.None)
85                     tokens.put(Token(match[0], token.tag));
86                 pos += match[0].length;
87                 continue Lnext;
88             }
89         }
90         throw new Exception(format("Illegal character: %s", input[pos]));
91     }
92     return tokens.data;
93 }
94