1 module ddc.lexer.tokenizer;
2 
3 import ddc.lexer.textsource;
4 import ddc.lexer.exceptions;
5 
6 import std.stdio;
7 import std.datetime;
8 import std.conv;
9 import std.utf;
10 import std.math;
11 
/// Kind tag for a lexed token. The base type is ubyte, so a value occupies one byte.
/// Members are numbered implicitly from 0 in declaration order.
enum TokenType : ubyte
{
    EOF,            /// = 0
    // EOL,         // commented out in the original source: not a member, takes no value
    WHITESPACE,     /// = 1
    COMMENT,        /// = 2
    IDENTIFIER,     /// = 3
    STRING,         /// = 4
    CHARACTER,      /// = 5
    INTEGER,        /// = 6
    FLOAT,          /// = 7
    KEYWORD,        /// = 8
    OP,             /// = 9
    INVALID         /// = 10
}
26 
// Bit table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex D,
// "Universal character names for identifiers") OR a..z OR A..Z OR _
// Each uint covers 32 consecutive code points: the flag for code point c is
// bit (c & 31) of element (c >> 5), which is how isUniversalAlpha reads it.
// max code is 0xd7ff, so the table has 0xd800 / 32 = 1728 elements.
// Every row below holds 8 elements, i.e. 256 code points; the trailing comment gives the row's range.
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
    0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
    0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
    0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
    0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
    0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
    0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
    0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
    0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
    0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
    0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
    0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff
    0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff
    0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff
    0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff
    0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff
    0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff
    0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff
    0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
];
248 
/// Looks `ch` up in UNIVERSAL_ALPHA_FLAGS.
/// Returns: true if the character is A..Z, a..z, _ or a universal alpha; false otherwise,
/// including for every code point above 0xd7ff (the table does not extend past it).
bool isUniversalAlpha(dchar ch) pure nothrow {
    // reject anything beyond the last code point the table describes
    if (ch > 0xd7ff)
        return false;
    // one table word holds the flags of 32 consecutive code points
    immutable uint flagsWord = UNIVERSAL_ALPHA_FLAGS[ch >> 5];
    immutable uint bitMask = 1u << (ch & 31);
    return (flagsWord & bitMask) != 0;
}
253 
/// Tells whether `ch` may appear as the first character of an identifier.
/// The accepted set is exactly the one accepted by isUniversalAlpha.
bool isIdentStartChar(dchar ch) pure nothrow {
    immutable bool mayStartIdentifier = isUniversalAlpha(ch);
    return mayStartIdentifier;
}
258 
/// Tells whether `ch` may appear after the first character of an identifier:
/// an ASCII digit 0..9, or any character accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    // digits are decided here, before the table lookup
    if ('0' <= ch && ch <= '9')
        return true;
    return isUniversalAlpha(ch);
}
263     
/// Compile-time switch tested by the `static if` that follows this declaration.
/// While it is false, the declarations inside that `static if` block are not compiled.
/// NOTE(review): judging by the name, the guarded code is presumably used to dump/regenerate
/// the UNIVERSAL_ALPHA_FLAGS table - confirm against the rest of that block.
immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
265 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
266         bool r(dchar ch, wchar v) pure nothrow {
267             return ch == v;
268         }
269         
270         bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
271             return ch >= v1 && ch <= v2;
272         }
273 
274         bool isUniversalAlphaSlow(dchar c)  pure nothrow {
275             return 
276                 // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
277                 // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
278                 r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
279                 || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
280                 //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
281                 //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
282                 //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
283                 //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
284                 //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
285                 || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
286                 || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
287                 || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
288                 || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
289                 || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
290                 //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
291                 //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
292                 || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
293                 || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
294                 //Armenian: 0531−0556, 0561−0587
295                 || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
296                 //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
297                 //05F0−05F2
298                 || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
299                 || r(c, 0x05F0,0x05F2)
300                 //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
301                 //06D0−06DC, 06E5−06E8, 06EA−06ED
302                 || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
303                 || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
304                 //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
305                 || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
306                 //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
307                 //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
308                 //09DC−09DD, 09DF−09E3, 09F0−09F1
309                 || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
310                 || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
311                 || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
312                 //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
313                 //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
314                 //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
315                 || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
316                 || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
317                 || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
318                 //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
319                 //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
320                 //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
321                 || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
322                 || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
323                 || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
324                 // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
325                 //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
326                 //0B5C−0B5D, 0B5F−0B61
327                 || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
328                 || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
329                 || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
330                 //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
331                 //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
332                 //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
333                 || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
334                 || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
335                 || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
336                 //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
337                 //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
338                 || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
339                 || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
340                 //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
341                 //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
342                 //0CE0−0CE1
343                 || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
344                 || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
345                 || r(c, 0x0CE0,0x0CE1)
346                 //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
347                 //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
348                 || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
349                 || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
350                 //Thai: 0E01−0E3A, 0E40−0E5B
351                 || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
352                 //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
353                 //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
354                 //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
355                 //0EC8−0ECD, 0EDC−0EDD
356                 || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
357                 || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
358                 || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
359                 || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
360                 //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
361                 //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
362                 //0FB1−0FB7, 0FB9
363                 || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
364                 || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
365                 || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
366                 //Georgian: 10A0−10C5, 10D0−10F6
367                 || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
368                 //Hiragana: 3041−3093, 309B−309C
369                 || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
370                 //Katakana: 30A1−30F6, 30FB−30FC
371                 || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
372                 //Bopomofo: 3105−312C
373                 || r(c, 0x3105,0x312C)
374                 //CJK Unified Ideographs: 4E00−9FA5
375                 || r(c, 0x4E00,0x9FA5)
376                 //Hangul: AC00−D7A3
377                 || r(c, 0xAC00,0xD7A3)
378                 //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
379                 //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
380                 //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
381                 || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
382                 || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
383                 || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
384                 //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
385                 //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
386                 //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
387                 //2133−2138, 2160−2182, 3005−3007, 3021−3029
388                 || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
389                 || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
390                 || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
391                 || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
392                 ;
393         }
394 
395 }
396 
unittest {
    // Developer-only block, compiled in only when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is set. It
    //   1. prints D source for the packed UNIVERSAL_ALPHA_FLAGS bit table, computed from
    //      isUniversalAlphaSlow plus ASCII letters and '_';
    //   2. checks that isUniversalAlpha agrees with that reference for every code below 0x100000.
    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        /// Reference predicate: UniversalAlpha (slow range check) OR a..z OR A..Z OR _
        static bool expectedAlpha(uint ch) {
            return isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        }

        immutable uint itemsInRow = 8;                // 32-bit words printed per output row
        immutable uint codesPerRow = itemsInRow * 32; // character codes covered by one row

        // highest code below 0x10000 accepted by the reference predicate
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            if (expectedAlpha(i))
                maxAlpha = i;
        }
        // Extend to the last code of the row containing maxAlpha, so that the table consists of
        // whole rows. (Rounding maxAlpha itself up would drop the last row whenever maxAlpha
        // happened to be an exact multiple of codesPerRow.)
        maxAlpha = (maxAlpha / codesPerRow + 1) * codesPerRow - 1;

        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            // indent the first word of every row
            if ((i / 32) % itemsInRow == 0)
                write("    ");
            // pack 32 consecutive codes into one word, bit j <=> code i + j
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                if (expectedAlpha(i + j))
                    flags |= (1u << j);
            }
            writef("0x%08x", flags);
            // every word except the very last one is followed by a comma
            if (i != maxAlpha / 32 * 32)
                write(",");
            // terminate the row with a comment showing the code range it covers
            if ((i / 32) % itemsInRow == itemsInRow - 1)
                writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
        }
        writeln("];");

        // cross-check the fast table lookup against the slow reference
        for (uint ch = 0; ch < 0x100000; ch++) {
            bool flag = expectedAlpha(ch);
            bool flag2 = isUniversalAlpha(ch);
            if (flag2 != flag) {
                // NOTE(review): repeated call kept from the original; presumably a spot for a debugger breakpoint
                isUniversalAlpha(ch);
                writefln("universalAlpha test failed for char %06x expected %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
            }
            assert(flag2 == flag);
        }
    }
}
442 
/**
 * Codes of operator / punctuation tokens.
 *
 * getOpNameD uses the numeric value of a member as an index into OP_CODE_STRINGS,
 * so the members must stay in the same order as the strings of that table.
 * The comment after each member shows the source text of the operator.
 */
enum OpCode : ubyte {
    NONE,       //    no op
    DIV,         //    /
    DIV_EQ,     //    /=
    DOT,         //    .
    DOT_DOT,     //    ..
    DOT_DOT_DOT,//    ...
    AND,         //    &
    AND_EQ,     //    &=
    LOG_AND,     //    &&
    OR,         //    |
    OR_EQ,         //    |=
    LOG_OR,     //    ||
    MINUS,         //    -
    MINUS_EQ,     //    -=
    MINUS_MINUS,//    --
    PLUS,         //    +
    PLUS_EQ,     //    +=
    PLUS_PLUS,     //    ++
    LT,         //    <
    LT_EQ,         //    <=
    SHL,         //    <<
    SHL_EQ,     //    <<=
    LT_GT,         //    <>
    NE_EQ,         //    <>=
    GT,         //    >
    GT_EQ,         //    >=
    SHR_EQ,        //    >>=
    ASR_EQ,     //    >>>=
    SHR,         //    >>
    ASR,         //    >>>
    NOT,         //    !
    NOT_EQ,        //    !=
    NOT_LT_GT,     //    !<>
    NOT_LT_GT_EQ, //    !<>=
    NOT_LT,     //    !<
    NOT_LT_EQ,     //    !<=
    NOT_GT,     //    !>
    NOT_GT_EQ,     //    !>=
    PAR_OPEN,     //    (
    PAR_CLOSE,     //    )
    SQ_OPEN,     //    [
    SQ_CLOSE,     //    ]
    CURL_OPEN,     //    {
    CURL_CLOSE, //    }
    QUEST,         //    ?
    COMMA,         //    ,
    SEMICOLON,  //    ;
    COLON,         //    :
    DOLLAR,     //    $
    EQ,         //    =
    QE_EQ,         //    ==   NOTE(review): name looks like a misspelling of EQ_EQ; renaming would change the public interface
    MUL,         //    *
    MUL_EQ,     //    *=
    MOD,     //    %
    MOD_EQ, //    %=
    XOR,         //    ^
    XOR_EQ,     //    ^=
    LOG_XOR,     //    ^^
    LOG_XOR_EQ, //    ^^=
    INV,         //    ~
    INV_EQ,     //    ~=
    AT,         //    @
    EQ_GT,         //    =>
    SHARP         //    #
};
509 
/**
 * Source text of every operator, indexed by OpCode value (see getOpNameD).
 * Entries must be kept in exactly the same order as the members of OpCode;
 * index 0 (OpCode.NONE) is the empty string.
 */
immutable dstring[] OP_CODE_STRINGS = [
    "",
    "/",
    "/=",
    ".",
    "..",
    "...",
    "&",
    "&=",
    "&&",
    "|",
    "|=",
    "||",
    "-",
    "-=",
    "--",
    "+",
    "+=",
    "++",
    "<",
    "<=",
    "<<",
    "<<=",
    "<>",
    "<>=",
    ">",
    ">=",
    ">>=",
    ">>>=",
    ">>",
    ">>>",
    "!",
    "!=",
    "!<>",
    "!<>=",
    "!<",
    "!<=",
    "!>",
    "!>=",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
    "?",
    ",",
    ";",
    ":",
    "$",
    "=",
    "==",
    "*",
    "*=",
    "%",
    "%=",
    "^",
    "^=",
    "^^",
    "^^=",
    "~",
    "~=",
    "@",
    "=>",
    "#"
];
576 
/**
 * Returns the source text of an operator.
 *
 * Params:
 *     op = operator code; its numeric value is used as the index into OP_CODE_STRINGS
 * Returns: the operator text, "" for OpCode.NONE
 */
dstring getOpNameD(OpCode op) pure nothrow {
    // compile-time guard: the lookup below is only valid while the table has one entry per enum member
    static assert(OP_CODE_STRINGS.length == OpCode.max + 1,
        "OP_CODE_STRINGS must contain exactly one entry per OpCode member");
    return OP_CODE_STRINGS[op];
}
580 
/**
 * Codes of keywords and special tokens recognized by the tokenizer.
 *
 * getKeywordNameD and findKeyword use the numeric value of a member as an index into
 * KEYWORD_STRINGS, so the members must stay in the same order as the strings of that table.
 * Members are grouped by the first letter of the keyword text.
 */
enum Keyword : ubyte {
    NONE,
    ABSTRACT,
    ALIAS,
    ALIGN,
    ASM,
    ASSERT,
    AUTO,

    BODY,
    BOOL,
    BREAK,
    BYTE,

    CASE,
    CAST,
    CATCH,
    CDOUBLE,
    CENT,
    CFLOAT,
    CHAR,
    CLASS,
    CONST,
    CONTINUE,
    CREAL,

    DCHAR,
    DEBUG,
    DEFAULT,
    DELEGATE,
    DELETE,
    DEPRECATED,
    DO,
    DOUBLE,

    ELSE,
    ENUM,
    EXPORT,
    EXTERN,

    FALSE,
    FINAL,
    FINALLY,
    FLOAT,
    FOR,
    FOREACH,
    FOREACH_REVERSE,
    FUNCTION,

    GOTO,

    IDOUBLE,
    IF,
    IFLOAT,
    IMMUTABLE,
    IMPORT,
    IN,
    INOUT,
    INT,
    INTERFACE,
    INVARIANT,
    IREAL,
    IS,

    LAZY,
    LONG,

    MACRO,
    MIXIN,
    MODULE,

    NEW,
    NOTHROW,
    NULL,

    OUT,
    OVERRIDE,

    PACKAGE,
    PRAGMA,
    PRIVATE,
    PROTECTED,
    PUBLIC,
    PURE,

    REAL,
    REF,
    RETURN,

    SCOPE,
    SHARED,
    SHORT,
    STATIC,
    STRUCT,
    SUPER,
    SWITCH,
    SYNCHRONIZED,

    TEMPLATE,
    THIS,
    THROW,
    TRUE,
    TRY,
    TYPEDEF,
    TYPEID,
    TYPEOF,

    UBYTE,
    UCENT,
    UINT,
    ULONG,
    UNION,
    UNITTEST,
    USHORT,

    VERSION,
    VOID,
    VOLATILE,

    WCHAR,
    WHILE,
    WITH,

    FILE, //    __FILE__
    MODULE__, //    __MODULE__
    LINE, //    __LINE__
    FUNCTION__, //    __FUNCTION__
    PRETTY_FUNCTION, //    __PRETTY_FUNCTION__

    //Special Token    Replaced with
    DATE, //    __DATE__: string literal of the date of compilation "mmm dd yyyy"
    EOF, //    __EOF__: sets the scanner to the end of the file
    TIME, //    __TIME__: string literal of the time of compilation "hh:mm:ss"
    TIMESTAMP, //    __TIMESTAMP__: string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    VENDOR, //    __VENDOR__: Compiler vendor string, such as "Digital Mars D"
    VERSION_, //    __VERSION__: Compiler version as an integer, such as 2001
    
    GSHARED, //    __gshared
    TRAITS, //    __traits
    VECTOR, //    __vector
    PARAMETERS, //    __parameters

}
724 
/**
 * Source text of every keyword / special token, indexed by Keyword value
 * (see getKeywordNameD and findKeyword).
 * Entries must be kept in exactly the same order as the members of Keyword;
 * index 0 (Keyword.NONE) is the empty string.
 */
immutable dstring[] KEYWORD_STRINGS = [
    "",
    "abstract",
    "alias",
    "align",
    "asm",
    "assert",
    "auto",

    "body",
    "bool",
    "break",
    "byte",

    "case",
    "cast",
    "catch",
    "cdouble",
    "cent",
    "cfloat",
    "char",
    "class",
    "const",
    "continue",
    "creal",

    "dchar",
    "debug",
    "default",
    "delegate",
    "delete",
    "deprecated",
    "do",
    "double",

    "else",
    "enum",
    "export",
    "extern",

    "false",
    "final",
    "finally",
    "float",
    "for",
    "foreach",
    "foreach_reverse",
    "function",

    "goto",

    "idouble",
    "if",
    "ifloat",
    "immutable",
    "import",
    "in",
    "inout", 
    "int",
    "interface",
    "invariant",
    "ireal",
    "is",

    "lazy",
    "long",

    "macro",
    "mixin",
    "module",

    "new",
    "nothrow",
    "null",

    "out",
    "override",

    "package",
    "pragma",
    "private",
    "protected",
    "public",
    "pure",

    "real",
    "ref",
    "return",

    "scope",
    "shared",
    "short",
    "static",
    "struct",
    "super",
    "switch",
    "synchronized",

    "template",
    "this",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typeof",

    "ubyte",
    "ucent",
    "uint",
    "ulong",
    "union",
    "unittest",
    "ushort",

    "version",
    "void",
    "volatile",

    "wchar",
    "while",
    "with",

    "__FILE__",
    "__MODULE__",
    "__LINE__",
    "__FUNCTION__",
    "__PRETTY_FUNCTION__",

    //Special Token    Replaced with
    "__DATE__", //    string literal of the date of compilation "mmm dd yyyy"
    "__EOF__", //    sets the scanner to the end of the file
    "__TIME__", //    string literal of the time of compilation "hh:mm:ss"
    "__TIMESTAMP__", //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    "__VENDOR__", //    Compiler vendor string, such as "Digital Mars D"
    "__VERSION__", //    Compiler version as an integer, such as 2001

        
    "__gshared",
    "__traits",
    "__vector",
    "__parameters"
];
868 
/**
 * Returns the source text of a keyword.
 *
 * Params:
 *     keyword = keyword code; its numeric value is used as the index into KEYWORD_STRINGS
 * Returns: the keyword text, "" for Keyword.NONE
 */
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    // compile-time guard: the lookup below is only valid while the table has one entry per enum member
    static assert(KEYWORD_STRINGS.length == Keyword.max + 1,
        "KEYWORD_STRINGS must contain exactly one entry per Keyword member");
    return KEYWORD_STRINGS[keyword];
}
872 
/**
 * Searches the keywords start .. end (inclusive) for one that matches the given text.
 *
 * The first character of a keyword is never compared here: keyword character j is compared
 * with name[j - 1], i.e. name has to point at the text that follows the first character.
 * (Presumably the caller has already matched that first character and start .. end is the
 * group of keywords beginning with it - verify against the caller.)
 *
 * A keyword only matches when it is not directly followed by an identifier character
 * (isIdentMiddleChar), or when it ends exactly at the end of the available text.
 *
 * Params:
 *     start = first keyword to try
 *     end   = last keyword to try (inclusive)
 *     name  = text following the first character of the candidate word
 *     len   = number of readable characters at name
 *     pos   = on a match, advanced by the number of keyword characters consumed from name
 *             (keyword length minus one); unchanged otherwise
 * Returns: the first matching keyword in enum order, or Keyword.NONE if there is none
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    if (len < 0)
        return Keyword.NONE; // nothing readable behind name
    immutable size_t available = len;
    for (Keyword i = start; i <= end; i++) {
        dstring s = KEYWORD_STRINGS[i];
        if (s.length == 0)
            continue; // placeholder entry of Keyword.NONE can never match
        // number of keyword characters that have to be found in name
        immutable size_t tail = s.length - 1;
        if (tail > available)
            continue; // too long
        bool found = true;
        for (size_t j = 0; j < tail; j++) {
            if (s[j + 1] != name[j]) {
                found = false;
                break;
            }
        }
        if (!found)
            continue;
        // Accept when the keyword uses up all available text (there is no following character,
        // and name[tail] must not be read), or when the following character cannot continue an
        // identifier. The previous test `s.length == len - 1` skipped the follow-character check
        // for words like "intxy" and read one element past the text for a keyword at its very end.
        if (tail == available || !isIdentMiddleChar(name[tail])) {
            pos += cast(int)tail;
            return i;
        }
    }
    return Keyword.NONE;
}
894 
/**
 * Base class of all tokens.
 *
 * Keeps the token type and the source location. Every other accessor (text, numeric value,
 * opcode, keyword, comment flags, error details) is a virtual property that returns a neutral
 * default here and is overridden by the token classes it applies to.
 */
class Token {
    //                                 32bit      64bit platform
    //                    vtable       4 bytes    8 bytes
    protected SourceFile _file;   //   4 bytes    8 bytes
    protected int _line;          //   4 bytes    4 bytes
    protected int _pos;           //   4 bytes    4 bytes
    protected TokenType _type;    //   1 byte     1 byte
    //                    total        17 bytes   25 bytes

    /// token type
    @property TokenType type() {
        return _type;
    }
    /// file info of the source this token comes from
    @property SourceFile filename() {
        return _file;
    }
    /// 1-based source line number of token start
    @property int line() {
        return _line;
    }
    /// 1-based position of token start inside the source line
    @property int pos() {
        return _pos;
    }
    /// token text; null for the base class
    @property dstring text() {
        return null;
    }

    // ---- number / literal token properties (neutral defaults)

    @property dchar literalType() {
        return 0;
    }
    @property ulong intValue() {
        return 0;
    }
    @property bool isUnsigned() {
        return false;
    }
    @property ulong isLong() {
        return false;
    }
    @property real realValue() {
        return 0;
    }
    @property double doubleValue() {
        return 0;
    }
    @property float floatValue() {
        return 0;
    }
    @property byte precision() {
        return 0;
    }
    @property bool isImaginary() {
        return false;
    }

    // ---- bracket classification, based on opCode

    /// true for any of ( ) [ ] { }
    @property bool isBracket() {
        switch (opCode) {
            case OpCode.PAR_OPEN:
            case OpCode.PAR_CLOSE:
            case OpCode.SQ_OPEN:
            case OpCode.SQ_CLOSE:
            case OpCode.CURL_OPEN:
            case OpCode.CURL_CLOSE:
                return true;
            default:
                return false;
        }
    }
    /// true for any of ( [ {
    @property bool isOpenBracket() {
        switch (opCode) {
            case OpCode.PAR_OPEN:
            case OpCode.SQ_OPEN:
            case OpCode.CURL_OPEN:
                return true;
            default:
                return false;
        }
    }
    /// true for any of ) ] }
    @property bool isCloseBracket() {
        switch (opCode) {
            case OpCode.PAR_CLOSE:
            case OpCode.SQ_CLOSE:
            case OpCode.CURL_CLOSE:
                return true;
            default:
                return false;
        }
    }
    /// true if the token type is EOF
    @property bool isEof() {
        return type == TokenType.EOF;
    }

    /// opcode ID - for opcode tokens; OpCode.NONE otherwise
    @property OpCode opCode() {
        return OpCode.NONE;
    }
    /// keyword ID - for keyword tokens; Keyword.NONE otherwise
    @property Keyword keyword() {
        return Keyword.NONE;
    }
    /// true if this is a documentation comment token
    @property bool isDocumentationComment() {
        return false;
    }
    /// true if this is a multiline comment token
    @property bool isMultilineComment() {
        return false;
    }

    // ---- error handling

    /// true if it's an invalid token (can be returned in error tolerant mode of tokenizer)
    @property bool isError() {
        return type == TokenType.INVALID;
    }
    /// error message if it's an invalid token (can be returned in error tolerant mode of tokenizer)
    @property string errorMessage() {
        return null;
    }
    /// error code if it's an invalid token (can be returned in error tolerant mode of tokenizer)
    @property int errorCode() {
        return 0;
    }
    /// type of the token whose parsing failed - if it's an invalid token (can be returned in error tolerant mode of tokenizer)
    @property TokenType invalidTokenType() {
        return TokenType.INVALID;
    }


    /// creates a token of the given type without location
    this(TokenType type) {
        _type = type;
    }

    /// creates a token of the given type; file, line and pos are stored exactly as passed
    this(TokenType type, SourceFile file, int line, int pos) {
        _type = type;
        _file = file;
        _line = line;
        _pos = pos;
    }

    /// set source file and start position for token (line is 1-based, pos is 0-based and stored as pos + 1)
    void setPos(SourceFile file, int line, int pos) {
        _file = file;
        _line = line;
        _pos = pos + 1;
    }
    /// set source file information for token
    void setFile(SourceFile file) {
        _file = file;
    }
    /// set start position for token (line is 1-based, pos is 0-based and stored as pos + 1)
    void setPos(int line, int pos) {
        _line = line;
        _pos = pos + 1;
    }

    /// creates a copy of this token; implemented by every concrete token class
    public abstract Token clone();

    /// debug dump in the form: line:pos type opcode keyword "text"
    public override @property string toString() {
        string location = "" ~ to!string(_line) ~ ":" ~ to!string(_pos);
        string kinds = to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword);
        return location ~ " " ~ kinds ~ " \"" ~ toUTF8(text()) ~ "\"";
    }
}
1003 
/// end of file token
class EofToken : Token {
    /// creates EOF token without location
    this() {
        super(TokenType.EOF);
    }
    /// creates EOF token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.EOF, file, line, pos);
    }
    /// returns a new EofToken with the same file, line and position
    public override Token clone() {
        auto copy = new EofToken(_file, _line, _pos);
        return copy;
    }
    /// always "EOF"
    public override @property string toString() {
        return "EOF";
    }
}
1018 
1019 // treat as white space
1020 //class EolToken : Token {
1021 //    this(string file, uint line, uint pos) {
1022 //        super(TokenType.EOL, file, line, pos);
1023 //    }
1024 //}
1025 
/// token for a run of white space
class WhiteSpaceToken : Token {
    /// creates white space token without location
    this() {
        super(TokenType.WHITESPACE);
    }
    /// creates white space token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.WHITESPACE, file, line, pos);
    }
    /// returns a new WhiteSpaceToken with the same file, line and position
    public override Token clone() {
        auto copy = new WhiteSpaceToken(_file, _line, _pos);
        return copy;
    }
    /// always "WhiteSpace"
    public override @property string toString() {
        return "WhiteSpace";
    }
}
1041 
/// operator / punctuation token
class OpToken : Token {
    OpCode _op;

    /// creates operator token without location; opcode is left at its default
    this() {
        super(TokenType.OP);
    }
    /// creates operator token at the given location; opcode is left at its default
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.OP, file, line, pos);
    }

    /// opcode of this token
    public @property override OpCode opCode() {
        return _op;
    }
    /// sets opcode of this token
    public @property void opCode(OpCode op) {
        _op = op;
    }
    /// source text of the operator, looked up by opcode
    public @property override dstring text() {
        return getOpNameD(_op);
    }

    /// returns a new OpToken with the same location and opcode
    public override Token clone() {
        auto copy = new OpToken(_file, _line, _pos);
        copy._op = _op;
        return copy;
    }
    /// "Op:" followed by the name of the opcode enum member
    public override @property string toString() {
        string opName = to!string(_op);
        return "Op:" ~ opName;
    }
}
1062 
/// keyword token
class KeywordToken : Token {
    Keyword _keyword;

    /// creates keyword token without location; keyword is left at its default
    this() {
        super(TokenType.KEYWORD);
    }
    /// creates keyword token at the given location; keyword is left at its default
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.KEYWORD, file, line, pos);
    }

    /// keyword ID of this token
    public @property override Keyword keyword() {
        return _keyword;
    }
    /// sets keyword ID of this token
    public @property void keyword(Keyword keyword) {
        _keyword = keyword;
    }
    /// source text of the keyword, looked up by keyword ID
    public @property override dstring text() {
        return getKeywordNameD(_keyword);
    }

    /// returns a new KeywordToken with the same location and keyword
    public override Token clone() {
        auto copy = new KeywordToken(_file, _line, _pos);
        copy._keyword = _keyword;
        return copy;
    }
    /// "Keyword:" followed by the name of the keyword enum member
    public override @property string toString() {
        string keywordName = to!string(_keyword);
        return "Keyword:" ~ keywordName;
    }
}
1083 
/// comment token: comment text plus documentation / multiline flags
class CommentToken : Token {
    protected dstring _text;
    protected bool _isDocumentationComment;
    protected bool _isMultilineComment;

    /// creates comment token without location and text
    this() {
        super(TokenType.COMMENT);
    }
    /// creates comment token at the given location; the passed array is cast to dstring, not copied
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.COMMENT, file, line, pos);
        _text = cast(dstring)text;
    }

    /// returns true if this is documentation comment token
    override @property bool isDocumentationComment() {
        return _isDocumentationComment;
    }
    /// sets documentation comment flag
    @property void isDocumentationComment(bool f) {
        _isDocumentationComment = f;
    }

    /// returns true if this is multiline
    override @property bool isMultilineComment() {
        return _isMultilineComment;
    }
    /// sets multiline flag
    @property void isMultilineComment(bool f) {
        _isMultilineComment = f;
    }

    /// comment text
    @property override dstring text() {
        return _text;
    }
    /// sets comment text; the passed array is cast to dstring, not copied
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    /// returns a new CommentToken with the same location and flags and a duplicate of the text
    override public Token clone() {
        auto copy = new CommentToken(_file, _line, _pos, _text.dup);
        copy._isDocumentationComment = _isDocumentationComment;
        copy._isMultilineComment = _isMultilineComment;
        return copy;
    }
    /// "Comment:" followed by the comment text
    public override @property string toString() {
        string body_ = to!string(_text);
        return "Comment:" ~ body_;
    }
}
1127 
/// Invalid token holder - for error tolerant parsing
class InvalidToken : Token {
    protected dstring _text;
    protected TokenType _invalidTokenType;
    protected int _errorCode;
    protected string _errorMessage;

    /// creates invalid token without location and text
    this() {
        super(TokenType.INVALID);
    }
    /// creates invalid token at the given location; the passed array is cast to dstring, not copied
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.INVALID, file, line, pos);
        _text = cast(dstring)text;
    }

    /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property string errorMessage() {
        return _errorMessage;
    }
    /// sets error message
    @property void errorMessage(string s) {
        _errorMessage = s;
    }

    /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property int errorCode() {
        return _errorCode;
    }
    /// sets error code
    @property void errorCode(int c) {
        _errorCode = c;
    }

    /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property TokenType invalidTokenType() {
        return _invalidTokenType;
    }
    /// sets type of token parsing of which has been failed
    @property void invalidTokenType(TokenType t) {
        _invalidTokenType = t;
    }

    /// text of invalid token
    @property override dstring text() {
        return _text;
    }
    /// sets text of invalid token; the passed array is cast to dstring, not copied
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    /// returns a new InvalidToken with the same location and error details; text and message are duplicated
    override Token clone() {
        auto copy = new InvalidToken(_file, _line, _pos, _text.dup);
        copy._errorMessage = _errorMessage.dup;
        copy._errorCode = _errorCode;
        copy._invalidTokenType = _invalidTokenType;
        return copy;
    }
    /// "Invalid:" followed by the token text
    override @property string toString() {
        string body_ = to!string(_text);
        return "Invalid:" ~ body_;
    }
}
1171 
/// numeric ID of an identifier
alias tokenizer_ident_t = uint;
/// text of an identifier
alias tokenizer_ident_name_t = dstring;

/// reserved ID value meaning "no identifier"
enum tokenizer_ident_t NO_IDENT = 0;
1178 
/**
 * Storage for identifier strings: a two-way map between identifier text and numeric ID.
 * IDs are handed out sequentially, starting at NO_IDENT + 1.
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    /// creates an empty storage
    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
    * Looks up the id registered for a name; NO_IDENT if the name is unknown.
    * Never creates a new entry.
    */
    uint findByName(tokenizer_ident_name_t name) {
        if (auto known = name in _nameToId)
            return *known;
        return NO_IDENT;
    }

    /**
    * Looks up the name registered for an id; null if the id is unknown.
    */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        auto entry = id in _idToName;
        if (entry is null)
            return null;
        return *entry;
    }

    /**
     * Returns the id registered for a name, registering the name under a new id first
     * if it is not known yet. A duplicate of the name is stored, not the passed array.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        if (auto known = name in _nameToId)
            return *known;
        uint freshId = _nextId++;
        immutable tokenizer_ident_name_t ownedName = name.dup;
        _nameToId[ownedName] = freshId;
        _idToName[freshId] = ownedName;
        return freshId;
    }
}
1225 
/**
* Identifier storage used by the tokenizer (thread local: a plain module-level variable).
*/
IdentHolder identMap;

/// module constructor: creates the identifier storage
static this() {
    identMap = new IdentHolder;
}
1235 
/// string literal token: literal text plus a literal type character
class StringLiteralToken : Token {
    dstring _text;
    dchar _literalType;

    /// creates string literal token without location and text
    this() {
        super(TokenType.STRING);
    }
    /// creates string literal token at the given location; the passed array is cast to dstring, not copied
    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
        super(TokenType.STRING, file, line, pos);
        _text = cast(dstring)text;
        _literalType = type;
    }

    /// literal type character
    public @property override dchar literalType() {
        return _literalType;
    }
    /// text of the literal
    public @property override dstring text() {
        return _text;
    }
    /// sets text and literal type; the passed array is cast to dstring, not copied
    public void setText(dchar[] text, dchar type) {
        _text = cast(dstring)text;
        _literalType = type;
    }

    /// returns a new StringLiteralToken with the same location and type and a duplicate of the text
    override public Token clone() {
        auto copy = new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
        return copy;
    }
    /// String:"text" followed by the literal type character, or by a space if the type is 0
    public override @property string toString() {
        dchar typeSuffix = _literalType ? _literalType : ' ';
        dstring dump = "String:\"" ~ _text ~ "\"" ~ typeSuffix;
        return toUTF8(dump);
    }
}
1257 
/// character literal token: one character plus a literal type character
class CharacterLiteralToken : Token {
    dchar _character;
    dchar _literalType;

    /// creates character literal token without location
    this() {
        super(TokenType.CHARACTER);
    }
    /// creates character literal token at the given location
    this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
        super(TokenType.CHARACTER, file, line, pos);
        _character = character;
        _literalType = type;
    }

    /// literal type character
    @property override dchar literalType() {
        return _literalType;
    }
    /// the character of the literal
    @property dchar character() {
        return _character;
    }
    /// the character of the literal as a one-element string
    @property override dstring text() {
        return [_character];
    }
    /// sets character and literal type
    void setCharacter(dchar ch, dchar type) {
        _character = ch;
        _literalType = type;
    }

    /// returns a new CharacterLiteralToken with the same location, character and type
    override public Token clone() {
        auto copy = new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
        return copy;
    }
    /// "Char:" followed by the character encoded as UTF-8
    public override @property string toString() {
        string encoded = toUTF8([_character]);
        return "Char:" ~ encoded;
    }
}
1280 
/// integer literal token: value plus unsigned / long flags
class IntegerLiteralToken : Token {
    ulong _value;
    bool _unsigned;
    bool _long;

    /// creates integer literal token without location
    this() {
        super(TokenType.INTEGER);
    }
    /// creates integer literal token at the given location
    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
        super(TokenType.INTEGER, file, line, pos);
        _value = value;
        _unsigned = unsignedFlag;
        _long = longFlag;
    }

    /// value of the literal
    public @property override ulong intValue() {
        return _value;
    }
    /// unsigned flag
    public @property override bool isUnsigned() {
        return _unsigned;
    }
    /// long flag
    public @property override ulong isLong() {
        return _long;
    }
    /// decimal representation of the value
    public @property override dstring text() {
        return to!dstring(_value);
    }

    /// sets value and both flags
    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
        _value = value;
        _unsigned = unsignedFlag;
        _long = longFlag;
    }
    /// sets both flags, leaving the value unchanged
    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
        _unsigned = unsignedFlag;
        _long = longFlag;
    }

    /// returns a new IntegerLiteralToken with the same location, value and flags
    override public Token clone() {
        auto copy = new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
        return copy;
    }
    /// "Integer:" + value, followed by "L" if long and "U" if unsigned
    public override @property string toString() {
        string dump = "Integer:" ~ to!string(_value);
        if (_long)
            dump ~= "L";
        if (_unsigned)
            dump ~= "U";
        return dump;
    }
}
1314 
/// floating point literal token: value plus precision code and imaginary flag
class RealLiteralToken : Token {
    real _value;
    byte _precision;
    bool _imaginary;

    /// creates floating point literal token without location
    this() {
        super(TokenType.FLOAT);
    }
    /// creates floating point literal token at the given location
    this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
        super(TokenType.FLOAT, file, line, pos);
        _value = value;
        _precision = precision;
        _imaginary = imaginary;
    }

    /// value converted with to!long
    public @property override ulong intValue() {
        return to!long(_value);
    }
    /// value as real
    public @property override real realValue() {
        return _value;
    }
    /// value cast to double
    public @property override double doubleValue() {
        return cast(double)_value;
    }
    /// value cast to float
    public @property override float floatValue() {
        return cast(float)_value;
    }
    /// precision code (toString prints "f" for 0 and "L" for 2)
    public @property override byte precision() {
        return _precision;
    }
    /// imaginary flag
    public @property override bool isImaginary() {
        return _imaginary;
    }
    /// text representation of the value
    public @property override dstring text() {
        return to!dstring(_value);
    }

    /// sets value, precision code and imaginary flag
    public void setValue(real value, byte precision = 1, bool imaginary = false) {
        _value = value;
        _precision = precision;
        _imaginary = imaginary;
    }
    /// sets precision code and imaginary flag, leaving the value unchanged
    public void setFlags(byte precision = 1, bool imaginary = false) {
        _precision = precision;
        _imaginary = imaginary;
    }

    /// returns a new RealLiteralToken with the same location, value and flags
    override public Token clone() {
        auto copy = new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
        return copy;
    }
    /// "Real:" + value, followed by the precision suffix ("f", "L" or none) and "i" if imaginary
    public override @property string toString() {
        string dump = "Real:" ~ to!string(_value);
        switch (_precision) {
            case 0:
                dump ~= "f";
                break;
            case 2:
                dump ~= "L";
                break;
            default:
                break;
        }
        if (_imaginary)
            dump ~= "i";
        return dump;
    }
}
1351 
/// Identifier token. Only the id that identMap assigns to the name is stored;
/// the name itself is looked up on demand.
class IdentToken : Token {
    /// Id of the identifier name inside identMap.
    tokenizer_ident_t _id;

    /// Creates a token without position or name.
    this() {
        super(TokenType.IDENTIFIER);
    }

    /// Creates a token for an identifier whose id is already known.
    this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = id;
    }

    /// Creates a token for the given name, resolving its id through identMap.
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = identMap.idByName(cast(immutable)text);
    }

    /// Identifier name, as returned by identMap for the stored id.
    public @property override dstring text() {
        return identMap.nameById(_id);
    }

    /// Changes the identifier to the given name.
    /// NOTE(review): the mutable buffer is passed as immutable without copying;
    /// presumably identMap.idByName copies names it decides to keep - confirm.
    public void setText(dchar[] text) {
        _id = identMap.idByName(cast(immutable)text);
    }

    /// Returns an independent token with the same position and id.
    override public Token clone() {
        return new IdentToken(_file, _line, _pos, _id);
    }

    /// Debug representation: "Ident:" followed by the identifier name.
    public override @property string toString() {
        dstring name = text;
        return "Ident:" ~ to!string(name);
    }
}
1378 
// shared appender buffer, to avoid extra heap allocations
/**
 * Growable dchar buffer that is reset and refilled instead of reallocated.
 *
 * Besides plain appending, it can decode backslash escape sequences of the
 * accumulated text in place (see processEscapeSequences).
 */
struct StringAppender {
    /// Backing storage; only the first `len` elements hold valid text.
    dchar[] buf;
    /// Number of valid characters currently stored in buf.
    uint len;
    /// Returns the accumulated text as a slice of the internal buffer (no copy is made).
    dchar[] get() {
        return buf[0 .. len];
    }
    /// Appends a single '\n', growing the buffer when it is full.
    void appendEol() {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)((len + 1 + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len] = '\n';
        len++;
    }
    /// Appends all characters of s; an empty s is a no-op.
    void append(dchar[] s) {
        if (s.length == 0)
            return;
        if (len + s.length > buf.length) {
            uint newsize = cast(uint)((len + s.length + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len .. len + s.length] = s;
        len += s.length;
    }
    /// Appends a single character, doubling the buffer (at least 128 elements) when it is full.
    void append(dchar ch) {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)(buf.length * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len++] = ch;
    }
    /// Forgets the accumulated text; the allocated storage is kept for reuse.
    void reset() {
        len = 0;
    }
    /// Returns the value (0..15) of a hexadecimal digit, or -1 when ch is not one.
    static int parseHexDigit(dchar ch) {
        if (ch >= '0' && ch <='9')
            return ch - '0';
        if (ch >= 'a' && ch <='f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <='F')
            return ch - 'A' + 10;
        return -1;
    }
    /// Set by the decode* helpers and by processEscapeSequences when invalid input is met.
    bool errorFlag = false;
    /**
     * Decodes `count` hexadecimal digits that follow buf[pos].
     *
     * pos is advanced to the last character consumed. errorFlag is set when the
     * text ends before `count` digits were read (the partial value is returned)
     * or when a character is not a hex digit (it is counted as 0).
     */
    dchar decodeHex(ref int pos, int count) {
        dchar res = 0;
        for (int i = 0; i < count; i++) {
            if (pos >= len - 1) {
                errorFlag = true;
                return res;
            }
            dchar ch = buf[++pos];
            int digit = parseHexDigit(ch);
            if (digit < 0) {
                errorFlag = true;
                digit = 0;
            }
            res = (res << 4) | digit;
        }
        return res;
    }
    /**
     * Decodes an octal escape of one to three digits.
     *
     * firstChar is the already consumed first digit (buf[pos]); up to two more
     * octal digits are consumed, and pos is left on the last digit used.
     */
    dchar decodeOct(dchar firstChar, ref int pos) {
        dchar res = 0;
        res = firstChar - '0';
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        return res;
    }

    /// Scratch buffer for the entity name collected by decodeCharacterEntity.
    char[] entityNameBuf;
    /// Number of valid characters in entityNameBuf.
    int entityNameLen;

    /**
     * Decodes a named character entity of the form `&name;`.
     *
     * On entry buf[pos] is the '&'. Characters up to the next ';' are collected
     * as the entity name and looked up with entityToChar; on success pos is left
     * on the ';'. Returns '?' and sets errorFlag when the ';' is missing or the
     * lookup yields 0. A name character >= 0x80 also sets errorFlag.
     */
    dchar decodeCharacterEntity(ref int pos) {
        entityNameLen = 0;
        pos++;
        for(; pos < len && buf[pos] != ';'; pos++) {
            dchar ch = buf[pos];
            if (ch >= 0x80)
                errorFlag = true;
            if (entityNameBuf.length < entityNameLen + 4)
                entityNameBuf.length += 32;
            entityNameBuf[entityNameLen++] = cast(char)ch;
        }
        if (pos < len && buf[pos] == ';') {
            dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /**
     * Replaces backslash escape sequences in the accumulated text by the
     * characters they denote. Decoding happens in place, and `len` shrinks
     * accordingly.
     *
     * Returns: true when at least one invalid escape sequence was found
     * (the value of errorFlag), false when everything decoded cleanly.
     */
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0;
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch == '\\') {
                if (src == len - 1) {
                    // a lone backslash at the very end escapes nothing: drop it and report the error
                    errorFlag = true;
                    break; // INVALID
                }
                ch = buf[++src];
                switch (ch) {
                    case '\'':
                    case '\"':
                    case '?':
                    case '\\':
                        buf[dst++] = ch;
                        break;
                    case 'a':
                        buf[dst++] = '\a';
                        break;
                    case 'b':
                        buf[dst++] = '\b';
                        break;
                    case 'f':
                        buf[dst++] = '\f';
                        break;
                    case 'n':
                        buf[dst++] = '\n';
                        break;
                    case 'r':
                        buf[dst++] = '\r';
                        break;
                    case 't':
                        buf[dst++] = '\t';
                        break;
                    case 'v':
                        buf[dst++] = '\v';
                        break;
                    case 'x':
                        buf[dst++] = decodeHex(src, 2);
                        break;
                    case 'u':
                        buf[dst++] = decodeHex(src, 4);
                        break;
                    case 'U':
                        buf[dst++] = decodeHex(src, 8);
                        break;
                    default:
                        if (ch >= '0' && ch <= '7') {
                            // octal X XX or XXX; '0' is handled here as well, so that
                            // "\0" yields NUL while "\012" is decoded as one octal escape
                            buf[dst++] = decodeOct(ch, src);
                        } else if (ch == '&') {
                            // named character entity
                            buf[dst++] = decodeCharacterEntity(src);
                        } else {
                            // unknown escape: keep the character as is and report the error
                            buf[dst++] = ch;
                            errorFlag = true;
                        }
                        break;
                }
            } else {
                buf[dst++] = ch;
            }
        }
        len = dst;
        return errorFlag;
    }
}
1553 
1554 class Tokenizer
1555 {
    /// Source the tokenizer reads lines from (see nextLine()).
    protected SourceLines _lineStream;
    /// Text of the line currently being scanned.
    protected dchar[] _lineText;
    protected int _line; // current line number
    protected int _len; // current line length
    protected int _pos; // current line read position
    protected int _prevLineLength; // previous line length
    protected uint _state; // tokenizer state
    
    /// Pseudo characters returned by nextChar()/peekChar() at end of input and at end of a line.
    enum : int {
        EOF_CHAR = 0x001A,
        EOL_CHAR = 0x000A
    };
    
    // Token instances that are reused for every token of their kind instead of
    // allocating a new object each time.
    protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
    protected CommentToken _sharedCommentToken = new CommentToken();
    protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
    protected IdentToken _sharedIdentToken = new IdentToken();
    protected OpToken _sharedOpToken = new OpToken();
    protected KeywordToken _sharedKeywordToken = new KeywordToken();
    protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
    protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
    protected InvalidToken _sharedInvalidToken = new InvalidToken();
    protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
    // Reusable text buffers: comment text and identifier characters are collected
    // in the last two; the first is presumably used for string literals - TODO confirm.
    protected StringAppender _stringLiteralAppender;
    protected StringAppender _commentAppender;
    protected StringAppender _identAppender;
    
    protected bool _enableCommentText = true;
    /// when false, does not put comment text into comment token - for less allocations
    @property void enableCommentText(bool enabled) {
        _enableCommentText = enabled;
    }
    /// when false, does not put comment text into comment token - for less allocations
    @property bool enableCommentText() {
        return _enableCommentText;
    }

    protected bool _errorTolerant = false;
    /// when true, parserError() returns the shared InvalidToken instead of throwing ParserException
    @property void errorTolerant(bool enabled) {
        _errorTolerant = enabled;
    }
    /// when true, parserError() returns the shared InvalidToken instead of throwing ParserException
    @property bool errorTolerant() {
        return _errorTolerant;
    }
1602 
    /// Creates a tokenizer reading from lineStream; all setup is delegated to initialize().
    this(SourceLines lineStream) {
        initialize(lineStream);
    }
1606 
1607     void initialize(SourceLines lineStream, int pos = 0) {
1608         _lineStream = lineStream;
1609         SourceFile file = _lineStream.file;
1610         _sharedWhiteSpaceToken.setFile(file);
1611         _sharedCommentToken.setFile(file);
1612         _sharedStringLiteralToken.setFile(file);
1613         _sharedIdentToken.setFile(file);
1614         _sharedOpToken.setFile(file);
1615         _sharedKeywordToken.setFile(file);
1616         _sharedIntegerToken.setFile(file);
1617         _sharedRealToken.setFile(file);
1618         _sharedInvalidToken.setFile(file);
1619         _sharedCharacterLiteralToken.setFile(file);
1620         buildTime = Clock.currTime();
1621         _line = lineStream.line;
1622         _pos = 0;
1623         _prevLineLength = 0;
1624         _lineText = null;
1625         nextLine();
1626         _pos = pos;
1627     }
1628     
    /// Creates a tokenizer for source text held in memory, wrapping it into an ArraySourceLines.
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }
1632     
1633     // fetch next line from source stream
1634     protected bool nextLine() {
1635         _prevLineLength = cast(int)_lineText.length;
1636         _lineText = _lineStream.readLine();
1637         if (!_lineText) {
1638             if (_lineStream.errorCode != 0)
1639                 throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
1640             if (_lineStream.eof) {
1641                 // end of file
1642                 _pos = 0;
1643                 _len = 0;
1644                 return false;
1645             }
1646             // just an empty line
1647         }
1648         _line = _lineStream.line;
1649         _pos = 0;
1650         _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
1651         return true;
1652     }
1653     
1654     protected dchar nextChar() {
1655         if (_pos >= _len) {
1656             if (!nextLine()) {
1657                 _pos = _prevLineLength + 1;
1658                 return EOF_CHAR;
1659             }
1660             return EOL_CHAR;
1661         }
1662         dchar res = _lineText[_pos++];
1663         if (_pos >= _len)
1664             nextLine();
1665         return res;
1666     }
1667     
1668     protected dchar peekChar() {
1669         if (_lineText is null) {
1670             if (!nextLine()) {
1671                 return EOF_CHAR;
1672             }
1673         }
1674         if (_pos >= _len)
1675             return EOL_CHAR;
1676         return _lineText[_pos++];
1677     }
1678     
    /// Creates the end-of-file token: a new EofToken placed at _startLine, _startPos + 2.
    /// NOTE(review): the reason for the "+ 2" offset is not visible here - confirm.
    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _startLine, _startPos + 2);
    }
1683     
1684     protected Token processWhiteSpace(dchar firstChar) {
1685         // reuse the same token instance, to avoid extra heap spamming
1686         _sharedWhiteSpaceToken.setPos(_startLine, _startPos);
1687         for (;;) {
1688             int i = _pos;
1689             for (; i < _len; i++) {
1690                 dchar ch = _lineText[i];
1691                 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
1692                     break;
1693             }
1694             _pos = i;
1695             if (_pos < _len)
1696                 break;
1697             // go to next line
1698             if (!nextLine())
1699                 break;
1700         }
1701         return _sharedWhiteSpaceToken;
1702     }
1703     
1704     protected Token processOneLineComment() {
1705         _sharedCommentToken.setPos(_startLine, _startPos);
1706         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
1707         _sharedCommentToken.isMultilineComment = false;
1708         if (_enableCommentText) {
1709             _sharedCommentToken.text = _lineText[_pos + 1 .. $];
1710         }
1711         _pos = _len;
1712         nextChar();
1713         return _sharedCommentToken;
1714     }
1715 
1716     protected Token processOneLineSharpComment() {
1717         _sharedCommentToken.setPos(_startLine, _startPos);
1718         if (_enableCommentText) {
1719             _sharedCommentToken.text = _lineText[_pos .. $];
1720         }
1721         _pos = _len;
1722         return _sharedCommentToken;
1723     }
1724 
1725     // Comment /*   */    
1726     protected Token processMultilineComment() {
1727         _sharedCommentToken.setPos(_startLine, _startPos);
1728         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
1729         _sharedCommentToken.isMultilineComment = true;
1730         _commentAppender.reset();
1731         int textStart = _pos + 1;
1732         for (;;) {
1733             int textEnd = int.max;
1734             int i = textStart;
1735             for (; i < _len - 1; i++) {
1736                 if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
1737                     textEnd = i;
1738                     break;
1739                 }
1740             }
1741             if (textEnd != int.max) {
1742                 if (_enableCommentText)
1743                     _commentAppender.append(_lineText[textStart .. textEnd]);
1744                 _pos = textEnd + 2;
1745                 break;
1746             }
1747             if (!nextLine()) {
1748                 // TODO: do we need throw exception if comment not closed by end of file?
1749                 _pos = _len;
1750                 break;
1751             }
1752             textStart = 0;
1753         }
1754         if (_enableCommentText) {
1755             _sharedCommentToken.text = _commentAppender.get();
1756         }
1757         return _sharedCommentToken;
1758     }
1759     
1760     // Comment /+   +/    
1761     protected Token processNestedComment() {
1762         _sharedCommentToken.setPos(_startLine, _startPos);
1763         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
1764         _sharedCommentToken.isMultilineComment = true;
1765         _commentAppender.reset();
1766         dchar[] text;
1767         int textStart = _pos + 1;
1768         int level = 1;
1769         for (;;) {
1770             int textEnd = int.max;
1771             int i = textStart;
1772             for (; i < _len - 1; i++) {
1773                 if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
1774                     level++;
1775                     i++;
1776                 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
1777                     if (--level == 0) {
1778                         textEnd = i;
1779                         break;
1780                     }
1781                 }
1782             }
1783             if (textEnd != int.max) {
1784                 if (_enableCommentText)
1785                     _commentAppender.append(_lineText[textStart .. textEnd]);
1786                 _pos = textEnd + 2;
1787                 break;
1788             }
1789             if (!nextLine()) {
1790                 // TODO: do we need throw exception if comment not closed by end of file?
1791                 _pos = _len;
1792                 break;
1793             }
1794             if (_enableCommentText)
1795                 _commentAppender.appendEol();
1796             textStart = 0;
1797         }
1798         if (_enableCommentText) {
1799             _sharedCommentToken.text = _commentAppender.get();
1800         }
1801         return _sharedCommentToken;
1802     }
1803     
    /// Placeholder for hex string literals: not implemented yet.
    /// Advances the read position by one character and returns null.
    protected Token processHexString() {
        _pos++;
        // TODO:
        return null;
    }
1809     
    /// Placeholder for delimited string literals: not implemented yet.
    /// Advances the read position by one character and returns null.
    protected Token processDelimitedString() {
        _pos++;
        // TODO:
        return null;
    }
1815     
1816     // r"string"   or    `string`
    /// Placeholder for wysiwyg string literals: not implemented yet.
    /// Advances the read position by one character and returns null; ch is not used.
    protected Token processWysiwygString(dchar ch) {
        _pos++;
        // TODO:
        return null;
    }
1822     
1823     protected Token processIdent(dchar firstChar) {
1824         _sharedIdentToken.setPos(_startLine, _startPos);
1825         _identAppender.reset();
1826         _identAppender.append(firstChar);
1827         for (; _pos < _len; ) {
1828             dchar ch = _lineText[_pos];
1829             if (!isIdentMiddleChar(ch)) {
1830                 break;
1831             }
1832             _identAppender.append(ch);
1833             _pos++;
1834         }
1835         _sharedIdentToken.setText(_identAppender.get);
1836         return _sharedIdentToken;
1837     }
1838 
1839     protected Token processIntegerSuffix() {
1840         if (_pos >= _len)
1841             return _sharedIntegerToken;
1842         bool longFlag = false;
1843         bool unsignedFlag = false;
1844         dchar ch = _lineText[_pos];
1845         dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
1846         if (ch == 'l' || ch == 'L') {
1847             longFlag = true;
1848             _pos++;
1849             if (ch2 == 'u' || ch2 == 'U') {
1850                 unsignedFlag = true;
1851                 _pos++;
1852             } 
1853         } else if (ch == 'u' || ch == 'U') {
1854             unsignedFlag = true;
1855             _pos++;
1856             if (ch2 == 'l' || ch2 == 'L') {
1857                 longFlag = true;
1858                 _pos++;
1859             } 
1860         }
1861         _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
1862         ch = _pos < _len ? _lineText[_pos] : 0;
1863         if (isIdentMiddleChar(ch))
1864             return parserError("Unexpected character after number", _sharedIntegerToken);
1865         return _sharedIntegerToken;
1866     }
1867     
1868     protected Token processBinaryNumber() {
1869         _sharedIntegerToken.setPos(_startLine, _startPos);
1870         _pos++;
1871         if (_pos >= _len)
1872             return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
1873         int digits = 0;
1874         ulong number = 0;
1875         int i = _pos;
1876         for (;i < _len; i++) {
1877             dchar ch = _lineText[i];
1878             if (ch != '0' && ch != '1')
1879                 break;
1880             number = (number << 1) | (ch == '1' ? 1 : 0);
1881             digits++;
1882         }
1883         _pos = i;
1884         if (digits > 64)
1885             return parserError("number is too big", _sharedIntegerToken);
1886         _sharedIntegerToken.setValue(number);
1887         return processIntegerSuffix();
1888     }
1889 
1890     protected Token processHexNumber() {
1891         _sharedIntegerToken.setPos(_startLine, _startPos);
1892         _sharedRealToken.setPos(_startLine, _startPos);
1893         _pos++;
1894         if (_pos >= _len)
1895             return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
1896         int digits = 0;
1897         ulong number = 0;
1898         int i = _pos;
1899         for (;i < _len; i++) {
1900             dchar ch = _lineText[i];
1901             uint digit = 0;
1902             if (ch >= '0' && ch <= '9')
1903                 digit = ch - '0';
1904             else if (ch >= 'a' && ch <= 'f')
1905                 digit = ch - 'a' + 10;
1906             else if (ch >= 'A' && ch <= 'F')
1907                 digit = ch - 'A' + 10;
1908             else if (ch == '_')
1909                 continue;
1910             else
1911                 break;
1912             number = (number << 4) | digit;
1913             digits++;
1914         }
1915         _pos = i;
1916         if (digits > 16)
1917             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1918         _sharedIntegerToken.setValue(number);
1919         return processIntegerSuffix();
1920     }
1921     
1922     protected Token processOctNumber() {
1923         _sharedIntegerToken.setPos(_startLine, _startPos);
1924         if (_pos >= _len)
1925             return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
1926         int digits = 0;
1927         ulong number = 0;
1928         int i = _pos;
1929         bool overflow = false;
1930         for (;i < _len; i++) {
1931             dchar ch = _lineText[i];
1932             int digit = 0;
1933             if (ch >= '0' && ch <= '7')
1934                 digit = ch - '0';
1935             else if (ch == '_')
1936                 continue;
1937             else
1938                 break;
1939             number <<= 3;
1940             if (digits >= 20) {
1941                 if ((number >> 3) << 3 != number) {
1942                     overflow = true;
1943                     break;
1944                 }
1945             }
1946             number |= digit;
1947             digits++;
1948         }
1949         _pos = i;
1950         if (overflow)
1951             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1952         _sharedIntegerToken.setValue(number);
1953         return processIntegerSuffix();
1954     }
1955     
1956     // 
1957     protected Token processDecFloatSuffix(real value) {
1958         ubyte precision = 1;
1959         bool imaginary = false;
1960         dchar next = _pos < _len ? _lineText[_pos] : 0;
1961         if (next == 'f') {
1962             _pos++;
1963             precision = 0;
1964         } else if (next == 'L') {
1965             _pos++;
1966             precision = 2;
1967         }
1968         next = _pos < _len ? _lineText[_pos] : 0;
1969         if (next == 'i') {
1970             _pos++;
1971             imaginary = true;
1972         }
1973         next = _pos < _len ? _lineText[_pos] : 0;
1974         if (isIdentMiddleChar(next))
1975             return parserError("invalid suffix for floating point literal", _sharedRealToken);
1976         _sharedRealToken.setValue(value, precision, imaginary);
1977         return _sharedRealToken;
1978     }
1979     
1980     // after E char
1981     protected Token processDecFloatExponent(real value) {
1982         dchar next = _pos < _len ? _lineText[_pos] : 0;
1983         int sign = 1;
1984         if (next == '+') {
1985             _pos++;
1986         } else if (next == '-') {
1987             _pos++;
1988             sign = -1;
1989         }
1990         if (_pos >= _len)
1991             return parserError("Invalid exponent", _sharedRealToken);
1992         ulong digits = 0;
1993         ulong number = 0;
1994         int i = _pos;
1995         bool overflow = false;
1996         for (;i < _len; i++) {
1997             dchar ch = _lineText[i];
1998             uint digit = 0;
1999             if (ch >= '0' && ch <= '9')
2000                 digit = ch - '0';
2001             else if (ch == '_')
2002                 continue;
2003             else
2004                 break;
2005             number *= 10;
2006             if (digits >= 18) {
2007                 if ((number * 10) / 10 != number) {
2008                     overflow = true;
2009                     break;
2010                 }
2011             }
2012             number += digit;
2013             digits++;
2014         }
2015         if (digits == 0)
2016             return parserError("Invalid exponent", _sharedRealToken);
2017         _pos = i;
2018         value *= pow(10., cast(long)number * sign);
2019         return processDecFloatSuffix(value);
2020     }
2021         
2022     protected Token processDecFloatSecondPart(ulong firstPart) {
2023         if (_pos >= _len) {
2024             _sharedRealToken.setValue(cast(real)firstPart);
2025             return _sharedRealToken;
2026         }
2027         ulong divider = 1;
2028         ulong number = 0;
2029         int i = _pos;
2030         bool overflow = false;
2031         for (;i < _len; i++) {
2032             dchar ch = _lineText[i];
2033             uint digit = 0;
2034             if (ch >= '0' && ch <= '9')
2035                 digit = ch - '0';
2036             else if (ch == '_')
2037                 continue;
2038             else
2039                 break;
2040             if (divider * 10 < divider)
2041                 continue; // ignore extra digits
2042             number *= 10;
2043             number += digit;
2044             divider *= 10;
2045         }
2046         _pos = i;
2047         real value = cast(real)firstPart + (cast(real)number / divider);
2048         dchar next = _pos < _len ? _lineText[_pos] : 0;
2049         if (next == 0) {
2050             // neither exponent nor suffix
2051             _sharedRealToken.setValue(value);
2052             return _sharedRealToken;
2053         }
2054            if (next == 'e' || next == 'E') {
2055             _pos++;
2056             return processDecFloatExponent(value);
2057         }
2058         return processDecFloatSuffix(value);
2059     }
2060         
2061     protected Token processDecNumber(dchar c) {
2062         _sharedIntegerToken.setPos(_startLine, _startPos);
2063         _sharedRealToken.setPos(_startLine, _startPos);
2064         //if (_pos >= _len)
2065         //    return parserError("Unexpected end of line in number", _sharedIntegerToken);
2066         int digits = 1;
2067         ulong number = c - '0';
2068         int i = _pos;
2069         bool overflow = false;
2070         if (_line == _startLine) {
2071             for (;i < _len; i++) {
2072                 dchar ch = _lineText[i];
2073                 uint digit = 0;
2074                 if (ch >= '0' && ch <= '9')
2075                     digit = ch - '0';
2076                 else if (ch == '_')
2077                     continue;
2078                 else
2079                     break;
2080                 number *= 10;
2081                 if (digits >= 18) {
2082                     if ((number * 10) / 10 != number) {
2083                         overflow = true;
2084                         break;
2085                     }
2086                 }
2087                 number += digit;
2088                 digits++;
2089             }
2090             _pos = i;
2091         }
2092         if (overflow)
2093             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
2094         _sharedIntegerToken.setValue(number);
2095         dchar next = _line == _startLine && _pos < _len ? _lineText[_pos] : 0;
2096         if (next == 0)
2097             return _sharedIntegerToken;
2098         if (next == 'e' || next == 'E') {
2099             _pos++;
2100             return processDecFloatExponent(number);
2101         } else if (next == '.') {
2102             _pos++;
2103             return processDecFloatSecondPart(number);
2104         }
2105         return processIntegerSuffix();
2106     }
2107         
    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag.
    /// Convenience overload: takes line, position and failed token type from incompleteToken.
    protected Token parserError(string msg, Token incompleteToken) {
        return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
    }
2112     /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
2113     protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
2114         if (_errorTolerant) {
2115             startPos--;
2116             _sharedInvalidToken.setPos(startLine, startPos);
2117             _sharedInvalidToken.errorMessage = msg;
2118             _sharedInvalidToken.errorCode = 1; // for future extension
2119             _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
2120             // make invalid source text
2121             dchar[] invalidText;
2122             int p = startLine == _line ? startPos : 0;
2123             for (int i = p; i < _pos && i < _lineText.length; i++)
2124                 invalidText ~= _lineText[i];
2125 
2126             // recover after error
2127             for (; _pos < _lineText.length; _pos++) {
2128                 dchar ch = _lineText[_pos];
2129                 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
2130                     break;
2131                 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
2132                     if (ch == '*' || ch == '/')
2133                         break;
2134                 }
2135                 invalidText ~= ch;
2136             }
2137             _sharedInvalidToken.text = invalidText;
2138             return _sharedInvalidToken;
2139         }
2140         throw new ParserException(msg, _lineStream.file, _line, _pos);
2141     }
2142 
2143     protected Keyword detectKeyword(dchar ch) {
2144         if (ch > 'z')
2145             return Keyword.NONE;
2146         int len = _len - _pos;
2147         switch (cast(ubyte)ch) {
2148             //    ABSTRACT,
2149             //    ALIAS,
2150             //    ALIGN,
2151             //    ASM,
2152             //    ASSERT,
2153             //    AUTO,
2154             case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos);
2155 
2156             //    BODY,
2157             //    BOOL,
2158             //    BREAK,
2159             //    BYTE,
2160             case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos);
2161                 
2162             //    CASE,
2163             //    CAST,
2164             //    CATCH,
2165             //    CDOUBLE,
2166             //    CENT,
2167             //    CFLOAT,
2168             //    CHAR,
2169             //    CLASS,
2170             //    CONST,
2171             //    CONTINUE,
2172             //    CREAL,
2173             case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos);
2174                 
2175             //    DCHAR,
2176             //    DEBUG,
2177             //    DEFAULT,
2178             //    DELEGATE,
2179             //    DELETE,
2180             //    DEPRECATED,
2181             //    DO,
2182             //    DOUBLE,
2183             case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos);
2184                 
2185             //    ELSE,
2186             //    ENUM,
2187             //    EXPORT,
2188             //    EXTERN,
2189             case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos);
2190                 
2191             //    FALSE,
2192             //    FINAL,
2193             //    FINALLY,
2194             //    FLOAT,
2195             //    FOR,
2196             //    FOREACH,
2197             //    FOREACH_REVERSE,
2198             //    FUNCTION,
2199             case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos);
2200                 
2201             //    GOTO,
2202             case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos);
2203                 
2204             //    IDOUBLE,
2205             //    IF,
2206             //    IFLOAT,
2207             //    IMMUTABLE,
2208             //    IMPORT,
2209             //    IN,
2210             //    INOUT,
2211             //    INT,
2212             //    INTERFACE,
2213             //    INVARIANT,
2214             //    IREAL,
2215             //    IS,
2216             case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos);
2217                 
2218             //    LAZY,
2219             //    LONG,
2220             case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos);
2221                 
2222             //    MACRO,
2223             //    MIXIN,
2224             //    MODULE,
2225             case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos);
2226                 
2227             //    NEW,
2228             //    NOTHROW,
2229             //    NULL,
2230             case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos);
2231                 
2232             //    OUT,
2233             //    OVERRIDE,
2234             case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos);
2235                 
2236             //    PACKAGE,
2237             //    PRAGMA,
2238             //    PRIVATE,
2239             //    PROTECTED,
2240             //    PUBLIC,
2241             //    PURE,
2242             case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos);
2243                 
2244             //    REAL,
2245             //    REF,
2246             //    RETURN,
2247             case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos);
2248                 
2249             //    SCOPE,
2250             //    SHARED,
2251             //    SHORT,
2252             //    STATIC,
2253             //    STRUCT,
2254             //    SUPER,
2255             //    SWITCH,
2256             //    SYNCHRONIZED,
2257             case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos);
2258                 
2259             //    TEMPLATE,
2260             //    THIS,
2261             //    THROW,
2262             //    TRUE,
2263             //    TRY,
2264             //    TYPEDEF,
2265             //    TYPEID,
2266             //    TYPEOF,
2267             case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos);
2268                 
2269             //    UBYTE,
2270             //    UCENT,
2271             //    UINT,
2272             //    ULONG,
2273             //    UNION,
2274             //    UNITTEST,
2275             //    USHORT,
2276             case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos);
2277                 
2278             //    VERSION,
2279             //    VOID,
2280             //    VOLATILE,
2281             case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos);
2282                 
2283             //    WCHAR,
2284             //    WHILE,
2285             //    WITH,
2286             case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos);
2287                 
2288             //    FILE,
2289             //    MODULE,
2290             //    LINE,
2291             //    FUNCTION,
2292             //    PRETTY_FUNCTION,
2293             //
2294             //    GSHARED,
2295             //    TRAITS,
2296             //    VECTOR,
2297             //    PARAMETERS,
2298             case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos);
2299             default: return Keyword.NONE;                
2300         }
2301     }    
2302     protected OpCode detectOp(dchar ch) nothrow {
2303         if (ch >= 128)
2304             return OpCode.NONE;
2305         dchar ch2 = _pos < _len ? _lineText[_pos] : 0;
2306         dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
2307         switch(cast(ubyte)ch) {
2308             //    DIV,         //    /
2309             //    DIV_EQ,     //    /=
2310             case '/':
2311                 if (ch2 == '=') {
2312                     _pos++;
2313                     return OpCode.DIV_EQ;
2314                 }
2315                 return OpCode.DIV;
2316             //    DOT,         //    .
2317             //    DOT_DOT,     //    ..
2318             //    DOT_DOT_DOT,//    ...
2319             case '.':
2320                 if (ch2 == '.') {
2321                     if (ch3 == '.') {
2322                         _pos += 2;
2323                         return OpCode.DOT_DOT_DOT;
2324                     }
2325                     _pos++;
2326                     return OpCode.DOT_DOT;
2327                 }
2328                 return OpCode.DOT;
2329             //    AND,         //    &
2330             //    AND_EQ,     //    &=
2331             //    LOG_AND,     //    &&
2332             case '&':
2333                 if (ch2 == '=') {
2334                     _pos++;
2335                     return OpCode.AND_EQ;
2336                 }
2337                 if (ch2 == '&') {
2338                     _pos++;
2339                     return OpCode.LOG_AND;
2340                 }
2341                 return OpCode.AND;
2342             //    OR,         //    |
2343             //    OR_EQ,         //    |=
2344             //    LOG_OR,     //    ||
2345             case '|':
2346                 if (ch2 == '=') {
2347                     _pos++;
2348                     return OpCode.OR_EQ;
2349                 }
2350                 if (ch2 == '|') {
2351                     _pos++;
2352                     return OpCode.LOG_OR;
2353                 }
2354                 return OpCode.OR;
2355             //    MINUS,         //    -
2356             //    MINUS_EQ,     //    -=
2357             //    MINUS_MINUS,//    --
2358             case '-':
2359                 if (ch2 == '=') {
2360                     _pos++;
2361                     return OpCode.MINUS_EQ;
2362                 }
2363                 if (ch2 == '-') {
2364                     _pos++;
2365                     return OpCode.MINUS_MINUS;
2366                 }
2367                 return OpCode.MINUS;
2368             //    PLUS,         //    +
2369             //    PLUS_EQ,     //    +=
2370             //    PLUS_PLUS,     //    ++
2371             case '+':
2372                 if (ch2 == '=') {
2373                     _pos++;
2374                     return OpCode.PLUS_EQ;
2375                 }
2376                 if (ch2 == '+') {
2377                     _pos++;
2378                     return OpCode.PLUS_PLUS;
2379                 }
2380                 return OpCode.PLUS;
2381             //    LT,         //    <
2382             //    LT_EQ,         //    <=
2383             //    SHL,         //    <<
2384             //    SHL_EQ,     //    <<=
2385             //    LT_GT,         //    <>
2386             //    NE_EQ,         //    <>=
2387             case '<':
2388                 if (ch2 == '<') {
2389                     if (ch3 == '=') {
2390                         _pos += 2;
2391                         return OpCode.SHL_EQ;
2392                     }
2393                     _pos++;
2394                     return OpCode.SHL;
2395                 }
2396                 if (ch2 == '>') {
2397                     if (ch3 == '=') {
2398                         _pos += 2;
2399                         return OpCode.NE_EQ;
2400                     }
2401                     _pos++;
2402                     return OpCode.LT_GT;
2403                 }
2404                 if (ch2 == '=') {
2405                     _pos++;
2406                     return OpCode.LT_EQ;
2407                 }
2408                 return OpCode.LT;
2409             //    GT,         //    >
2410             //    GT_EQ,         //    >=
2411             //    SHR_EQ        //    >>=
2412             //    ASR_EQ,     //    >>>=
2413             //    SHR,         //    >>
2414             //    ASR,         //    >>>
2415             case '>':
2416                 if (ch2 == '>') {
2417                     if (ch3 == '>') {
2418                         dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2419                         if (ch4 == '=') { // >>>=
2420                             _pos += 3;
2421                             return OpCode.ASR_EQ;
2422                         }
2423                         _pos += 2;
2424                         return OpCode.ASR; // >>>
2425                     }
2426                     if (ch3 == '=') { // >>=
2427                         _pos += 2;
2428                         return OpCode.SHR_EQ;
2429                     }
2430                     _pos++;
2431                     return OpCode.SHR;
2432                 }
2433                 if (ch2 == '=') { // >=
2434                     _pos++;
2435                     return OpCode.GT_EQ;
2436                 }
2437                 // >
2438                 return OpCode.GT;
2439             //    NOT,         //    !
2440             //    NOT_EQ        //    !=
2441             //    NOT_LT_GT,     //    !<>
2442             //    NOT_LT_GT_EQ, //    !<>=
2443             //    NOT_LT,     //    !<
2444             //    NOT_LT_EQ,     //    !<=
2445             //    NOT_GT,     //    !>
2446             //    NOT_GT_EQ,     //    !>=
2447             case '!':
2448                 if (ch2 == '<') { // !<
2449                     if (ch3 == '>') { // !<>
2450                         dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2451                         if (ch4 == '=') { // !<>=
2452                             _pos += 3;
2453                             return OpCode.NOT_LT_GT_EQ;
2454                         }
2455                         _pos += 2;
2456                         return OpCode.NOT_LT_GT; // !<>
2457                     }
2458                     if (ch3 == '=') { // !<=
2459                         _pos += 2;
2460                         return OpCode.NOT_LT_EQ;
2461                     }
2462                     _pos++;
2463                     return OpCode.NOT_LT; // !<
2464                 }
2465                 if (ch2 == '=') { // !=
2466                     _pos++;
2467                     return OpCode.NOT_EQ;
2468                 }
2469                 return OpCode.NOT;
2470             //    PAR_OPEN,     //    (
2471             case '(':
2472                 return OpCode.PAR_OPEN;
2473             //    PAR_CLOSE,     //    )
2474             case ')':
2475                 return OpCode.PAR_CLOSE;
2476             //    SQ_OPEN,     //    [
2477             case '[':
2478                 return OpCode.SQ_OPEN;
2479             //    SQ_CLOSE,     //    ]
2480             case ']':
2481                 return OpCode.SQ_CLOSE;
2482             //    CURL_OPEN,     //    {
2483             case '{':
2484                 return OpCode.CURL_OPEN;
2485             //    CURL_CLOSE, //    }
2486             case '}':
2487                 return OpCode.CURL_CLOSE;
2488             //    QUEST,         //    ?
2489             case '?':
2490                 return OpCode.QUEST;
2491             //    COMMA,         //    ,
2492             case ',':
2493                 return OpCode.COMMA;
2494             //    SEMICOLON,     //    ;
2495             case ';':
2496                 return OpCode.SEMICOLON;
2497             //    COLON,         //    :
2498             case ':':
2499                 return OpCode.COLON;
2500             //    DOLLAR,     //    $
2501             case '$':
2502                 return OpCode.DOLLAR;
2503             //    EQ,         //    =
2504             //    QE_EQ,         //    ==
2505             //    EQ_GT,         //    =>
2506             case '=':
2507                 if (ch2 == '=') { // ==
2508                     _pos++;
2509                     return OpCode.QE_EQ;
2510                 }
2511                 if (ch2 == '>') { // =>
2512                     _pos++;
2513                     return OpCode.EQ_GT;
2514                 }
2515                 return OpCode.EQ;
2516             //    MUL,         //    *
2517             //    MUL_EQ,     //    *=
2518             case '*':
2519                 if (ch2 == '=') {
2520                     _pos++;
2521                     return OpCode.MUL_EQ;
2522                 }
2523                 return OpCode.MUL;
2524             //    MOD,     //    %
2525             //    MOD_EQ, //    %=
2526             case '%':
2527                 if (ch2 == '=') {
2528                     _pos++;
2529                     return OpCode.MOD_EQ;
2530                 }
2531                 return OpCode.MOD;
2532             //    XOR,         //    ^
2533             //    XOR_EQ,     //    ^=
2534             //    LOG_XOR,     //    ^^
2535             //    LOG_XOR_EQ, //    ^^=
2536             case '^':
2537                 if (ch2 == '^') {
2538                     if (ch3 == '=') {
2539                         _pos += 2;
2540                         return OpCode.LOG_XOR_EQ;
2541                     }
2542                     _pos++;
2543                     return OpCode.LOG_XOR;
2544                 }
2545                 if (ch2 == '=') {
2546                     _pos++;
2547                     return OpCode.XOR_EQ;
2548                 }
2549                 return OpCode.XOR;
2550             //    INV,         //    ~
2551             //    INV_EQ,     //    ~=
2552             case '~':
2553                 if (ch2 == '=') {
2554                     _pos++;
2555                     return OpCode.INV_EQ;
2556                 }
2557                 return OpCode.INV;
2558             //    AT,         //    @
2559             case '@':
2560                 return OpCode.AT;
2561             //    SHARP         //    #
2562             case '#':
2563                 return OpCode.SHARP;
2564             default:
2565                 return OpCode.NONE;
2566         }
2567     }
2568     
2569     protected Token processCharacterLiteral() {
2570         _sharedCharacterLiteralToken.setPos(_startLine, _startPos);
2571         if (_pos + 2 > _len)
2572             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2573         dchar ch = _lineText[_pos++];
2574         dchar ch2 = _lineText[_pos++];
2575         dchar type = 0;
2576         if (ch == '\\') {
2577             // process escaped character - store it in ch
2578             // TODO: support all escape sequences
2579             switch(ch2) {
2580                 case 'r':
2581                     ch = '\r';
2582                     break;
2583                 case 'n':
2584                     ch = '\n';
2585                     break;
2586                 case 't':
2587                     ch = '\t';
2588                     break;
2589                 case '\\':
2590                     ch = '\\';
2591                     break;
2592                 default:
2593                     ch = ch2;
2594                     break;
2595             }
2596             // here must be closing '
2597             if (_pos + 1 > _len)
2598                 return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2599             ch2 = _lineText[_pos++];
2600         }
2601         if (ch2 != '\'')
2602             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2603         if (_pos < _len) {
2604             dchar t = _lineText[_pos];
2605             if (t == 'd' || t == 'w' || t == 'c') {
2606                 type = t;
2607                 _pos++;
2608             } else if (isIdentMiddleChar(ch)) {
2609                 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken);
2610             }
2611         }
2612         _sharedCharacterLiteralToken.setCharacter(ch, type);
2613         return _sharedCharacterLiteralToken;
2614     }
2615 
2616     protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
2617         bool wysiwyg = (delimiter == 'r' || delimiter == '`');
2618         //writeln("processDoubleQuotedString()");
2619         _sharedStringLiteralToken.setPos(_startLine, _startPos);
2620         _stringLiteralAppender.reset();
2621         if (delimiter == 'r') {
2622             _pos++;
2623             delimiter = '\"';
2624         }
2625         dchar type = 0;
2626         for (;;) {
2627             int i = _pos;
2628             int endPos = int.max;
2629             bool lastBackSlash = false;
2630             for(; i < _len; i++) {
2631                 dchar ch = _lineText[i];
2632                 if (ch == '\\') {
2633                     if (lastBackSlash)
2634                         lastBackSlash = false;
2635                     else
2636                         lastBackSlash = true;
2637                 }
2638                 else if (ch == delimiter && !lastBackSlash) {
2639                     endPos = i;
2640                     break;
2641                 }
2642                 else if(lastBackSlash)
2643                     lastBackSlash = false;
2644             }
2645             if (endPos != int.max) {
2646                 // found end quote
2647                 _stringLiteralAppender.append(_lineText[_pos .. endPos]);
2648                 _pos = endPos + 1;
2649                 break;
2650             }
2651             // no quote by end of line
2652             _stringLiteralAppender.append(_lineText[_pos .. $]);
2653             _stringLiteralAppender.appendEol();
2654             if (!nextLine()) {
2655                 // do we need to throw exception if eof comes before end of string?
2656                 break;
2657             }
2658         }
2659         dchar t = 0;
2660         if (_pos < _len) {
2661             dchar ch = _lineText[_pos];
2662             if (ch == 'c' || ch == 'w' || ch == 'd') {
2663                 t = ch;
2664                 _pos++;
2665                 if (_pos < _len) {
2666                     ch = _lineText[_pos];
2667                     if (isIdentMiddleChar(ch))
2668                         return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2669                 }
2670             } else if (isIdentMiddleChar(ch))
2671                 return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2672         }
2673         if (t != 0) {
2674             if (type != 0 && t != type)
2675                 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
2676             type = t;
2677         }
2678         if (wysiwyg) {
2679             // no escape processing
2680             _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2681             return _sharedStringLiteralToken;
2682         }
2683         _stringLiteralAppender.processEscapeSequences();
2684         _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2685         return _sharedStringLiteralToken;
2686     }
2687 
2688     protected SysTime buildTime;
2689     
2690     //    string literal of the date of compilation "mmm dd yyyy"
2691     protected dstring formatBuildDate() {
2692         // TODO: provide proper format
2693         return to!dstring(buildTime);
2694     }
2695     
2696     //    string literal of the time of compilation "hh:mm:ss"
2697     protected dstring formatBuildTime() {
2698         // TODO: provide proper format
2699         return to!dstring(buildTime);
2700     }
2701     
2702     //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2703     protected dstring formatBuildTimestamp() {
2704         // TODO: provide proper format
2705         return to!dstring(buildTime);
2706     }
2707     
2708     static immutable dstring VERSION = "0.1";
2709     static immutable dstring VENDOR = "coolreader.org";
2710     
2711     protected Token makeSpecialTokenString(dstring str, int pos) {
2712         _sharedStringLiteralToken.setPos(_startLine, _startPos);
2713         _sharedStringLiteralToken.setText(cast(dchar[])str, 0);
2714         return _sharedStringLiteralToken;
2715     }
2716     
2717     protected Token processSpecialToken(Keyword keyword, int pos) {
2718         switch (keyword) {
2719             //Special Token    Replaced with
2720             case Keyword.DATE: //    string literal of the date of compilation "mmm dd yyyy"
2721                 return makeSpecialTokenString(formatBuildDate(), pos);
2722             case Keyword.TIME: //    string literal of the time of compilation "hh:mm:ss"
2723                 return makeSpecialTokenString(formatBuildTime(), pos);
2724             case Keyword.TIMESTAMP: //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2725                 return makeSpecialTokenString(formatBuildTimestamp(), pos);
2726             case Keyword.VENDOR: //    Compiler vendor string, such as "Digital Mars D"
2727                 return makeSpecialTokenString(VENDOR, pos);
2728             case Keyword.VERSION_: //    Compiler version as an integer, such as 2001
2729                 return makeSpecialTokenString(VERSION, pos);
2730             default:
2731                 parserError("Unknown special token", _line, pos);
2732         }
2733         return null;
2734     }
2735     
2736     protected int _startLine;
2737     protected int _startPos;
2738 
2739     // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls).
2740     Token nextToken() {
2741         _startLine = _line;
2742         _startPos = _pos;
2743         dchar ch = nextChar();
2744         if (ch == EOF_CHAR) {
2745             return emitEof();
2746         }
2747         if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
2748             // white space (treat EOL as whitespace, too)
2749             return processWhiteSpace(ch);
2750         }
2751         dchar next = _pos < _len ? _lineText[_pos] : 0;
2752         if (ch == '/') {
2753             if (next == '/')
2754                 return processOneLineComment();
2755             else if (next == '*')
2756                 return processMultilineComment();
2757             else if (next == '+')
2758                 return processNestedComment();
2759         }
2760         if (ch == '#' && _line == 1)
2761             return processOneLineSharpComment();
2762         if (ch == '\"')
2763             return processDoubleQuotedOrWysiwygString(ch);
2764         if (ch == '\'')
2765             return processCharacterLiteral();
2766         if (ch == 'x' && next == '\"')
2767             return processHexString();
2768         if (ch == 'q' && next == '\"')
2769             return processDelimitedString();
2770         if ((ch == 'r' && next == '\"') || (ch == '`'))
2771             return processDoubleQuotedOrWysiwygString(ch);
2772         int oldPos = _pos - 1;
2773         
2774         if (ch == '0') {
2775             if (next == 'b' || next == 'B')
2776                 return processBinaryNumber();
2777             if (next == 'x' || next == 'X')
2778                 return processHexNumber();
2779             if (next >= '0' && next <= '9')
2780                 return processOctNumber();
2781             if (next >= '0' && next <= '9')
2782                 return processDecNumber(ch);
2783         }
2784         if (ch >= '0' && ch <= '9')
2785             return processDecNumber(ch);
2786         if (ch == '.' && next >= '0' && next <= '9') // .123
2787             return processDecFloatSecondPart(0);
2788                 
2789         if (ch == '_' || isUniversalAlpha(ch)) {
2790             // start of identifier or keyword?
2791             Keyword keyword = detectKeyword(ch);
2792             if (keyword != Keyword.NONE) {
2793                 switch (keyword) {
2794                     //Special Token    Replaced with
2795                     case Keyword.EOF: return emitEof(); //    sets the scanner to the end of the file
2796                     case Keyword.DATE: //    string literal of the date of compilation "mmm dd yyyy"
2797                     case Keyword.TIME: //    string literal of the time of compilation "hh:mm:ss"
2798                     case Keyword.TIMESTAMP: //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2799                     case Keyword.VENDOR: //    Compiler vendor string, such as "Digital Mars D"
2800                     case Keyword.VERSION_: //    Compiler version as an integer, such as 2001
2801                         return processSpecialToken(keyword, oldPos);
2802                     default:
2803                         _sharedKeywordToken.setPos(_startLine, _startPos);
2804                         _sharedKeywordToken.keyword = keyword;
2805                         return _sharedKeywordToken;
2806                 }
2807             }
2808             return processIdent(ch);
2809         }
2810         OpCode op = detectOp(ch);
2811         if (op != OpCode.NONE) {
2812             _sharedOpToken.setPos(_startLine, _startPos);
2813             _sharedOpToken.opCode = op;
2814             return _sharedOpToken;
2815         }
2816         return parserError("Invalid token", _line, _pos);
2817     }
2818 
2819     /// tokenize all
2820     Token[] allTokens() {
2821         Token[] res;
2822         res.assumeSafeAppend;
2823         for(;;) {
2824             Token tok = nextToken();
2825             if (!tok || tok.type == TokenType.EOF)
2826                 break;
2827             res ~= tok.clone();
2828         }
2829         return res;
2830     }
2831 }
2832 
// Manual smoke test: dumps every token of a fixed sample file to stdout.
// Note that the body is compiled only when version DisableLexerTest IS set.
unittest {
    version(DisableLexerTest) {
    import std.stdio;
    import std.conv;
    import std.utf;
    import dlangui.core.linestream;
    string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
    writeln("opening file");
    try {
        std.stream.File input = new std.stream.File(fname);
        scope(exit) input.close();
        try {
            LineStream lineStream = LineStream.create(input, fname);
            Tokenizer lexer = new Tokenizer(lineStream);
            while (true) {
                Token tok = lexer.nextToken();
                if (tok is null) {
                    writeln("Null token returned");
                    break;
                } else if (tok.type == TokenType.EOF) {
                    writeln("EOF token");
                    break;
                }
                writeln("", tok.line, ":", tok.pos, "\t", tok.toString);
            }
        } catch (Exception tokenizeError) {
            // failure while reading or tokenizing
            writeln("Exception " ~ tokenizeError.toString);
        }
    } catch (Exception openError) {
        // failure while opening the file
        writeln("Exception " ~ openError.toString);
    }
    }
}
2867 
/// Looks up a named character entity (e.g. "amp") in the entity table;
/// returns its character, or 0 if the name is not registered.
dchar entityToChar(string name) {
    auto found = name in entityToCharMap;
    if (found is null)
        return 0;
    return *found;
}
2875 
/// Finds the entity name registered for a character;
/// returns null if the character has no named entity.
string charToEntity(dchar ch) {
    auto found = ch in charToEntityMap;
    if (found is null)
        return null;
    return *found;
}
2883 
2884 private __gshared dchar[string]entityToCharMap;
2885 private __gshared string[dchar]charToEntityMap;
/// Registers a named entity in both lookup tables (name -> char and char -> name).
/// A later registration for the same name or character replaces the earlier one.
private void addEntity(string name, dchar ch) {
    // forward table: entity name to character
    entityToCharMap[name] = ch;
    // reverse table: character to entity name
    charToEntityMap[ch] = name;
}
2890 __gshared static this() {
2891     addEntity("quot", 34);
2892     addEntity("amp",    38);
2893     addEntity("lt",    60);
2894     addEntity("gt",    62);
2895     addEntity("OElig",    338);
2896     addEntity("oelig",    339);
2897     addEntity("Scaron",    352);
2898     addEntity("scaron",    353);
2899     addEntity("Yuml",    376);
2900     addEntity("circ",    710);
2901     addEntity("tilde",    732);
2902     addEntity("ensp",    8194);
2903     addEntity("emsp",    8195);
2904     addEntity("thinsp",    8201);
2905     addEntity("zwnj",    8204);
2906     addEntity("zwj",    8205);
2907     addEntity("lrm",    8206);
2908     addEntity("rlm",    8207);
2909     addEntity("ndash",    8211);
2910     addEntity("mdash",    8212);
2911     addEntity("lsquo",    8216);
2912     addEntity("rsquo",    8217);
2913     addEntity("sbquo",    8218);
2914     addEntity("ldquo",    8220);
2915     addEntity("rdquo",    8221);
2916     addEntity("bdquo",    8222);
2917     addEntity("dagger",    8224);
2918     addEntity("Dagger",    8225);
2919     addEntity("permil",    8240);
2920     addEntity("lsaquo",    8249);
2921     addEntity("rsaquo",    8250);
2922     addEntity("euro",    8364);
2923     addEntity("nbsp",    160);
2924     addEntity("iexcl",    161);
2925     addEntity("cent",    162);
2926     addEntity("pound",    163);
2927     addEntity("curren",    164);
2928     addEntity("yen",    165);
2929     addEntity("brvbar",    166);
2930     addEntity("sect",    167);
2931     addEntity("uml",    168);
2932     addEntity("copy",    169);
2933     addEntity("ordf",    170);
2934     addEntity("laquo",    171);
2935     addEntity("not",    172);
2936     addEntity("shy",    173);
2937     addEntity("reg",    174);
2938     addEntity("macr",    175);
2939     addEntity("deg",    176);
2940     addEntity("plusmn",    177);
2941     addEntity("sup2",    178);
2942     addEntity("sup3",    179);
2943     addEntity("acute",    180);
2944     addEntity("micro",    181);
2945     addEntity("para",    182);
2946     addEntity("middot",    183);
2947     addEntity("cedil",    184);
2948     addEntity("sup1",    185);
2949     addEntity("ordm",    186);
2950     addEntity("raquo",    187);
2951     addEntity("frac14",    188);
2952     addEntity("frac12",    189);
2953     addEntity("frac34",    190);
2954     addEntity("iquest",    191);
2955     addEntity("Agrave",    192);
2956     addEntity("Aacute",    193);
2957     addEntity("Acirc",    194);
2958     addEntity("Atilde",    195);
2959     addEntity("Auml",    196);
2960     addEntity("Aring",    197);
2961     addEntity("AElig",    198);
2962     addEntity("Ccedil",    199);
2963     addEntity("Egrave",    200);
2964     addEntity("Eacute",    201);
2965     addEntity("Ecirc",    202);
2966     addEntity("Euml",    203);
2967     addEntity("Igrave",    204);
2968     addEntity("Iacute",    205);
2969     addEntity("Icirc",    206);
2970     addEntity("Iuml",    207);
2971     addEntity("ETH",    208);
2972     addEntity("Ntilde",    209);
2973     addEntity("Ograve",    210);
2974     addEntity("Oacute",    211);
2975     addEntity("Ocirc",    212);
2976     addEntity("Otilde",    213);
2977     addEntity("Ouml",    214);
2978     addEntity("times",    215);
2979     addEntity("Oslash",    216);
2980     addEntity("Ugrave",    217);
2981     addEntity("Uacute",    218);
2982     addEntity("Ucirc",    219);
2983     addEntity("Uuml",    220);
2984     addEntity("Yacute",    221);
2985     addEntity("THORN",    222);
2986     addEntity("szlig",    223);
2987     addEntity("agrave",    224);
2988     addEntity("aacute",    225);
2989     addEntity("acirc",    226);
2990     addEntity("atilde",    227);
2991     addEntity("auml",    228);
2992     addEntity("aring",    229);
2993     addEntity("aelig",    230);
2994     addEntity("ccedil",    231);
2995     addEntity("egrave",    232);
2996     addEntity("eacute",    233);
2997     addEntity("ecirc",    234);
2998     addEntity("euml",    235);
2999     addEntity("igrave",    236);
3000     addEntity("iacute",    237);
3001     addEntity("icirc",    238);
3002     addEntity("iuml",    239);
3003     addEntity("eth",    240);
3004     addEntity("ntilde",    241);
3005     addEntity("ograve",    242);
3006     addEntity("oacute",    243);
3007     addEntity("ocirc",    244);
3008     addEntity("otilde",    245);
3009     addEntity("ouml",    246);
3010     addEntity("divide",    247);
3011     addEntity("oslash",    248);
3012     addEntity("ugrave",    249);
3013     addEntity("uacute",    250);
3014     addEntity("ucirc",    251);
3015     addEntity("uuml",    252);
3016     addEntity("yacute",    253);
3017     addEntity("thorn",    254);
3018     addEntity("yuml",    255);
3019     addEntity("fnof",    402);
3020     addEntity("Alpha",    913);
3021     addEntity("Beta",    914);
3022     addEntity("Gamma",    915);
3023     addEntity("Delta",    916);
3024     addEntity("Epsilon",    917);
3025     addEntity("Zeta",    918);
3026     addEntity("Eta",    919);
3027     addEntity("Theta",    920);
3028     addEntity("Iota",    921);
3029     addEntity("Kappa",    922);
3030     addEntity("Lambda",    923);
3031     addEntity("Mu",    924);
3032     addEntity("Nu",    925);
3033     addEntity("Xi",    926);
3034     addEntity("Omicron",    927);
3035     addEntity("Pi",    928);
3036     addEntity("Rho",    929);
3037     addEntity("Sigma",    931);
3038     addEntity("Tau",    932);
3039     addEntity("Upsilon",    933);
3040     addEntity("Phi",    934);
3041     addEntity("Chi",    935);
3042     addEntity("Psi",    936);
3043     addEntity("Omega",    937);
3044     addEntity("alpha",    945);
3045     addEntity("beta",    946);
3046     addEntity("gamma",    947);
3047     addEntity("delta",    948);
3048     addEntity("epsilon",    949);
3049     addEntity("zeta",    950);
3050     addEntity("eta",    951);
3051     addEntity("theta",    952);
3052     addEntity("iota",    953);
3053     addEntity("kappa",    954);
3054     addEntity("lambda",    955);
3055     addEntity("mu",    956);
3056     addEntity("nu",    957);
3057     addEntity("xi",    958);
3058     addEntity("omicron",    959);
3059     addEntity("pi",    960);
3060     addEntity("rho",    961);
3061     addEntity("sigmaf",    962);
3062     addEntity("sigma",    963);
3063     addEntity("tau",    964);
3064     addEntity("upsilon",    965);
3065     addEntity("phi",    966);
3066     addEntity("chi",    967);
3067     addEntity("psi",    968);
3068     addEntity("omega",    969);
3069     addEntity("thetasym",    977);
3070     addEntity("upsih",    978);
3071     addEntity("piv",    982);
3072     addEntity("bull",    8226);
3073     addEntity("hellip",    8230);
3074     addEntity("prime",    8242);
3075     addEntity("Prime",    8243);
3076     addEntity("oline",    8254);
3077     addEntity("frasl",    8260);
3078     addEntity("weierp",    8472);
3079     addEntity("image",    8465);
3080     addEntity("real",    8476);
3081     addEntity("trade",    8482);
3082     addEntity("alefsym",    8501);
3083     addEntity("larr",    8592);
3084     addEntity("uarr",    8593);
3085     addEntity("rarr",    8594);
3086     addEntity("darr",    8595);
3087     addEntity("harr",    8596);
3088     addEntity("crarr",    8629);
3089     addEntity("lArr",    8656);
3090     addEntity("uArr",    8657);
3091     addEntity("rArr",    8658);
3092     addEntity("dArr",    8659);
3093     addEntity("hArr",    8660);
3094     addEntity("forall",    8704);
3095     addEntity("part",    8706);
3096     addEntity("exist",    8707);
3097     addEntity("empty",    8709);
3098     addEntity("nabla",    8711);
3099     addEntity("isin",    8712);
3100     addEntity("notin",    8713);
3101     addEntity("ni",    8715);
3102     addEntity("prod",    8719);
3103     addEntity("sum",    8721);
3104     addEntity("minus",    8722);
3105     addEntity("lowast",    8727);
3106     addEntity("radic",    8730);
3107     addEntity("prop",    8733);
3108     addEntity("infin",    8734);
3109     addEntity("ang",    8736);
3110     addEntity("and",    8743);
3111     addEntity("or",    8744);
3112     addEntity("cap",    8745);
3113     addEntity("cup",    8746);
3114     addEntity("int",    8747);
3115     addEntity("there4",    8756);
3116     addEntity("sim",    8764);
3117     addEntity("cong",    8773);
3118     addEntity("asymp",    8776);
3119     addEntity("ne",    8800);
3120     addEntity("equiv",    8801);
3121     addEntity("le",    8804);
3122     addEntity("ge",    8805);
3123     addEntity("sub",    8834);
3124     addEntity("sup",    8835);
3125     addEntity("nsub",    8836);
3126     addEntity("sube",    8838);
3127     addEntity("supe",    8839);
3128     addEntity("oplus",    8853);
3129     addEntity("otimes",    8855);
3130     addEntity("perp",    8869);
3131     addEntity("sdot",    8901);
3132     addEntity("lceil",    8968);
3133     addEntity("rceil",    8969);
3134     addEntity("lfloor",    8970);
3135     addEntity("rfloor",    8971);
3136     addEntity("loz",    9674);
3137     addEntity("spades",    9824);
3138     addEntity("clubs",    9827);
3139     addEntity("hearts",    9829);
3140     addEntity("diams",    9830);
3141     addEntity("lang",    10216);
3142     addEntity("rang",    10217);
3143 }
3144 
3145 
3146 
3147 //void runTokenizerTest()
unittest
{
    import std.algorithm;

    // Tokenizer self-test.
    //
    // Each expectation is a TokenTest object that pulls exactly one token from
    // the tokenizer and validates it. testTokenizer() feeds a source string to a
    // fresh Tokenizer and runs a list of expectations against it in order.
    // The check*() factories capture __FILE__/__LINE__ of the call site so that a
    // failure message points at the expectation that failed.
    //
    // NOTE: local declarations inside a unittest cannot be forward-referenced,
    // so the order of the declarations below matters.

    // Base expectation: accepts any token. Subclasses override doTest().
    class TokenTest {
        // line of the check*() call which created this expectation
        uint _line;
        // file of the check*() call which created this expectation
        string _file;
        this(string file, uint line) {
            _file = file;
            _line = line;
        }
        // Returns true if the token satisfies this expectation.
        bool doTest(Token token) {
            return true;
        }
        // Reads the next token from the tokenizer and asserts that it matches.
        void execute(Tokenizer tokenizer) {
            Token token = tokenizer.nextToken();
            if (!doTest(token)) {
                assert(false, "    token does not match at " ~ _file ~ ":" ~ to!string(_line) ~ "  foundToken: " ~ token.toString ~ " expected: " ~ toString);
            }
        }
        // Human readable description of the expectation (used in failure messages).
        public override @property string toString() {
            return "TokenTest";
        }
    }

    // Tokenizes `code` and checks that the produced tokens match `tokens`, in order.
    // Only tokens.length tokens are consumed; anything the tokenizer would
    // produce after that is not inspected.
    void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
        Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
        foreach (expected; tokens)
            expected.execute(tokenizer);
    }

    // Expects a KEYWORD token with the given keyword code.
    class KeywordTest : TokenTest {
        Keyword _code;
        this(Keyword code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.KEYWORD)
                return false;
            if (token.keyword != _code)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Keyword:" ~ to!string(_code);
        }
    }

    // Expects an OP token with the given operator code.
    class OpTest : TokenTest {
        OpCode _code;
        this(OpCode code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.OP)
                return false;
            if (token.opCode != _code)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Op:" ~ to!string(_code);
        }
    }

    // Expects a STRING token with the given text and literal type.
    // literalType 0 means "no suffix"; the tests below also use 'c', 'w' and 'd'.
    class StringTest : TokenTest {
        dstring _value;
        dchar _literalType;
        this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _literalType = literalType;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.STRING)
                return false;
            if (!token.text.equal(_value))
                return false;
            if (token.literalType != _literalType)
                return false;
            return true;
        }
        public override @property string toString() {
            // a space is printed in place of a missing literal type suffix
            return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
        }
    }

    // Expects an INTEGER token with the given value and unsigned / long flags.
    class IntegerTest : TokenTest {
        ulong _value;
        bool _unsigned;
        bool _long;
        this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _unsigned = unsignedFlag;
            _long = longFlag;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.INTEGER)
                return false;
            if (token.intValue != _value)
                return false;
            if (token.isUnsigned != _unsigned)
                return false;
            if (token.isLong != _long)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Integer:" ~ to!string(_value);
        }
    }

    // Expects a FLOAT token whose value is within a relative error of 1e-6 of
    // the given value, with the given precision code and imaginary flag.
    // Precision codes as used by toString() and the tests below:
    // 0 - 'f' suffix, 1 - no suffix (default), 2 - 'L' suffix.
    class RealTest : TokenTest {
        real _value;
        ubyte _precision;
        bool _imaginary;
        this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _precision = precision;
            _imaginary = imaginary;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.FLOAT)
                return false;
            real diff = token.realValue - _value;
            real maxerr = _value / 1000000;
            if (diff < 0) diff = -diff;
            if (maxerr < 0) maxerr = -maxerr;
            // Written as !(diff <= maxerr) rather than (diff > maxerr): every
            // ordered comparison with NaN is false, so a NaN token value must
            // be rejected here instead of silently passing the tolerance check.
            if (!(diff <= maxerr))
                return false;
            if (token.precision != _precision)
                return false;
            if (token.isImaginary != _imaginary)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
        }
    }

    // Expects an IDENTIFIER token with the given text.
    class IdentTest : TokenTest {
        string _value;
        this(string value, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.IDENTIFIER)
                return false;
            if (! to!string(token.text).equal(_value))
                return false;
            return true;
        }
        public override @property string toString() {
            return "Ident:" ~ _value;
        }
    }

    // Expects a COMMENT token (content is not checked).
    class CommentTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.COMMENT)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Comment";
        }
    }

    // Expects the EOF token.
    class EOFTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.EOF)
                return false;
            return true;
        }
        public override @property string toString() {
            return "EOF";
        }
    }

    // Expects a WHITESPACE token (content is not checked).
    class WhiteSpaceTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.WHITESPACE)
                return false;
            return true;
        }
        public override @property string toString() {
            return "whiteSpace";
        }
    }

    // Factory helpers. The file/line defaults are evaluated at the call site,
    // so each expectation remembers where in the test it was written.
    TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
        return new StringTest(value, literalType, file, line);
    }
    TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
        return new IntegerTest(value, unsignedFlag, longFlag, file, line);
    }
    // precision is ubyte to match RealTest's constructor
    TokenTest checkReal(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
        return new RealTest(value, precision, imaginary, file, line);
    }
    TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
        return new IdentTest(value, file, line);
    }
    TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
        return new KeywordTest(value, file, line);
    }
    TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
        return new OpTest(value, file, line);
    }
    TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
        return new WhiteSpaceTest(file, line);
    }
    TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
        return new CommentTest(file, line);
    }
    TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
        return new EOFTest(file, line);
    }

    // test wysiwyg string: the backslash sequence must be kept verbatim
    testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);

    // test strings: escapes, backtick strings, c/w/d suffixes, named character entity
    testTokenizer(q"TEST
"simple string"
"simple\nstring"
`simple string`
"simple string"d
"simple string"c
"simple string"w
"simple\&quot;string"
"\r\n\f\t\\\"\'&"
TEST"
                  , [
                      checkString("simple string"),
                      checkSpace(),
                      checkString("simple\nstring"),
                      checkSpace(),
                      checkString("simple string"),
                      checkSpace(),
                      checkString("simple string", 'd'),
                      checkSpace(),
                      checkString("simple string", 'c'),
                      checkSpace(),
                      checkString("simple string", 'w'),
                      checkSpace(),
                      checkString("simple\&quot;string"),
                      checkSpace(),
                      checkString("\r\n\f\t\\\"\'&"),
    ]);
    // basic test
    testTokenizer(q"TEST
int i;
TEST"
                  , [
                      checkKeyword(Keyword.INT),
                      checkSpace(),
                      checkIdent("i"),
                      checkOp(OpCode.SEMICOLON),
                      checkEOF()
                  ]);
    // test numbers: binary, hex with U / L suffixes, octal, decimal,
    // underscore separators, and floats with f / L / i suffixes and exponents
    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
                  , [
                      checkInteger(13),
                      checkSpace(),
                      checkInteger(0x123abcd, true, false),
                      checkSpace(),
                      checkInteger(0xabc, false, true),
                      checkSpace(),
                      checkInteger(std.conv.octal!743),
                      checkSpace(),
                      checkInteger(192_837_465),
                      checkSpace(),
                      checkInteger(0),
                      checkSpace(),
                      checkInteger(192837465),
                      checkSpace(),
                      checkReal(5.25),
                      checkSpace(),
                      checkReal(12.3f, 0),
                      checkSpace(),
                      checkReal(54.1L, 2),
                      checkSpace(),
                      checkReal(67.1, 1, true),
                      checkSpace(),
                      checkReal(3e3),
                      checkSpace(),
                      checkReal(25.67e-5f, 0),
                      checkEOF()
                  ]);
}
3443