1 module ddc.lexer.tokenizer;
2 
3 import ddc.lexer.textsource;
4 import ddc.lexer.exceptions;
5 
6 import std.stdio;
7 import std.datetime;
8 import std.conv;
9 import std.utf;
10 import std.math;
11 
/**
 * Category of a lexer token.
 *
 * Stored as a `ubyte`; members take consecutive values starting at 0
 * in declaration order (EOF == 0 ... INVALID == 10).
 */
enum TokenType : ubyte
{
    EOF,
    // EOL,  (member is commented out; it does not take a slot in the numbering)
    WHITESPACE,
    COMMENT,
    IDENTIFIER,
    STRING,
    CHARACTER,
    INTEGER,
    FLOAT,
    KEYWORD,
    OP,
    INVALID,
}
26 
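// Bit table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex D) OR a..z OR A..Z OR _
// Each uint holds the flags for 32 consecutive code points: entry index is (ch >> 5), bit number is (ch & 31).
// The highest code point covered is 0xd7ff, hence 0xd800 / 32 = 1728 entries.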
30 const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
31     0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
32     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
33     0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
34     0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
35     0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
36     0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
37     0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
38     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
39     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
40     0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
41     0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
42     0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
43     0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
44     0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff
45     0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff
46     0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff
47     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff
48     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff
49     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff
50     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff
51     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff
52     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff
53     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff
54     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff
55     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff
56     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff
57     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff
58     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff
59     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff
60     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff
61     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff
62     0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff
63     0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff
64     0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff
65     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff
66     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff
67     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff
68     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff
69     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff
70     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff
71     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff
72     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff
73     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff
74     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff
75     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff
76     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff
77     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff
78     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff
79     0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff
80     0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff
81     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff
82     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff
83     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff
84     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff
85     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff
86     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff
87     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff
88     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff
89     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff
90     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff
91     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff
92     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff
93     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff
94     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff
95     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff
96     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff
97     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff
98     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff
99     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff
100     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff
101     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff
102     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff
103     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff
104     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff
105     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff
106     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff
107     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff
108     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff
109     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff
110     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff
111     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff
112     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff
113     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff
114     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff
115     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff
116     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff
117     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff
118     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff
119     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff
120     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff
121     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff
122     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff
123     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff
124     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff
125     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff
126     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff
127     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff
128     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff
129     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff
130     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff
131     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff
132     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff
133     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff
134     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff
135     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff
136     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff
137     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff
138     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff
139     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff
140     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff
141     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff
142     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff
143     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff
144     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff
145     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff
146     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff
147     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff
148     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff
149     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff
150     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff
151     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff
152     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff
153     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff
154     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff
155     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff
156     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff
157     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff
158     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff
159     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff
160     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff
161     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff
162     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff
163     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff
164     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff
165     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff
166     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff
167     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff
168     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff
169     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff
170     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff
171     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff
172     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff
173     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff
174     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff
175     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff
176     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff
177     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff
178     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff
179     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff
180     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff
181     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff
182     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff
183     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff
184     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff
185     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff
186     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff
187     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff
188     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff
189     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff
190     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff
191     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff
192     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff
193     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff
194     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff
195     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff
196     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff
197     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff
198     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff
199     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff
200     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff
201     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff
202     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff
203     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff
204     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff
205     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff
206     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff
207     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff
208     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff
209     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff
210     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff
211     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff
212     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff
213     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff
214     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff
215     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff
216     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff
217     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff
218     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff
219     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff
220     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff
221     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff
222     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff
223     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff
224     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff
225     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff
226     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff
227     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff
228     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff
229     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff
230     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff
231     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff
232     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff
233     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff
234     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
235     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
236     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
237     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
238     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
239     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
240     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
241     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
242     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
243     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
244     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
245     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
246     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
247 ];
248 
249 /// returns true if character is A..Z, a..z, _ or universal alpha
/**
 * Table-driven test for identifier letters.
 *
 * Returns: true if `ch` is A..Z, a..z, '_' or a universal alpha character;
 *          false otherwise, including every code point above 0xd7ff
 *          (those are outside the range covered by UNIVERSAL_ALPHA_FLAGS).
 */
bool isUniversalAlpha(dchar ch) pure nothrow {
    // the flag table ends at code point 0xd7ff
    if (ch > 0xd7ff)
        return false;
    // one uint per 32 code points: pick the word, then the bit inside it
    immutable uint flagWord = UNIVERSAL_ALPHA_FLAGS[ch >> 5];
    immutable uint bitMask = 1u << (ch & 31);
    return (flagWord & bitMask) != 0;
}
253 
254 /// character can present at the beginning of identifier
/// Returns true if `ch` may appear as the first character of an identifier
/// (exactly the characters accepted by isUniversalAlpha).
bool isIdentStartChar(dchar ch) pure nothrow {
    immutable bool startsIdentifier = isUniversalAlpha(ch);
    return startsIdentifier;
}
258 
259 /// character can present in middle of identifier
/// Returns true if `ch` may appear after the first character of an identifier:
/// an ASCII digit 0..9, or any character accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    // ASCII digits are accepted without consulting the table
    if ('0' <= ch && ch <= '9')
        return true;
    return isUniversalAlpha(ch);
}
263     
/// Compile-time switch tested by the `static if` that follows; while it is false,
/// the code inside that `static if` is not compiled.
immutable ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
265 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
266         bool r(dchar ch, wchar v) pure nothrow {
267             return ch == v;
268         }
269         
270         bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
271             return ch >= v1 && ch <= v2;
272         }
273 
274         bool isUniversalAlphaSlow(dchar c)  pure nothrow {
275             return 
276                 // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
277                 // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
278                 r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
279                 || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
280                 //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
281                 //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
282                 //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
283                 //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
284                 //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
285                 || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
286                 || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
287                 || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
288                 || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
289                 || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
290                 //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
291                 //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
292                 || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
293                 || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
294                 //Armenian: 0531−0556, 0561−0587
295                 || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
296                 //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
297                 //05F0−05F2
298                 || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
299                 || r(c, 0x05F0,0x05F2)
300                 //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
301                 //06D0−06DC, 06E5−06E8, 06EA−06ED
302                 || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
303                 || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
304                 //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
305                 || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
306                 //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
307                 //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
308                 //09DC−09DD, 09DF−09E3, 09F0−09F1
309                 || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
310                 || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
311                 || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
312                 //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
313                 //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
314                 //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
315                 || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
316                 || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
317                 || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
318                 //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
319                 //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
320                 //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
321                 || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
322                 || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
323                 || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
324                 // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
325                 //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
326                 //0B5C−0B5D, 0B5F−0B61
327                 || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
328                 || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
329                 || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
330                 //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
331                 //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
332                 //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
333                 || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
334                 || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
335                 || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
336                 //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
337                 //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
338                 || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
339                 || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
340                 //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
341                 //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
342                 //0CE0−0CE1
343                 || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
344                 || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
345                 || r(c, 0x0CE0,0x0CE1)
346                 //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
347                 //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
348                 || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
349                 || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
350                 //Thai: 0E01−0E3A, 0E40−0E5B
351                 || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
352                 //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
353                 //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
354                 //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
355                 //0EC8−0ECD, 0EDC−0EDD
356                 || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
357                 || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
358                 || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
359                 || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
360                 //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
361                 //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
362                 //0FB1−0FB7, 0FB9
363                 || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
364                 || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
365                 || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
366                 //Georgian: 10A0−10C5, 10D0−10F6
367                 || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
368                 //Hiragana: 3041−3093, 309B−309C
369                 || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
370                 //Katakana: 30A1−30F6, 30FB−30FC
371                 || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
372                 //Bopomofo: 3105−312C
373                 || r(c, 0x3105,0x312C)
374                 //CJK Unified Ideographs: 4E00−9FA5
375                 || r(c, 0x4E00,0x9FA5)
376                 //Hangul: AC00−D7A3
377                 || r(c, 0xAC00,0xD7A3)
378                 //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
379                 //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
380                 //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
381                 || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
382                 || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
383                 || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
384                 //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
385                 //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
386                 //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
387                 //2133−2138, 2160−2182, 3005−3007, 3021−3029
388                 || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
389                 || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
390                 || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
391                 || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
392                 ;
393         }
394 
395 }
396 
unittest {
    // Developer aid, compiled only when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true.
    // It (1) prints D source for the UNIVERSAL_ALPHA_FLAGS bit table, computed from the
    // slow range-based check, and (2) verifies that isUniversalAlpha() agrees with that
    // slow check for every code point below 0x100000.
    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        immutable uint itemsInRow = 8; // 32-bit words printed per output line

        // Reference answer: slow universal-alpha check, plus '_' and ASCII letters.
        bool expectedAlpha(uint ch) {
            return isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        }

        // Highest code point below 0x10000 that is accepted by the reference check.
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            if (expectedAlpha(i))
                maxAlpha = i;
        }
        // Round up so the table ends on a complete output row (itemsInRow * 32 code points).
        maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1;
        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            if ((i / 32) % itemsInRow  == 0)
                write("    ");
            // Bit j of the word is set when code point (i + j) is an identifier character.
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                if (expectedAlpha(i + j))
                    flags |= (1u << j);
            }
            writef("0x%08x", flags);
            // No comma after the very last word.
            if (i != maxAlpha / 32 * 32)
                write(",");
            // End of row: append the code point range covered by this row.
            if ((i / 32) % itemsInRow  == itemsInRow - 1)
                writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
        }
        writeln("];");

        // Cross-check the fast implementation against the reference predicate.
        for (uint ch = 0; ch < 0x100000; ch++) {
            bool flag = expectedAlpha(ch);
            bool flag2 = isUniversalAlpha(ch);
            if (flag2 != flag) {
                // NOTE(review): this repeated call has no visible effect; presumably kept as a
                // convenient place to set a breakpoint when a mismatch is found.
                isUniversalAlpha(ch);
                writefln("universalAlpha test failed for char %06x expected %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
            }
            assert(flag2 == flag);
        }
    }
}
442 
/**
 * Operator / punctuation codes recognized by the tokenizer.
 *
 * The numeric value of each member is its index into OP_CODE_STRINGS, so the
 * order of members here and the order of entries in that table must stay in sync
 * (enforced by the static assert that follows the table).
 */
enum OpCode : ubyte {
    NONE,           //    no op
    DIV,            //    /
    DIV_EQ,         //    /=
    DOT,            //    .
    DOT_DOT,        //    ..
    DOT_DOT_DOT,    //    ...
    AND,            //    &
    AND_EQ,         //    &=
    LOG_AND,        //    &&
    OR,             //    |
    OR_EQ,          //    |=
    LOG_OR,         //    ||
    MINUS,          //    -
    MINUS_EQ,       //    -=
    MINUS_MINUS,    //    --
    PLUS,           //    +
    PLUS_EQ,        //    +=
    PLUS_PLUS,      //    ++
    LT,             //    <
    LT_EQ,          //    <=
    SHL,            //    <<
    SHL_EQ,         //    <<=
    LT_GT,          //    <>
    NE_EQ,          //    <>=
    GT,             //    >
    GT_EQ,          //    >=
    SHR_EQ,         //    >>=
    ASR_EQ,         //    >>>=
    SHR,            //    >>
    ASR,            //    >>>
    NOT,            //    !
    NOT_EQ,         //    !=
    NOT_LT_GT,      //    !<>
    NOT_LT_GT_EQ,   //    !<>=
    NOT_LT,         //    !<
    NOT_LT_EQ,      //    !<=
    NOT_GT,         //    !>
    NOT_GT_EQ,      //    !>=
    PAR_OPEN,       //    (
    PAR_CLOSE,      //    )
    SQ_OPEN,        //    [
    SQ_CLOSE,       //    ]
    CURL_OPEN,      //    {
    CURL_CLOSE,     //    }
    QUEST,          //    ?
    COMMA,          //    ,
    SEMICOLON,      //    ;
    COLON,          //    :
    DOLLAR,         //    $
    EQ,             //    =
    QE_EQ,          //    ==   (member name kept as-is so existing references keep compiling)
    MUL,            //    *
    MUL_EQ,         //    *=
    MOD,            //    %
    MOD_EQ,         //    %=
    XOR,            //    ^
    XOR_EQ,         //    ^=
    LOG_XOR,        //    ^^
    LOG_XOR_EQ,     //    ^^=
    INV,            //    ~
    INV_EQ,         //    ~=
    AT,             //    @
    EQ_GT,          //    =>
    SHARP           //    #
}

/// Source spelling of every OpCode member, indexed by the member's numeric value.
immutable dstring[] OP_CODE_STRINGS = [
    "",
    "/",
    "/=",
    ".",
    "..",
    "...",
    "&",
    "&=",
    "&&",
    "|",
    "|=",
    "||",
    "-",
    "-=",
    "--",
    "+",
    "+=",
    "++",
    "<",
    "<=",
    "<<",
    "<<=",
    "<>",
    "<>=",
    ">",
    ">=",
    ">>=",
    ">>>=",
    ">>",
    ">>>",
    "!",
    "!=",
    "!<>",
    "!<>=",
    "!<",
    "!<=",
    "!>",
    "!>=",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
    "?",
    ",",
    ";",
    ":",
    "$",
    "=",
    "==",
    "*",
    "*=",
    "%",
    "%=",
    "^",
    "^=",
    "^^",
    "^^=",
    "~",
    "~=",
    "@",
    "=>",
    "#"
];

// Adding an OpCode member without adding its spelling (or vice versa) must fail at compile time
// instead of silently shifting every later operator name.
static assert(OP_CODE_STRINGS.length == OpCode.max + 1,
    "OP_CODE_STRINGS must contain exactly one entry per OpCode member");

/**
 * Returns the source spelling of an operator.
 *
 * Params:
 *     op = operator code; used directly as an index into OP_CODE_STRINGS
 * Returns: the operator text, or an empty string for OpCode.NONE
 */
dstring getOpNameD(OpCode op) pure nothrow {
    return OP_CODE_STRINGS[op];
}
580 
/**
 * Keyword and special-token codes recognized by the tokenizer.
 *
 * The numeric value of each member is its index into KEYWORD_STRINGS, so the
 * order of members here and the order of entries in that table must stay in sync
 * (enforced by the static assert that follows the table).
 */
enum Keyword : ubyte {
    NONE,
    ABSTRACT,
    ALIAS,
    ALIGN,
    ASM,
    ASSERT,
    AUTO,

    BODY,
    BOOL,
    BREAK,
    BYTE,

    CASE,
    CAST,
    CATCH,
    CDOUBLE,
    CENT,
    CFLOAT,
    CHAR,
    CLASS,
    CONST,
    CONTINUE,
    CREAL,

    DCHAR,
    DEBUG,
    DEFAULT,
    DELEGATE,
    DELETE,
    DEPRECATED,
    DO,
    DOUBLE,

    ELSE,
    ENUM,
    EXPORT,
    EXTERN,

    FALSE,
    FINAL,
    FINALLY,
    FLOAT,
    FOR,
    FOREACH,
    FOREACH_REVERSE,
    FUNCTION,

    GOTO,

    IDOUBLE,
    IF,
    IFLOAT,
    IMMUTABLE,
    IMPORT,
    IN,
    INOUT,
    INT,
    INTERFACE,
    INVARIANT,
    IREAL,
    IS,

    LAZY,
    LONG,

    MACRO,
    MIXIN,
    MODULE,

    NEW,
    NOTHROW,
    NULL,

    OUT,
    OVERRIDE,

    PACKAGE,
    PRAGMA,
    PRIVATE,
    PROTECTED,
    PUBLIC,
    PURE,

    REAL,
    REF,
    RETURN,

    SCOPE,
    SHARED,
    SHORT,
    STATIC,
    STRUCT,
    SUPER,
    SWITCH,
    SYNCHRONIZED,

    TEMPLATE,
    THIS,
    THROW,
    TRUE,
    TRY,
    TYPEDEF,
    TYPEID,
    TYPEOF,

    UBYTE,
    UCENT,
    UINT,
    ULONG,
    UNION,
    UNITTEST,
    USHORT,

    VERSION,
    VOID,
    VOLATILE,

    WCHAR,
    WHILE,
    WITH,

    // __FILE__, __MODULE__, __LINE__, __FUNCTION__, __PRETTY_FUNCTION__
    FILE,
    MODULE__,
    LINE,
    FUNCTION__,
    PRETTY_FUNCTION,

    // Special Token    Replaced with
    DATE,      //    string literal of the date of compilation "mmm dd yyyy"
    EOF,       //    sets the scanner to the end of the file
    TIME,      //    string literal of the time of compilation "hh:mm:ss"
    TIMESTAMP, //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    VENDOR,    //    Compiler vendor string, such as "Digital Mars D"
    VERSION_,  //    Compiler version as an integer, such as 2001

    // __gshared, __traits, __vector, __parameters
    GSHARED,
    TRAITS,
    VECTOR,
    PARAMETERS,

}

/// Source spelling of every Keyword member, indexed by the member's numeric value.
immutable dstring[] KEYWORD_STRINGS = [
    "",
    "abstract",
    "alias",
    "align",
    "asm",
    "assert",
    "auto",

    "body",
    "bool",
    "break",
    "byte",

    "case",
    "cast",
    "catch",
    "cdouble",
    "cent",
    "cfloat",
    "char",
    "class",
    "const",
    "continue",
    "creal",

    "dchar",
    "debug",
    "default",
    "delegate",
    "delete",
    "deprecated",
    "do",
    "double",

    "else",
    "enum",
    "export",
    "extern",

    "false",
    "final",
    "finally",
    "float",
    "for",
    "foreach",
    "foreach_reverse",
    "function",

    "goto",

    "idouble",
    "if",
    "ifloat",
    "immutable",
    "import",
    "in",
    "inout",
    "int",
    "interface",
    "invariant",
    "ireal",
    "is",

    "lazy",
    "long",

    "macro",
    "mixin",
    "module",

    "new",
    "nothrow",
    "null",

    "out",
    "override",

    "package",
    "pragma",
    "private",
    "protected",
    "public",
    "pure",

    "real",
    "ref",
    "return",

    "scope",
    "shared",
    "short",
    "static",
    "struct",
    "super",
    "switch",
    "synchronized",

    "template",
    "this",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typeof",

    "ubyte",
    "ucent",
    "uint",
    "ulong",
    "union",
    "unittest",
    "ushort",

    "version",
    "void",
    "volatile",

    "wchar",
    "while",
    "with",

    "__FILE__",
    "__MODULE__",
    "__LINE__",
    "__FUNCTION__",
    "__PRETTY_FUNCTION__",

    // Special Token    Replaced with
    "__DATE__",      //    string literal of the date of compilation "mmm dd yyyy"
    "__EOF__",       //    sets the scanner to the end of the file
    "__TIME__",      //    string literal of the time of compilation "hh:mm:ss"
    "__TIMESTAMP__", //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    "__VENDOR__",    //    Compiler vendor string, such as "Digital Mars D"
    "__VERSION__",   //    Compiler version as an integer, such as 2001

    "__gshared",
    "__traits",
    "__vector",
    "__parameters"
];

// Adding a Keyword member without adding its spelling (or vice versa) must fail at compile time
// instead of silently shifting every later keyword name.
static assert(KEYWORD_STRINGS.length == Keyword.max + 1,
    "KEYWORD_STRINGS must contain exactly one entry per Keyword member");

/**
 * Returns the source spelling of a keyword.
 *
 * Params:
 *     keyword = keyword code; used directly as an index into KEYWORD_STRINGS
 * Returns: the keyword text, or an empty string for Keyword.NONE
 */
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    return KEYWORD_STRINGS[keyword];
}
872 
/**
 * Looks for a keyword in the inclusive range [start, end] of KEYWORD_STRINGS that matches
 * the text at `name`.
 *
 * The first character of each candidate keyword is not compared here: the keyword's
 * characters from index 1 onward are matched against name[0], name[1], ...
 * (presumably the caller has already dispatched on the first character - confirm at call sites).
 * A candidate is accepted only when it is followed by the end of the available text or by a
 * character for which isIdentMiddleChar() is false, so a keyword that is merely a prefix of a
 * longer identifier is rejected.
 *
 * Params:
 *     start = first keyword to try
 *     end   = last keyword to try (inclusive)
 *     name  = pointer to the text following the already-consumed first character
 *     len   = number of characters readable at `name`
 *     pos   = advanced by the number of matched characters (keyword length - 1) on success
 * Returns: the matched keyword, or Keyword.NONE when nothing matches
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    // A negative length would wrap around in the unsigned comparisons below and let the
    // loop read past the buffer; there is nothing to match in that case.
    if (len < 0)
        return Keyword.NONE;
    immutable size_t available = cast(size_t)len;
    for (Keyword i = start; i <= end; i++) {
        dstring s = KEYWORD_STRINGS[i];
        // The Keyword.NONE placeholder is empty; "length - 1" would underflow for it.
        if (s.length == 0)
            continue;
        immutable size_t tail = s.length - 1; // characters left after the first one
        if (tail > available)
            continue; // too long
        bool found = true;
        for (size_t j = 0; j < tail; j++) {
            if (s[j + 1] != name[j]) {
                found = false;
                break;
            }
        }
        if (!found)
            continue;
        // Accept only a whole word: either the text ends here or the next char cannot continue an identifier.
        if (tail == available || !isIdentMiddleChar(name[tail])) {
            pos += cast(int)tail;
            return i;
        }
    }
    return Keyword.NONE;
}
895 
896 /**
897  * Token.
898  */
899 class Token {
900     //                                 32bit      64bit platform
901     //                    vtable       4 bytes    8 bytes
902     protected SourceFile _file;   //   4 bytes    8 bytes
903     protected int _line;          //   4 bytes    4 bytes
904     protected int _pos;           //   4 bytes    4 bytes
905     protected TokenType _type;    //   1 byte     1 byte
906     //                    total        17 bytes   25 bytes
907     /// returns token type
908     @property TokenType type() { return _type; }
909     /// returns file info for source
910     @property SourceFile filename() { return _file; }
911     /// returns 1-based source line number of token start
912     @property int line() { return _line; }
913     /// returns 1-based source line position of token start
914     @property int pos() { return _pos; }
915     /// returns token text
916     @property dstring text() { return null; }
917 
918     // number token properties
919     @property dchar literalType() { return 0; }
920     @property ulong intValue() { return 0; }
921     @property bool isUnsigned() { return false; }
922     @property ulong isLong() { return false; }
923     @property real realValue() { return 0; }
924     @property double doubleValue() { return 0; }
925     @property float floatValue() { return 0; }
926     @property byte precision() { return 0; }
927     @property bool isImaginary() { return false; }
928     @property bool isBracket() {
929         OpCode op = opCode; 
930         return op == OpCode.PAR_OPEN 
931             || op == OpCode.PAR_CLOSE 
932             || op == OpCode.SQ_OPEN 
933             || op == OpCode.SQ_CLOSE 
934             || op == OpCode.CURL_OPEN 
935             || op == OpCode.CURL_CLOSE; 
936     }
937     @property bool isOpenBracket() {
938         OpCode op = opCode;
939         return op == OpCode.PAR_OPEN
940             || op == OpCode.SQ_OPEN
941             || op == OpCode.CURL_OPEN;
942     }
943     @property bool isCloseBracket() {
944         OpCode op = opCode; 
945         return op == OpCode.PAR_CLOSE
946             || op == OpCode.SQ_CLOSE
947             || op == OpCode.CURL_CLOSE;
948     }
949     @property bool isEof() { return type == TokenType.EOF; }
950 
951     /// returns opcode ID - for opcode tokens
952     @property OpCode opCode() { return OpCode.NONE; }
953     /// returns keyword ID - for keyword tokens
954     @property Keyword keyword() { return Keyword.NONE; }
955     /// returns true if this is documentation comment token
956     @property bool isDocumentationComment() { return false; }
957     /// returns true if this is multiline
958     @property bool isMultilineComment() { return false; }
959 
960     // error handling
961 
962     /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
963     @property bool isError() { return type == TokenType.INVALID; }
964     /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
965     @property string errorMessage() { return null; }
966     /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
967     @property int errorCode() { return 0; }
968     /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
969     @property TokenType invalidTokenType() { return TokenType.INVALID; }
970 
971 
972     this(TokenType type) {
973         _type = type;
974     }
975 
976     this(TokenType type, SourceFile file, int line, int pos) {
977         _type = type;
978         _file = file;
979         _line = line;
980         _pos = pos;
981     }
982     /// set start position for token (line is 1-based, pos is 0-based)
983     void setPos(SourceFile file, int line, int pos) {
984         _file = file;
985         _line = line;
986         _pos = pos + 1;
987     }
988     /// set source file information for token
989     void setFile(SourceFile file) {
990         _file = file;
991     }
992     /// set start position for token (line is 1-based, pos is 0-based)
993     void setPos(int line, int pos) {
994         _line = line;
995         _pos = pos + 1;
996     }
997 
998     public abstract Token clone();
999     public override @property string toString() {
1000         return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword) 
1001             ~" \"" ~ toUTF8(text()) ~ "\"";
1002     }
1003 }
1004 
/// End-of-file marker token.
class EofToken : Token {
    /// creates an EOF token without location information
    this() {
        super(TokenType.EOF);
    }

    /// creates an EOF token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.EOF, file, line, pos);
    }

    /// returns a new EOF token with the same file, line and position
    public override Token clone() {
        Token copy = new EofToken(_file, _line, _pos);
        return copy;
    }

    /// fixed debug name
    public override @property string toString() {
        return "EOF";
    }
}
1019 
1020 // treat as white space
1021 //class EolToken : Token {
1022 //    this(string file, uint line, uint pos) {
1023 //        super(TokenType.EOL, file, line, pos);
1024 //    }
1025 //}
1026 
/// Token representing a run of white space.
class WhiteSpaceToken : Token {
    /// creates a white space token without location information
    this() {
        super(TokenType.WHITESPACE);
    }

    /// creates a white space token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.WHITESPACE, file, line, pos);
    }

    /// returns a new white space token with the same file, line and position
    public override Token clone() {
        Token copy = new WhiteSpaceToken(_file, _line, _pos);
        return copy;
    }

    /// fixed debug name
    public override @property string toString() {
        return "WhiteSpace";
    }
}
1042 
/// Operator / punctuation token; carries the OpCode of the operator.
class OpToken : Token {
    OpCode _op;

    /// operator code of this token
    public @property override OpCode opCode() {
        return _op;
    }
    /// sets the operator code
    public @property void opCode(OpCode op) {
        _op = op;
    }
    /// source spelling of the operator, looked up via getOpNameD
    public @property override dstring text() {
        return getOpNameD(_op);
    }

    /// creates an operator token without location information
    this() {
        super(TokenType.OP);
    }

    /// creates an operator token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.OP, file, line, pos);
    }

    /// returns a new operator token with the same location and operator code
    public override Token clone() {
        OpToken copy = new OpToken(_file, _line, _pos);
        copy._op = _op;
        return copy;
    }

    /// debug representation: "Op:" followed by the OpCode member name
    public override @property string toString() {
        string opName = to!string(_op);
        return "Op:" ~ opName;
    }
}
1063 
/// Keyword token; carries the Keyword code of the keyword.
class KeywordToken : Token {
    Keyword _keyword;

    /// keyword code of this token
    public @property override Keyword keyword() {
        return _keyword;
    }
    /// sets the keyword code
    public @property void keyword(Keyword keyword) {
        _keyword = keyword;
    }
    /// source spelling of the keyword, looked up via getKeywordNameD
    public @property override dstring text() {
        return getKeywordNameD(_keyword);
    }

    /// creates a keyword token without location information
    this() {
        super(TokenType.KEYWORD);
    }

    /// creates a keyword token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.KEYWORD, file, line, pos);
    }

    /// returns a new keyword token with the same location and keyword code
    public override Token clone() {
        KeywordToken copy = new KeywordToken(_file, _line, _pos);
        copy._keyword = _keyword;
        return copy;
    }

    /// debug representation: "Keyword:" followed by the Keyword member name
    public override @property string toString() {
        string keywordName = to!string(_keyword);
        return "Keyword:" ~ keywordName;
    }
}
1084 
/// Comment token: holds the comment text plus "documentation" and "multiline" flags.
class CommentToken : Token {
    protected dstring _text;
    protected bool _isDocumentationComment;
    protected bool _isMultilineComment;

    /// true if this is a documentation comment
    override @property bool isDocumentationComment() {
        return _isDocumentationComment;
    }
    /// sets the documentation comment flag
    @property void isDocumentationComment(bool f) {
        _isDocumentationComment = f;
    }

    /// true if this is a multiline comment
    override @property bool isMultilineComment() {
        return _isMultilineComment;
    }
    /// sets the multiline comment flag
    @property void isMultilineComment(bool f) {
        _isMultilineComment = f;
    }

    /// comment text
    @property override dstring text() {
        return _text;
    }
    /// sets the comment text; the array is reinterpreted as immutable, not copied
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    /// creates a comment token without location or text
    this() {
        super(TokenType.COMMENT);
    }

    /// creates a comment token at the given location; text is reinterpreted as immutable, not copied
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.COMMENT, file, line, pos);
        _text = cast(dstring)text;
    }

    /// returns a new comment token with a duplicated text buffer and the same flags
    public override Token clone() {
        dchar[] textCopy = _text.dup;
        CommentToken copy = new CommentToken(_file, _line, _pos, textCopy);
        copy._isDocumentationComment = _isDocumentationComment;
        copy._isMultilineComment = _isMultilineComment;
        return copy;
    }

    /// debug representation: "Comment:" followed by the comment text
    public override @property string toString() {
        string body_ = to!string(_text);
        return "Comment:" ~ body_;
    }
}
1128 
/// Placeholder for text that failed to tokenize - used for error tolerant parsing.
/// Records the offending text, the token type that was being parsed, and an error code/message.
class InvalidToken : Token {
    protected dstring _text;
    protected TokenType _invalidTokenType;
    protected int _errorCode;
    protected string _errorMessage;

    /// error message describing why the token is invalid
    override @property string errorMessage() {
        return _errorMessage;
    }
    /// sets error message
    @property void errorMessage(string s) {
        _errorMessage = s;
    }

    /// error code describing why the token is invalid
    override @property int errorCode() {
        return _errorCode;
    }
    /// sets error code
    @property void errorCode(int c) {
        _errorCode = c;
    }

    /// type of the token whose parsing failed
    override @property TokenType invalidTokenType() {
        return _invalidTokenType;
    }
    /// sets type of the token whose parsing failed
    @property void invalidTokenType(TokenType t) {
        _invalidTokenType = t;
    }

    /// text of invalid token
    @property override dstring text() {
        return _text;
    }
    /// sets text of invalid token; the array is reinterpreted as immutable, not copied
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    /// creates an invalid token without location or text
    this() {
        super(TokenType.INVALID);
    }

    /// creates an invalid token at the given location; text is reinterpreted as immutable, not copied
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.INVALID, file, line, pos);
        _text = cast(dstring)text;
    }

    /// returns a new invalid token with duplicated text and message and the same error details
    override Token clone() {
        dchar[] textCopy = _text.dup;
        InvalidToken copy = new InvalidToken(_file, _line, _pos, textCopy);
        copy._errorMessage = _errorMessage.dup;
        copy._errorCode = _errorCode;
        copy._invalidTokenType = _invalidTokenType;
        return copy;
    }

    /// debug representation: "Invalid:" followed by the offending text
    override @property string toString() {
        string body_ = to!string(_text);
        return "Invalid:" ~ body_;
    }
}
1172 
/// Numeric handle of an interned identifier.
alias tokenizer_ident_t = uint;
/// Text of an interned identifier.
alias tokenizer_ident_name_t = dstring;

/// Reserved handle meaning "no identifier"; real handles start at NO_IDENT + 1.
enum : tokenizer_ident_t {
    NO_IDENT = 0
}

/**
 * Storage for identifier strings.
 *
 * Maps identifier text to a numeric id and back. Ids are handed out
 * sequentially, starting at NO_IDENT + 1, the first time a name is requested
 * through idByName().
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
    * Look up the id registered for a name; NO_IDENT if the name is unknown.
    * Never creates a new entry.
    */
    uint findByName(tokenizer_ident_name_t name) {
        auto hit = name in _nameToId;
        return hit ? *hit : NO_IDENT;
    }

    /**
    * Look up the name registered for an id; null if the id is unknown.
    */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        auto hit = id in _idToName;
        return hit ? *hit : null;
    }

    /**
     * Return the id for a name, registering the name under a fresh id when it is not known yet.
     * A private copy of the name is stored, so the caller's buffer is not retained.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        if (auto known = name in _nameToId)
            return *known;
        immutable tokenizer_ident_t freshId = _nextId++;
        immutable tokenizer_ident_name_t stored = name.idup;
        _idToName[freshId] = stored;
        _nameToId[stored] = freshId;
        return freshId;
    }
}
1226 
1227 /**
1228 * Thread local storage for IDs.
1229 */
1230 IdentHolder identMap;
1231 
1232 static this() {
1233     // init ID storage
1234     identMap = new IdentHolder();
1235 }
1236 
/// String literal token: holds the literal's text and its type character
/// (0 when no type character is set).
class StringLiteralToken : Token {
    dstring _text;
    dchar _literalType;

    /// literal type character
    public @property override dchar literalType() {
        return _literalType;
    }
    /// text of the string literal
    public @property override dstring text() {
        return _text;
    }
    /// sets text and type character; the array is reinterpreted as immutable, not copied
    public void setText(dchar[] text, dchar type) {
        _literalType = type;
        _text = cast(dstring)text;
    }

    /// creates a string literal token without location or content
    this() {
        super(TokenType.STRING);
    }

    /// creates a string literal token at the given location; text is reinterpreted as immutable, not copied
    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
        super(TokenType.STRING, file, line, pos);
        _literalType = type;
        _text = cast(dstring)text;
    }

    /// returns a new string literal token with a duplicated text buffer
    public override Token clone() {
        dchar[] textCopy = _text.dup;
        return new StringLiteralToken(_file, _line, _pos, textCopy, _literalType);
    }

    /// debug representation: String:"text" followed by the type character (a space when it is 0)
    public override @property string toString() {
        dchar suffix = _literalType ? _literalType : ' ';
        return toUTF8("String:\"" ~ _text ~ "\"" ~ suffix);
    }
}
1258 
/// Character literal token: holds a single character and its literal type character.
class CharacterLiteralToken : Token {
    dchar _character;
    dchar _literalType;

    /// literal type character
    @property override dchar literalType() {
        return _literalType;
    }
    /// the character value of the literal
    @property dchar character() {
        return _character;
    }
    /// the character as a one-element string
    @property override dstring text() {
        return [_character];
    }
    /// sets character value and type character
    void setCharacter(dchar ch, dchar type) {
        _literalType = type;
        _character = ch;
    }

    /// creates a character literal token without location or content
    this() {
        super(TokenType.CHARACTER);
    }

    /// creates a character literal token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
        super(TokenType.CHARACTER, file, line, pos);
        _literalType = type;
        _character = character;
    }

    /// returns a new character literal token with the same location and content
    public override Token clone() {
        Token copy = new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
        return copy;
    }

    /// debug representation: "Char:" followed by the character encoded as UTF-8
    public override @property string toString() {
        string encoded = toUTF8([_character]);
        return "Char:" ~ encoded;
    }
}
1281 
/// Integer literal token: holds the value plus "unsigned" and "long" flags.
class IntegerLiteralToken : Token {
    ulong _value;
    bool _unsigned;
    bool _long;

    /// numeric value of the literal
    public @property override ulong intValue() {
        return _value;
    }
    /// unsigned flag
    public @property override bool isUnsigned() {
        return _unsigned;
    }
    /// long flag (returned as ulong to match the base class signature)
    public @property override ulong isLong() {
        return _long;
    }
    /// decimal representation of the value
    public @property override dstring text() {
        return to!dstring(_value);
    }

    /// sets the value and both flags
    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
        _value = value;
        _long = longFlag;
        _unsigned = unsignedFlag;
    }
    /// sets both flags, leaving the value untouched
    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
        _long = longFlag;
        _unsigned = unsignedFlag;
    }

    /// creates an integer literal token without location or value
    this() {
        super(TokenType.INTEGER);
    }

    /// creates an integer literal token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
        super(TokenType.INTEGER, file, line, pos);
        _value = value;
        _long = longFlag;
        _unsigned = unsignedFlag;
    }

    /// returns a new integer literal token with the same location, value and flags
    public override Token clone() {
        Token copy = new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
        return copy;
    }

    /// debug representation: "Integer:" + value, then "L" if long, then "U" if unsigned
    public override @property string toString() {
        string result = "Integer:" ~ to!string(_value);
        if (_long)
            result ~= "L";
        if (_unsigned)
            result ~= "U";
        return result;
    }
}
1315 
/// Floating point literal token: holds the value, a precision code and an "imaginary" flag.
/// toString() renders precision 0 as suffix "f" and 2 as "L"; other values get no suffix.
class RealLiteralToken : Token {
    real _value;
    byte _precision;
    bool _imaginary;

    /// value converted with to!long (conversion errors from std.conv propagate to the caller)
    public @property override ulong intValue() {
        return to!long(_value);
    }
    /// value at full stored precision
    public @property override real realValue() {
        return _value;
    }
    /// value narrowed to double
    public @property override double doubleValue() {
        return cast(double)_value;
    }
    /// value narrowed to float
    public @property override float floatValue() {
        return cast(float)_value;
    }
    /// precision code
    public @property override byte precision() {
        return _precision;
    }
    /// imaginary flag
    public @property override bool isImaginary() {
        return _imaginary;
    }
    /// textual representation of the value
    public @property override dstring text() {
        return to!dstring(_value);
    }

    /// sets the value, precision code and imaginary flag
    public void setValue(real value, byte precision = 1, bool imaginary = false) {
        _value = value;
        _imaginary = imaginary;
        _precision = precision;
    }
    /// sets precision code and imaginary flag, leaving the value untouched
    public void setFlags(byte precision = 1, bool imaginary = false) {
        _imaginary = imaginary;
        _precision = precision;
    }

    /// creates a real literal token without location or value
    this() {
        super(TokenType.FLOAT);
    }

    /// creates a real literal token at the given location (values are stored as passed)
    this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
        super(TokenType.FLOAT, file, line, pos);
        _value = value;
        _imaginary = imaginary;
        _precision = precision;
    }

    /// returns a new real literal token with the same location, value and flags
    public override Token clone() {
        Token copy = new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
        return copy;
    }

    /// debug representation: "Real:" + value + precision suffix + "i" if imaginary
    public override @property string toString() {
        string result = "Real:" ~ to!string(_value);
        if (_precision == 0)
            result ~= "f";
        else if (_precision == 2)
            result ~= "L";
        if (_imaginary)
            result ~= "i";
        return result;
    }
}
1352 
/// Identifier token. The identifier is not stored as text but as an id
/// obtained from identMap; the text is looked up again on demand.
class IdentToken : Token {
    /// id of the identifier as returned by identMap.idByName
    tokenizer_ident_t _id;

    /// Creates a token with no file, position or identifier assigned.
    this() {
        super(TokenType.IDENTIFIER);
    }

    /// Creates a token for the identifier spelled by `text`.
    // NOTE(review): `text` is cast to immutable without copying - this is only
    // safe if identMap.idByName does not keep a reference to the caller's
    // buffer; confirm against identMap.
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = identMap.idByName(cast(immutable)text);
    }

    /// Creates a token for an identifier whose id is already known.
    this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = id;
    }

    /// Returns: the identifier name registered in identMap for the stored id.
    public @property override dstring text() {
        return identMap.nameById(_id);
    }

    /// Replaces the identifier with the one spelled by `text`
    /// (same uncopied immutable cast as in the constructor).
    public void setText(dchar[] text) {
        _id = identMap.idByName(cast(immutable)text);
    }

    /// Returns: a new IdentToken with the same file, position and id.
    override public Token clone() {
        auto copy = new IdentToken(_file, _line, _pos, _id);
        return copy;
    }

    /// Returns: "Ident:" followed by the identifier name.
    public override @property string toString() {
        dstring name = text;
        return "Ident:" ~ to!string(name);
    }
}
1379 
// Shared appender buffer, reused between tokens to avoid extra heap allocations.
// Besides plain appending it can decode, in place, the escape sequences of the
// text collected so far (see processEscapeSequences).
struct StringAppender {
    /// backing storage; only the first `len` items hold collected text
    dchar[] buf;
    /// number of characters currently stored
    uint len;

    /// Returns: slice of the internal buffer with the collected text (not a copy).
    dchar[] get() {
        return buf[0 .. len];
    }

    /// Grows the buffer so that it can hold at least `required` characters.
    /// The buffer never shrinks and is at least 128 items once allocated.
    private void ensureCapacity(size_t required) {
        if (required <= buf.length)
            return;
        size_t newsize = required * 2;
        if (newsize < 128)
            newsize = 128;
        buf.length = newsize;
    }

    /// Appends a single line feed character.
    void appendEol() {
        ensureCapacity(len + 1);
        buf[len++] = '\n';
    }

    /// Appends all characters of `s`; an empty slice is a no-op.
    void append(dchar[] s) {
        if (s.length == 0)
            return;
        ensureCapacity(len + s.length);
        buf[len .. len + s.length] = s;
        len += cast(uint)s.length;
    }

    /// Appends one character.
    void append(dchar ch) {
        ensureCapacity(len + 1);
        buf[len++] = ch;
    }

    /// Forgets the collected text; the allocated buffer is kept for reuse.
    void reset() {
        len = 0;
    }

    /// Returns: numeric value (0..15) of a hexadecimal digit, or -1 when `ch` is not one.
    static int parseHexDigit(dchar ch) {
        if (ch >= '0' && ch <='9')
            return ch - '0';
        if (ch >= 'a' && ch <='f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <='F')
            return ch - 'A' + 10;
        return -1;
    }

    /// set by the decode* helpers and processEscapeSequences when invalid input is met
    bool errorFlag = false;

    /// Decodes `count` hexadecimal digits that follow buf[pos].
    /// Params:
    ///     pos = index of the character preceding the digits; advanced to the last consumed digit
    ///     count = number of digits expected
    /// Returns: decoded character. errorFlag is set when the text ends early,
    /// a non-hex character is met (it is counted as digit 0) or the decoded
    /// value is not a valid code point (in which case '?' is returned).
    dchar decodeHex(ref int pos, int count) {
        dchar res = 0;
        for (int i = 0; i < count; i++) {
            // written as pos + 1 >= len so that an empty buffer cannot underflow the unsigned len
            if (pos + 1 >= cast(int)len) {
                errorFlag = true;
                return res;
            }
            int digit = parseHexDigit(buf[++pos]);
            if (digit < 0) {
                errorFlag = true;
                digit = 0;
            }
            res = (res << 4) | digit;
        }
        if (!isValidDchar(res)) {
            // surrogates and values above 0x10FFFF must not end up in the text
            errorFlag = true;
            return '?';
        }
        return res;
    }

    /// Decodes an octal escape of one to three digits.
    /// Params:
    ///     firstChar = first octal digit (already consumed by the caller)
    ///     pos = index of firstChar; advanced over every additional digit consumed
    /// Returns: decoded character
    dchar decodeOct(dchar firstChar, ref int pos) {
        dchar res = firstChar - '0';
        // at most two more octal digits belong to the escape
        for (int extra = 0; extra < 2; extra++) {
            if (pos + 1 >= cast(int)len)
                break;
            dchar next = buf[pos + 1];
            if (next < '0' || next > '7')
                break;
            res = (res << 3) | (next - '0');
            pos++;
        }
        return res;
    }

    /// scratch buffer for the name of a character entity
    char[] entityNameBuf;
    /// number of valid characters in entityNameBuf
    int entityNameLen;

    /// Decodes a named character entity.
    /// Params:
    ///     pos = index of the '&' character; advanced to the terminating ';'
    ///           (or to the end of the text when there is none)
    /// Returns: character reported by entityToChar, or '?' with errorFlag set
    /// when the entity is unterminated or entityToChar returns 0.
    dchar decodeCharacterEntity(ref int pos) {
        entityNameLen = 0;
        pos++;
        for(; pos < len && buf[pos] != ';'; pos++) {
            dchar ch = buf[pos];
            if (ch >= 0x80)
                errorFlag = true; // entity names are expected to be ASCII
            if (entityNameBuf.length < entityNameLen + 4)
                entityNameBuf.length += 32;
            entityNameBuf[entityNameLen++] = cast(char)ch;
        }
        if (pos < len && buf[pos] == ';') {
            dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /// Replaces, in place, every backslash escape sequence of the collected
    /// text with the character it denotes and shrinks `len` accordingly.
    /// Returns: true when at least one invalid escape sequence was found
    /// (the same value is left in errorFlag).
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0; // write index; never runs ahead of the read index
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch != '\\') {
                buf[dst++] = ch;
                continue;
            }
            if (src + 1 >= cast(int)len) {
                // lone backslash at the very end: nothing to escape
                errorFlag = true;
                break;
            }
            ch = buf[++src];
            switch (ch) {
                case '\'':
                case '\"':
                case '?':
                case '\\':
                    buf[dst++] = ch;
                    break;
                case 'a':
                    buf[dst++] = '\a';
                    break;
                case 'b':
                    buf[dst++] = '\b';
                    break;
                case 'f':
                    buf[dst++] = '\f';
                    break;
                case 'n':
                    buf[dst++] = '\n';
                    break;
                case 'r':
                    buf[dst++] = '\r';
                    break;
                case 't':
                    buf[dst++] = '\t';
                    break;
                case 'v':
                    buf[dst++] = '\v';
                    break;
                case 'x':
                    buf[dst++] = decodeHex(src, 2);
                    break;
                case 'u':
                    buf[dst++] = decodeHex(src, 4);
                    break;
                case 'U':
                    buf[dst++] = decodeHex(src, 8);
                    break;
                default:
                    if (ch >= '0' && ch <= '7') {
                        // octal X, XX or XXX; a plain \0 is handled here as well,
                        // so that \0 followed by octal digits is read as one escape
                        buf[dst++] = decodeOct(ch, src);
                    } else if (ch == '&') {
                        // named character entity
                        buf[dst++] = decodeCharacterEntity(src);
                    } else {
                        // unknown escape: keep the character and report the error
                        buf[dst++] = ch;
                        errorFlag = true;
                    }
                    break;
            }
        }
        len = dst;
        return errorFlag;
    }
}
1554 
1555 class Tokenizer
1556 {
    /// source of input lines, assigned in initialize()
    protected SourceLines _lineStream;
    /// text of the line currently being tokenized, as returned by _lineStream.readLine()
    protected dchar[] _lineText;
    protected int _line; // current line number
    protected int _len; // current line length
    protected int _pos; // current line read position
    protected int _prevLineLength; // previous line length
    protected uint _state; // tokenizer state
    
    /// special characters returned by nextChar() / peekChar()
    enum : int {
        EOF_CHAR = 0x001A, // returned when no more lines are available
        EOL_CHAR = 0x000A  // returned at a line boundary
    };
    
    // Token instances reused between calls (their file is set in initialize());
    // the process* methods fill one of them and return it instead of allocating.
    protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
    protected CommentToken _sharedCommentToken = new CommentToken();
    protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
    protected IdentToken _sharedIdentToken = new IdentToken();
    protected OpToken _sharedOpToken = new OpToken();
    protected KeywordToken _sharedKeywordToken = new KeywordToken();
    protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
    protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
    protected InvalidToken _sharedInvalidToken = new InvalidToken();
    protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
    // reusable text buffers: comment text, identifier text and
    // (presumably, usage is outside this section) string literal text
    protected StringAppender _stringLiteralAppender;
    protected StringAppender _commentAppender;
    protected StringAppender _identAppender;
    
    /// backing field of the enableCommentText property; comment text is stored by default
    protected bool _enableCommentText = true;
    /// Setter: when false, comment tokens are produced without their text,
    /// which saves allocations.
    @property void enableCommentText(bool enabled) {
        _enableCommentText = enabled;
    }
    /// Getter: true when comment text is stored in comment tokens.
    @property bool enableCommentText() {
        return _enableCommentText;
    }
1593 
    /// backing field of the errorTolerant property; errors throw by default
    protected bool _errorTolerant = false;
    /// Setter: when true, parserError() returns the shared InvalidToken
    /// instead of throwing ParserException.
    @property void errorTolerant(bool enabled) {
        _errorTolerant = enabled;
    }
    /// Getter: true when errors are reported as InvalidToken instead of exceptions.
    @property bool errorTolerant() {
        return _errorTolerant;
    }
1603 
    /// Creates a tokenizer reading from `lineStream`; all setup is done by initialize().
    this(SourceLines lineStream) {
        initialize(lineStream);
    }
1607 
1608     void initialize(SourceLines lineStream, int pos = 0) {
1609         _lineStream = lineStream;
1610         SourceFile file = _lineStream.file;
1611         _sharedWhiteSpaceToken.setFile(file);
1612         _sharedCommentToken.setFile(file);
1613         _sharedStringLiteralToken.setFile(file);
1614         _sharedIdentToken.setFile(file);
1615         _sharedOpToken.setFile(file);
1616         _sharedKeywordToken.setFile(file);
1617         _sharedIntegerToken.setFile(file);
1618         _sharedRealToken.setFile(file);
1619         _sharedInvalidToken.setFile(file);
1620         _sharedCharacterLiteralToken.setFile(file);
1621         buildTime = Clock.currTime();
1622         _line = lineStream.line;
1623         _pos = 0;
1624         _prevLineLength = 0;
1625         _lineText = null;
1626         nextLine();
1627         _pos = pos;
1628     }
1629     
    /// Creates a tokenizer for source text held in memory: `code` and
    /// `filename` are wrapped into an ArraySourceLines and passed to the
    /// stream constructor.
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }
1633     
1634     // fetch next line from source stream
1635     protected bool nextLine() {
1636         _prevLineLength = cast(int)_lineText.length;
1637         _lineText = _lineStream.readLine();
1638         if (!_lineText) {
1639             if (_lineStream.errorCode != 0)
1640                 throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
1641             if (_lineStream.eof) {
1642                 // end of file
1643                 _pos = 0;
1644                 _len = 0;
1645                 return false;
1646             }
1647             // just an empty line
1648         }
1649         _line = _lineStream.line;
1650         _pos = 0;
1651         _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
1652         return true;
1653     }
1654     
1655     protected dchar nextChar() {
1656         if (_pos >= _len) {
1657             if (!nextLine()) {
1658                 _pos = _prevLineLength + 1;
1659                 return EOF_CHAR;
1660             }
1661             return EOL_CHAR;
1662         }
1663         dchar res = _lineText[_pos++];
1664         if (_pos >= _len)
1665             nextLine();
1666         return res;
1667     }
1668     
1669     protected dchar peekChar() {
1670         if (_lineText is null) {
1671             if (!nextLine()) {
1672                 return EOF_CHAR;
1673             }
1674         }
1675         if (_pos >= _len)
1676             return EOL_CHAR;
1677         return _lineText[_pos++];
1678     }
1679     
    /// Returns: a newly allocated EofToken for the stream's file, positioned at
    /// the start line of the current token and two columns after its start position.
    // NOTE(review): the reason for the "+ 2" offset is not visible here - confirm.
    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _startLine, _startPos + 2);
    }
1684     
1685     protected Token processWhiteSpace(dchar firstChar) {
1686         // reuse the same token instance, to avoid extra heap spamming
1687         _sharedWhiteSpaceToken.setPos(_startLine, _startPos);
1688         for (;;) {
1689             int i = _pos;
1690             for (; i < _len; i++) {
1691                 dchar ch = _lineText[i];
1692                 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
1693                     break;
1694             }
1695             _pos = i;
1696             if (_pos < _len)
1697                 break;
1698             // go to next line
1699             if (!nextLine())
1700                 break;
1701         }
1702         return _sharedWhiteSpaceToken;
1703     }
1704     
1705     protected Token processOneLineComment() {
1706         _sharedCommentToken.setPos(_startLine, _startPos);
1707         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
1708         _sharedCommentToken.isMultilineComment = false;
1709         if (_enableCommentText) {
1710             _sharedCommentToken.text = _lineText[_pos + 1 .. $];
1711         }
1712         _pos = _len;
1713         nextChar();
1714         return _sharedCommentToken;
1715     }
1716 
1717     protected Token processOneLineSharpComment() {
1718         _sharedCommentToken.setPos(_startLine, _startPos);
1719         if (_enableCommentText) {
1720             _sharedCommentToken.text = _lineText[_pos .. $];
1721         }
1722         _pos = _len;
1723         return _sharedCommentToken;
1724     }
1725 
1726     // Comment /*   */    
1727     protected Token processMultilineComment() {
1728         _sharedCommentToken.setPos(_startLine, _startPos);
1729         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
1730         _sharedCommentToken.isMultilineComment = true;
1731         _commentAppender.reset();
1732         int textStart = _pos + 1;
1733         for (;;) {
1734             int textEnd = int.max;
1735             int i = textStart;
1736             for (; i < _len - 1; i++) {
1737                 if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
1738                     textEnd = i;
1739                     break;
1740                 }
1741             }
1742             if (textEnd != int.max) {
1743                 if (_enableCommentText)
1744                     _commentAppender.append(_lineText[textStart .. textEnd]);
1745                 _pos = textEnd + 2;
1746                 break;
1747             }
1748             if (!nextLine()) {
1749                 // TODO: do we need throw exception if comment not closed by end of file?
1750                 _pos = _len;
1751                 break;
1752             }
1753             textStart = 0;
1754         }
1755         if (_enableCommentText) {
1756             _sharedCommentToken.text = _commentAppender.get();
1757         }
1758         return _sharedCommentToken;
1759     }
1760     
1761     // Comment /+   +/    
1762     protected Token processNestedComment() {
1763         _sharedCommentToken.setPos(_startLine, _startPos);
1764         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
1765         _sharedCommentToken.isMultilineComment = true;
1766         _commentAppender.reset();
1767         dchar[] text;
1768         int textStart = _pos + 1;
1769         int level = 1;
1770         for (;;) {
1771             int textEnd = int.max;
1772             int i = textStart;
1773             for (; i < _len - 1; i++) {
1774                 if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
1775                     level++;
1776                     i++;
1777                 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
1778                     if (--level == 0) {
1779                         textEnd = i;
1780                         break;
1781                     }
1782                 }
1783             }
1784             if (textEnd != int.max) {
1785                 if (_enableCommentText)
1786                     _commentAppender.append(_lineText[textStart .. textEnd]);
1787                 _pos = textEnd + 2;
1788                 break;
1789             }
1790             if (!nextLine()) {
1791                 // TODO: do we need throw exception if comment not closed by end of file?
1792                 _pos = _len;
1793                 break;
1794             }
1795             if (_enableCommentText)
1796                 _commentAppender.appendEol();
1797             textStart = 0;
1798         }
1799         if (_enableCommentText) {
1800             _sharedCommentToken.text = _commentAppender.get();
1801         }
1802         return _sharedCommentToken;
1803     }
1804     
    /// Placeholder for hex string literals: not implemented yet.
    /// Advances the read position by one character and returns null.
    protected Token processHexString() {
        _pos++;
        // TODO:
        return null;
    }
1810     
    /// Placeholder for delimited string literals: not implemented yet.
    /// Advances the read position by one character and returns null.
    protected Token processDelimitedString() {
        _pos++;
        // TODO:
        return null;
    }
1816     
    // r"string"   or    `string`
    /// Placeholder for wysiwyg string literals: not implemented yet.
    /// Advances the read position by one character and returns null;
    /// the `ch` argument is not used.
    protected Token processWysiwygString(dchar ch) {
        _pos++;
        // TODO:
        return null;
    }
1823     
1824     protected Token processIdent(dchar firstChar) {
1825         _sharedIdentToken.setPos(_startLine, _startPos);
1826         _identAppender.reset();
1827         _identAppender.append(firstChar);
1828         for (; _pos < _len; ) {
1829             dchar ch = _lineText[_pos];
1830             if (!isIdentMiddleChar(ch)) {
1831                 break;
1832             }
1833             _identAppender.append(ch);
1834             _pos++;
1835         }
1836         _sharedIdentToken.setText(_identAppender.get);
1837         return _sharedIdentToken;
1838     }
1839 
1840     protected Token processIntegerSuffix() {
1841         if (_pos >= _len)
1842             return _sharedIntegerToken;
1843         bool longFlag = false;
1844         bool unsignedFlag = false;
1845         dchar ch = _lineText[_pos];
1846         dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
1847         if (ch == 'l' || ch == 'L') {
1848             longFlag = true;
1849             _pos++;
1850             if (ch2 == 'u' || ch2 == 'U') {
1851                 unsignedFlag = true;
1852                 _pos++;
1853             } 
1854         } else if (ch == 'u' || ch == 'U') {
1855             unsignedFlag = true;
1856             _pos++;
1857             if (ch2 == 'l' || ch2 == 'L') {
1858                 longFlag = true;
1859                 _pos++;
1860             } 
1861         }
1862         _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
1863         ch = _pos < _len ? _lineText[_pos] : 0;
1864         if (isIdentMiddleChar(ch))
1865             return parserError("Unexpected character after number", _sharedIntegerToken);
1866         return _sharedIntegerToken;
1867     }
1868     
1869     protected Token processBinaryNumber() {
1870         _sharedIntegerToken.setPos(_startLine, _startPos);
1871         _pos++;
1872         if (_pos >= _len)
1873             return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
1874         int digits = 0;
1875         ulong number = 0;
1876         int i = _pos;
1877         for (;i < _len; i++) {
1878             dchar ch = _lineText[i];
1879             if (ch != '0' && ch != '1')
1880                 break;
1881             number = (number << 1) | (ch == '1' ? 1 : 0);
1882             digits++;
1883         }
1884         _pos = i;
1885         if (digits > 64)
1886             return parserError("number is too big", _sharedIntegerToken);
1887         _sharedIntegerToken.setValue(number);
1888         return processIntegerSuffix();
1889     }
1890 
1891     protected Token processHexNumber() {
1892         _sharedIntegerToken.setPos(_startLine, _startPos);
1893         _sharedRealToken.setPos(_startLine, _startPos);
1894         _pos++;
1895         if (_pos >= _len)
1896             return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
1897         int digits = 0;
1898         ulong number = 0;
1899         int i = _pos;
1900         for (;i < _len; i++) {
1901             dchar ch = _lineText[i];
1902             uint digit = 0;
1903             if (ch >= '0' && ch <= '9')
1904                 digit = ch - '0';
1905             else if (ch >= 'a' && ch <= 'f')
1906                 digit = ch - 'a' + 10;
1907             else if (ch >= 'A' && ch <= 'F')
1908                 digit = ch - 'A' + 10;
1909             else if (ch == '_')
1910                 continue;
1911             else
1912                 break;
1913             number = (number << 4) | digit;
1914             digits++;
1915         }
1916         _pos = i;
1917         if (digits > 16)
1918             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1919         _sharedIntegerToken.setValue(number);
1920         return processIntegerSuffix();
1921     }
1922     
1923     protected Token processOctNumber() {
1924         _sharedIntegerToken.setPos(_startLine, _startPos);
1925         if (_pos >= _len)
1926             return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
1927         int digits = 0;
1928         ulong number = 0;
1929         int i = _pos;
1930         bool overflow = false;
1931         for (;i < _len; i++) {
1932             dchar ch = _lineText[i];
1933             int digit = 0;
1934             if (ch >= '0' && ch <= '7')
1935                 digit = ch - '0';
1936             else if (ch == '_')
1937                 continue;
1938             else
1939                 break;
1940             number <<= 3;
1941             if (digits >= 20) {
1942                 if ((number >> 3) << 3 != number) {
1943                     overflow = true;
1944                     break;
1945                 }
1946             }
1947             number |= digit;
1948             digits++;
1949         }
1950         _pos = i;
1951         if (overflow)
1952             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1953         _sharedIntegerToken.setValue(number);
1954         return processIntegerSuffix();
1955     }
1956     
1957     // 
1958     protected Token processDecFloatSuffix(real value) {
1959         ubyte precision = 1;
1960         bool imaginary = false;
1961         dchar next = _pos < _len ? _lineText[_pos] : 0;
1962         if (next == 'f') {
1963             _pos++;
1964             precision = 0;
1965         } else if (next == 'L') {
1966             _pos++;
1967             precision = 2;
1968         }
1969         next = _pos < _len ? _lineText[_pos] : 0;
1970         if (next == 'i') {
1971             _pos++;
1972             imaginary = true;
1973         }
1974         next = _pos < _len ? _lineText[_pos] : 0;
1975         if (isIdentMiddleChar(next))
1976             return parserError("invalid suffix for floating point literal", _sharedRealToken);
1977         _sharedRealToken.setValue(value, precision, imaginary);
1978         return _sharedRealToken;
1979     }
1980     
1981     // after E char
1982     protected Token processDecFloatExponent(real value) {
1983         dchar next = _pos < _len ? _lineText[_pos] : 0;
1984         int sign = 1;
1985         if (next == '+') {
1986             _pos++;
1987         } else if (next == '-') {
1988             _pos++;
1989             sign = -1;
1990         }
1991         if (_pos >= _len)
1992             return parserError("Invalid exponent", _sharedRealToken);
1993         ulong digits = 0;
1994         ulong number = 0;
1995         int i = _pos;
1996         bool overflow = false;
1997         for (;i < _len; i++) {
1998             dchar ch = _lineText[i];
1999             uint digit = 0;
2000             if (ch >= '0' && ch <= '9')
2001                 digit = ch - '0';
2002             else if (ch == '_')
2003                 continue;
2004             else
2005                 break;
2006             number *= 10;
2007             if (digits >= 18) {
2008                 if ((number * 10) / 10 != number) {
2009                     overflow = true;
2010                     break;
2011                 }
2012             }
2013             number += digit;
2014             digits++;
2015         }
2016         if (digits == 0)
2017             return parserError("Invalid exponent", _sharedRealToken);
2018         _pos = i;
2019         value *= pow(10., cast(long)number * sign);
2020         return processDecFloatSuffix(value);
2021     }
2022         
2023     protected Token processDecFloatSecondPart(ulong firstPart) {
2024         if (_pos >= _len) {
2025             _sharedRealToken.setValue(cast(real)firstPart);
2026             return _sharedRealToken;
2027         }
2028         ulong divider = 1;
2029         ulong number = 0;
2030         int i = _pos;
2031         bool overflow = false;
2032         for (;i < _len; i++) {
2033             dchar ch = _lineText[i];
2034             uint digit = 0;
2035             if (ch >= '0' && ch <= '9')
2036                 digit = ch - '0';
2037             else if (ch == '_')
2038                 continue;
2039             else
2040                 break;
2041             if (divider * 10 < divider)
2042                 continue; // ignore extra digits
2043             number *= 10;
2044             number += digit;
2045             divider *= 10;
2046         }
2047         _pos = i;
2048         real value = cast(real)firstPart + (cast(real)number / divider);
2049         dchar next = _pos < _len ? _lineText[_pos] : 0;
2050         if (next == 0) {
2051             // neither exponent nor suffix
2052             _sharedRealToken.setValue(value);
2053             return _sharedRealToken;
2054         }
2055            if (next == 'e' || next == 'E') {
2056             _pos++;
2057             return processDecFloatExponent(value);
2058         }
2059         return processDecFloatSuffix(value);
2060     }
2061         
2062     protected Token processDecNumber(dchar c) {
2063         _sharedIntegerToken.setPos(_startLine, _startPos);
2064         _sharedRealToken.setPos(_startLine, _startPos);
2065         //if (_pos >= _len)
2066         //    return parserError("Unexpected end of line in number", _sharedIntegerToken);
2067         int digits = 1;
2068         ulong number = c - '0';
2069         int i = _pos;
2070         bool overflow = false;
2071         if (_line == _startLine) {
2072             for (;i < _len; i++) {
2073                 dchar ch = _lineText[i];
2074                 uint digit = 0;
2075                 if (ch >= '0' && ch <= '9')
2076                     digit = ch - '0';
2077                 else if (ch == '_')
2078                     continue;
2079                 else
2080                     break;
2081                 number *= 10;
2082                 if (digits >= 18) {
2083                     if ((number * 10) / 10 != number) {
2084                         overflow = true;
2085                         break;
2086                     }
2087                 }
2088                 number += digit;
2089                 digits++;
2090             }
2091             _pos = i;
2092         }
2093         if (overflow)
2094             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
2095         _sharedIntegerToken.setValue(number);
2096         dchar next = _line == _startLine && _pos < _len ? _lineText[_pos] : 0;
2097         if (next == 0)
2098             return _sharedIntegerToken;
2099         if (next == 'e' || next == 'E') {
2100             _pos++;
2101             return processDecFloatExponent(number);
2102         } else if (next == '.') {
2103             _pos++;
2104             return processDecFloatSecondPart(number);
2105         }
2106         return processIntegerSuffix();
2107     }
2108         
    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag.
    /// Convenience overload: takes line, position and type of the failed
    /// token from `incompleteToken`.
    protected Token parserError(string msg, Token incompleteToken) {
        return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
    }
    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag.
    /// Params:
    ///     msg = error message
    ///     startLine = line on which the failed token started
    ///     startPos = position at which the failed token started
    ///     failedTokenType = type of the token that could not be completed
    /// Returns: the shared InvalidToken (error tolerant mode only); its text is
    /// the source text of the failed token extended up to the recovery point.
    /// Throws: ParserException when errorTolerant is false; the exception
    /// carries the current read line/position, not startLine/startPos.
    protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
        if (_errorTolerant) {
            // NOTE(review): presumably converts a 1-based token position to a 0-based index - confirm
            startPos--;
            _sharedInvalidToken.setPos(startLine, startPos);
            _sharedInvalidToken.errorMessage = msg;
            _sharedInvalidToken.errorCode = 1; // for future extension
            _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
            // make invalid source text
            dchar[] invalidText;
            // when the token started on an earlier line only the current line is copied
            int p = startLine == _line ? startPos : 0;
            for (int i = p; i < _pos && i < _lineText.length; i++)
                invalidText ~= _lineText[i];

            // recover after error: skip up to the next blank or bracket
            // (for numbers also up to '*' or '/'), never leaving the current line
            for (; _pos < _lineText.length; _pos++) {
                dchar ch = _lineText[_pos];
                if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
                    break;
                if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
                    if (ch == '*' || ch == '/')
                        break;
                }
                invalidText ~= ch;
            }
            _sharedInvalidToken.text = invalidText;
            return _sharedInvalidToken;
        }
        throw new ParserException(msg, _lineStream.file, _line, _pos);
    }
2143 
2144     protected Keyword detectKeyword(dchar ch) {
2145         if (ch > 'z')
2146             return Keyword.NONE;
2147         int len = _len - _pos;
2148         switch (cast(ubyte)ch) {
2149             //    ABSTRACT,
2150             //    ALIAS,
2151             //    ALIGN,
2152             //    ASM,
2153             //    ASSERT,
2154             //    AUTO,
2155             case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos);
2156 
2157             //    BODY,
2158             //    BOOL,
2159             //    BREAK,
2160             //    BYTE,
2161             case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos);
2162                 
2163             //    CASE,
2164             //    CAST,
2165             //    CATCH,
2166             //    CDOUBLE,
2167             //    CENT,
2168             //    CFLOAT,
2169             //    CHAR,
2170             //    CLASS,
2171             //    CONST,
2172             //    CONTINUE,
2173             //    CREAL,
2174             case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos);
2175                 
2176             //    DCHAR,
2177             //    DEBUG,
2178             //    DEFAULT,
2179             //    DELEGATE,
2180             //    DELETE,
2181             //    DEPRECATED,
2182             //    DO,
2183             //    DOUBLE,
2184             case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos);
2185                 
2186             //    ELSE,
2187             //    ENUM,
2188             //    EXPORT,
2189             //    EXTERN,
2190             case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos);
2191                 
2192             //    FALSE,
2193             //    FINAL,
2194             //    FINALLY,
2195             //    FLOAT,
2196             //    FOR,
2197             //    FOREACH,
2198             //    FOREACH_REVERSE,
2199             //    FUNCTION,
2200             case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos);
2201                 
2202             //    GOTO,
2203             case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos);
2204                 
2205             //    IDOUBLE,
2206             //    IF,
2207             //    IFLOAT,
2208             //    IMMUTABLE,
2209             //    IMPORT,
2210             //    IN,
2211             //    INOUT,
2212             //    INT,
2213             //    INTERFACE,
2214             //    INVARIANT,
2215             //    IREAL,
2216             //    IS,
2217             case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos);
2218                 
2219             //    LAZY,
2220             //    LONG,
2221             case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos);
2222                 
2223             //    MACRO,
2224             //    MIXIN,
2225             //    MODULE,
2226             case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos);
2227                 
2228             //    NEW,
2229             //    NOTHROW,
2230             //    NULL,
2231             case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos);
2232                 
2233             //    OUT,
2234             //    OVERRIDE,
2235             case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos);
2236                 
2237             //    PACKAGE,
2238             //    PRAGMA,
2239             //    PRIVATE,
2240             //    PROTECTED,
2241             //    PUBLIC,
2242             //    PURE,
2243             case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos);
2244                 
2245             //    REAL,
2246             //    REF,
2247             //    RETURN,
2248             case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos);
2249                 
2250             //    SCOPE,
2251             //    SHARED,
2252             //    SHORT,
2253             //    STATIC,
2254             //    STRUCT,
2255             //    SUPER,
2256             //    SWITCH,
2257             //    SYNCHRONIZED,
2258             case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos);
2259                 
2260             //    TEMPLATE,
2261             //    THIS,
2262             //    THROW,
2263             //    TRUE,
2264             //    TRY,
2265             //    TYPEDEF,
2266             //    TYPEID,
2267             //    TYPEOF,
2268             case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos);
2269                 
2270             //    UBYTE,
2271             //    UCENT,
2272             //    UINT,
2273             //    ULONG,
2274             //    UNION,
2275             //    UNITTEST,
2276             //    USHORT,
2277             case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos);
2278                 
2279             //    VERSION,
2280             //    VOID,
2281             //    VOLATILE,
2282             case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos);
2283                 
2284             //    WCHAR,
2285             //    WHILE,
2286             //    WITH,
2287             case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos);
2288                 
2289             //    FILE,
2290             //    MODULE,
2291             //    LINE,
2292             //    FUNCTION,
2293             //    PRETTY_FUNCTION,
2294             //
2295             //    GSHARED,
2296             //    TRAITS,
2297             //    VECTOR,
2298             //    PARAMETERS,
2299             case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos);
2300             default: return Keyword.NONE;                
2301         }
2302     }    
2303     protected OpCode detectOp(dchar ch) nothrow {
2304         if (ch >= 128)
2305             return OpCode.NONE;
2306         dchar ch2 = _pos < _len ? _lineText[_pos] : 0;
2307         dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
2308         switch(cast(ubyte)ch) {
2309             //    DIV,         //    /
2310             //    DIV_EQ,     //    /=
2311             case '/':
2312                 if (ch2 == '=') {
2313                     _pos++;
2314                     return OpCode.DIV_EQ;
2315                 }
2316                 return OpCode.DIV;
2317             //    DOT,         //    .
2318             //    DOT_DOT,     //    ..
2319             //    DOT_DOT_DOT,//    ...
2320             case '.':
2321                 if (ch2 == '.') {
2322                     if (ch3 == '.') {
2323                         _pos += 2;
2324                         return OpCode.DOT_DOT_DOT;
2325                     }
2326                     _pos++;
2327                     return OpCode.DOT_DOT;
2328                 }
2329                 return OpCode.DOT;
2330             //    AND,         //    &
2331             //    AND_EQ,     //    &=
2332             //    LOG_AND,     //    &&
2333             case '&':
2334                 if (ch2 == '=') {
2335                     _pos++;
2336                     return OpCode.AND_EQ;
2337                 }
2338                 if (ch2 == '&') {
2339                     _pos++;
2340                     return OpCode.LOG_AND;
2341                 }
2342                 return OpCode.AND;
2343             //    OR,         //    |
2344             //    OR_EQ,         //    |=
2345             //    LOG_OR,     //    ||
2346             case '|':
2347                 if (ch2 == '=') {
2348                     _pos++;
2349                     return OpCode.OR_EQ;
2350                 }
2351                 if (ch2 == '|') {
2352                     _pos++;
2353                     return OpCode.LOG_OR;
2354                 }
2355                 return OpCode.OR;
2356             //    MINUS,         //    -
2357             //    MINUS_EQ,     //    -=
2358             //    MINUS_MINUS,//    --
2359             case '-':
2360                 if (ch2 == '=') {
2361                     _pos++;
2362                     return OpCode.MINUS_EQ;
2363                 }
2364                 if (ch2 == '-') {
2365                     _pos++;
2366                     return OpCode.MINUS_MINUS;
2367                 }
2368                 return OpCode.MINUS;
2369             //    PLUS,         //    +
2370             //    PLUS_EQ,     //    +=
2371             //    PLUS_PLUS,     //    ++
2372             case '+':
2373                 if (ch2 == '=') {
2374                     _pos++;
2375                     return OpCode.PLUS_EQ;
2376                 }
2377                 if (ch2 == '+') {
2378                     _pos++;
2379                     return OpCode.PLUS_PLUS;
2380                 }
2381                 return OpCode.PLUS;
2382             //    LT,         //    <
2383             //    LT_EQ,         //    <=
2384             //    SHL,         //    <<
2385             //    SHL_EQ,     //    <<=
2386             //    LT_GT,         //    <>
2387             //    NE_EQ,         //    <>=
2388             case '<':
2389                 if (ch2 == '<') {
2390                     if (ch3 == '=') {
2391                         _pos += 2;
2392                         return OpCode.SHL_EQ;
2393                     }
2394                     _pos++;
2395                     return OpCode.SHL;
2396                 }
2397                 if (ch2 == '>') {
2398                     if (ch3 == '=') {
2399                         _pos += 2;
2400                         return OpCode.NE_EQ;
2401                     }
2402                     _pos++;
2403                     return OpCode.LT_GT;
2404                 }
2405                 if (ch2 == '=') {
2406                     _pos++;
2407                     return OpCode.LT_EQ;
2408                 }
2409                 return OpCode.LT;
2410             //    GT,         //    >
2411             //    GT_EQ,         //    >=
2412             //    SHR_EQ        //    >>=
2413             //    ASR_EQ,     //    >>>=
2414             //    SHR,         //    >>
2415             //    ASR,         //    >>>
2416             case '>':
2417                 if (ch2 == '>') {
2418                     if (ch3 == '>') {
2419                         dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2420                         if (ch4 == '=') { // >>>=
2421                             _pos += 3;
2422                             return OpCode.ASR_EQ;
2423                         }
2424                         _pos += 2;
2425                         return OpCode.ASR; // >>>
2426                     }
2427                     if (ch3 == '=') { // >>=
2428                         _pos += 2;
2429                         return OpCode.SHR_EQ;
2430                     }
2431                     _pos++;
2432                     return OpCode.SHR;
2433                 }
2434                 if (ch2 == '=') { // >=
2435                     _pos++;
2436                     return OpCode.GT_EQ;
2437                 }
2438                 // >
2439                 return OpCode.GT;
2440             //    NOT,         //    !
2441             //    NOT_EQ        //    !=
2442             //    NOT_LT_GT,     //    !<>
2443             //    NOT_LT_GT_EQ, //    !<>=
2444             //    NOT_LT,     //    !<
2445             //    NOT_LT_EQ,     //    !<=
2446             //    NOT_GT,     //    !>
2447             //    NOT_GT_EQ,     //    !>=
2448             case '!':
2449                 if (ch2 == '<') { // !<
2450                     if (ch3 == '>') { // !<>
2451                         dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2452                         if (ch4 == '=') { // !<>=
2453                             _pos += 3;
2454                             return OpCode.NOT_LT_GT_EQ;
2455                         }
2456                         _pos += 2;
2457                         return OpCode.NOT_LT_GT; // !<>
2458                     }
2459                     if (ch3 == '=') { // !<=
2460                         _pos += 2;
2461                         return OpCode.NOT_LT_EQ;
2462                     }
2463                     _pos++;
2464                     return OpCode.NOT_LT; // !<
2465                 }
2466                 if (ch2 == '=') { // !=
2467                     _pos++;
2468                     return OpCode.NOT_EQ;
2469                 }
2470                 return OpCode.NOT;
2471             //    PAR_OPEN,     //    (
2472             case '(':
2473                 return OpCode.PAR_OPEN;
2474             //    PAR_CLOSE,     //    )
2475             case ')':
2476                 return OpCode.PAR_CLOSE;
2477             //    SQ_OPEN,     //    [
2478             case '[':
2479                 return OpCode.SQ_OPEN;
2480             //    SQ_CLOSE,     //    ]
2481             case ']':
2482                 return OpCode.SQ_CLOSE;
2483             //    CURL_OPEN,     //    {
2484             case '{':
2485                 return OpCode.CURL_OPEN;
2486             //    CURL_CLOSE, //    }
2487             case '}':
2488                 return OpCode.CURL_CLOSE;
2489             //    QUEST,         //    ?
2490             case '?':
2491                 return OpCode.QUEST;
2492             //    COMMA,         //    ,
2493             case ',':
2494                 return OpCode.COMMA;
2495             //    SEMICOLON,     //    ;
2496             case ';':
2497                 return OpCode.SEMICOLON;
2498             //    COLON,         //    :
2499             case ':':
2500                 return OpCode.COLON;
2501             //    DOLLAR,     //    $
2502             case '$':
2503                 return OpCode.DOLLAR;
2504             //    EQ,         //    =
2505             //    QE_EQ,         //    ==
2506             //    EQ_GT,         //    =>
2507             case '=':
2508                 if (ch2 == '=') { // ==
2509                     _pos++;
2510                     return OpCode.QE_EQ;
2511                 }
2512                 if (ch2 == '>') { // =>
2513                     _pos++;
2514                     return OpCode.EQ_GT;
2515                 }
2516                 return OpCode.EQ;
2517             //    MUL,         //    *
2518             //    MUL_EQ,     //    *=
2519             case '*':
2520                 if (ch2 == '=') {
2521                     _pos++;
2522                     return OpCode.MUL_EQ;
2523                 }
2524                 return OpCode.MUL;
2525             //    MOD,     //    %
2526             //    MOD_EQ, //    %=
2527             case '%':
2528                 if (ch2 == '=') {
2529                     _pos++;
2530                     return OpCode.MOD_EQ;
2531                 }
2532                 return OpCode.MOD;
2533             //    XOR,         //    ^
2534             //    XOR_EQ,     //    ^=
2535             //    LOG_XOR,     //    ^^
2536             //    LOG_XOR_EQ, //    ^^=
2537             case '^':
2538                 if (ch2 == '^') {
2539                     if (ch3 == '=') {
2540                         _pos += 2;
2541                         return OpCode.LOG_XOR_EQ;
2542                     }
2543                     _pos++;
2544                     return OpCode.LOG_XOR;
2545                 }
2546                 if (ch2 == '=') {
2547                     _pos++;
2548                     return OpCode.XOR_EQ;
2549                 }
2550                 return OpCode.XOR;
2551             //    INV,         //    ~
2552             //    INV_EQ,     //    ~=
2553             case '~':
2554                 if (ch2 == '=') {
2555                     _pos++;
2556                     return OpCode.INV_EQ;
2557                 }
2558                 return OpCode.INV;
2559             //    AT,         //    @
2560             case '@':
2561                 return OpCode.AT;
2562             //    SHARP         //    #
2563             case '#':
2564                 return OpCode.SHARP;
2565             default:
2566                 return OpCode.NONE;
2567         }
2568     }
2569     
2570     protected Token processCharacterLiteral() {
2571         _sharedCharacterLiteralToken.setPos(_startLine, _startPos);
2572         if (_pos + 2 > _len)
2573             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2574         dchar ch = _lineText[_pos++];
2575         dchar ch2 = _lineText[_pos++];
2576         dchar type = 0;
2577         if (ch == '\\') {
2578             // process escaped character - store it in ch
2579             // TODO: support all escape sequences
2580             switch(ch2) {
2581                 case 'r':
2582                     ch = '\r';
2583                     break;
2584                 case 'n':
2585                     ch = '\n';
2586                     break;
2587                 case 't':
2588                     ch = '\t';
2589                     break;
2590                 case '\\':
2591                     ch = '\\';
2592                     break;
2593                 default:
2594                     ch = ch2;
2595                     break;
2596             }
2597             // here must be closing '
2598             if (_pos + 1 > _len)
2599                 return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2600             ch2 = _lineText[_pos++];
2601         }
2602         if (ch2 != '\'')
2603             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2604         if (_pos < _len) {
2605             dchar t = _lineText[_pos];
2606             if (t == 'd' || t == 'w' || t == 'c') {
2607                 type = t;
2608                 _pos++;
2609             } else if (isIdentMiddleChar(ch)) {
2610                 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken);
2611             }
2612         }
2613         _sharedCharacterLiteralToken.setCharacter(ch, type);
2614         return _sharedCharacterLiteralToken;
2615     }
2616 
2617     protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
2618         bool wysiwyg = (delimiter == 'r' || delimiter == '`');
2619         //writeln("processDoubleQuotedString()");
2620         _sharedStringLiteralToken.setPos(_startLine, _startPos);
2621         _stringLiteralAppender.reset();
2622         if (delimiter == 'r') {
2623             _pos++;
2624             delimiter = '\"';
2625         }
2626         dchar type = 0;
2627         for (;;) {
2628             int i = _pos;
2629             int endPos = int.max;
2630             bool lastBackSlash = false;
2631             for(; i < _len; i++) {
2632                 dchar ch = _lineText[i];
2633                 if (ch == '\\') {
2634                     if (lastBackSlash)
2635                         lastBackSlash = false;
2636                     else
2637                         lastBackSlash = true;
2638                 }
2639                 else if (ch == delimiter && !lastBackSlash) {
2640                     endPos = i;
2641                     break;
2642                 }
2643                 else if(lastBackSlash)
2644                     lastBackSlash = false;
2645             }
2646             if (endPos != int.max) {
2647                 // found end quote
2648                 _stringLiteralAppender.append(_lineText[_pos .. endPos]);
2649                 _pos = endPos + 1;
2650                 break;
2651             }
2652             // no quote by end of line
2653             _stringLiteralAppender.append(_lineText[_pos .. $]);
2654             _stringLiteralAppender.appendEol();
2655             if (!nextLine()) {
2656                 // do we need to throw exception if eof comes before end of string?
2657                 break;
2658             }
2659         }
2660         dchar t = 0;
2661         if (_pos < _len) {
2662             dchar ch = _lineText[_pos];
2663             if (ch == 'c' || ch == 'w' || ch == 'd') {
2664                 t = ch;
2665                 _pos++;
2666                 if (_pos < _len) {
2667                     ch = _lineText[_pos];
2668                     if (isIdentMiddleChar(ch))
2669                         return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2670                 }
2671             } else if (isIdentMiddleChar(ch))
2672                 return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2673         }
2674         if (t != 0) {
2675             if (type != 0 && t != type)
2676                 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
2677             type = t;
2678         }
2679         if (wysiwyg) {
2680             // no escape processing
2681             _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2682             return _sharedStringLiteralToken;
2683         }
2684         _stringLiteralAppender.processEscapeSequences();
2685         _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2686         return _sharedStringLiteralToken;
2687     }
2688 
2689     protected SysTime buildTime;
2690     
2691     //    string literal of the date of compilation "mmm dd yyyy"
2692     protected dstring formatBuildDate() {
2693         // TODO: provide proper format
2694         return to!dstring(buildTime);
2695     }
2696     
2697     //    string literal of the time of compilation "hh:mm:ss"
2698     protected dstring formatBuildTime() {
2699         // TODO: provide proper format
2700         return to!dstring(buildTime);
2701     }
2702     
2703     //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2704     protected dstring formatBuildTimestamp() {
2705         // TODO: provide proper format
2706         return to!dstring(buildTime);
2707     }
2708     
2709     static immutable dstring VERSION = "0.1";
2710     static immutable dstring VENDOR = "coolreader.org";
2711     
2712     protected Token makeSpecialTokenString(dstring str, int pos) {
2713         _sharedStringLiteralToken.setPos(_startLine, _startPos);
2714         _sharedStringLiteralToken.setText(cast(dchar[])str, 0);
2715         return _sharedStringLiteralToken;
2716     }
2717     
2718     protected Token processSpecialToken(Keyword keyword, int pos) {
2719         switch (keyword) {
2720             //Special Token    Replaced with
2721             case Keyword.DATE: //    string literal of the date of compilation "mmm dd yyyy"
2722                 return makeSpecialTokenString(formatBuildDate(), pos);
2723             case Keyword.TIME: //    string literal of the time of compilation "hh:mm:ss"
2724                 return makeSpecialTokenString(formatBuildTime(), pos);
2725             case Keyword.TIMESTAMP: //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2726                 return makeSpecialTokenString(formatBuildTimestamp(), pos);
2727             case Keyword.VENDOR: //    Compiler vendor string, such as "Digital Mars D"
2728                 return makeSpecialTokenString(VENDOR, pos);
2729             case Keyword.VERSION_: //    Compiler version as an integer, such as 2001
2730                 return makeSpecialTokenString(VERSION, pos);
2731             default:
2732                 parserError("Unknown special token", _line, pos);
2733         }
2734         return null;
2735     }
2736     
2737     protected int _startLine;
2738     protected int _startPos;
2739 
2740     // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls).
2741     Token nextToken() {
2742         _startLine = _line;
2743         _startPos = _pos;
2744         dchar ch = nextChar();
2745         if (ch == EOF_CHAR) {
2746             return emitEof();
2747         }
2748         if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
2749             // white space (treat EOL as whitespace, too)
2750             return processWhiteSpace(ch);
2751         }
2752         dchar next = _pos < _len ? _lineText[_pos] : 0;
2753         if (ch == '/') {
2754             if (next == '/')
2755                 return processOneLineComment();
2756             else if (next == '*')
2757                 return processMultilineComment();
2758             else if (next == '+')
2759                 return processNestedComment();
2760         }
2761         if (ch == '#' && _line == 1)
2762             return processOneLineSharpComment();
2763         if (ch == '\"')
2764             return processDoubleQuotedOrWysiwygString(ch);
2765         if (ch == '\'')
2766             return processCharacterLiteral();
2767         if (ch == 'x' && next == '\"')
2768             return processHexString();
2769         if (ch == 'q' && next == '\"')
2770             return processDelimitedString();
2771         if ((ch == 'r' && next == '\"') || (ch == '`'))
2772             return processDoubleQuotedOrWysiwygString(ch);
2773         int oldPos = _pos - 1;
2774         
2775         if (ch == '0') {
2776             if (next == 'b' || next == 'B')
2777                 return processBinaryNumber();
2778             if (next == 'x' || next == 'X')
2779                 return processHexNumber();
2780             if (next >= '0' && next <= '9')
2781                 return processOctNumber();
2782             if (next >= '0' && next <= '9')
2783                 return processDecNumber(ch);
2784         }
2785         if (ch >= '0' && ch <= '9')
2786             return processDecNumber(ch);
2787         if (ch == '.' && next >= '0' && next <= '9') // .123
2788             return processDecFloatSecondPart(0);
2789                 
2790         if (ch == '_' || isUniversalAlpha(ch)) {
2791             // start of identifier or keyword?
2792             Keyword keyword = detectKeyword(ch);
2793             if (keyword != Keyword.NONE) {
2794                 switch (keyword) {
2795                     //Special Token    Replaced with
2796                     case Keyword.EOF: return emitEof(); //    sets the scanner to the end of the file
2797                     case Keyword.DATE: //    string literal of the date of compilation "mmm dd yyyy"
2798                     case Keyword.TIME: //    string literal of the time of compilation "hh:mm:ss"
2799                     case Keyword.TIMESTAMP: //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2800                     case Keyword.VENDOR: //    Compiler vendor string, such as "Digital Mars D"
2801                     case Keyword.VERSION_: //    Compiler version as an integer, such as 2001
2802                         return processSpecialToken(keyword, oldPos);
2803                     default:
2804                         _sharedKeywordToken.setPos(_startLine, _startPos);
2805                         _sharedKeywordToken.keyword = keyword;
2806                         return _sharedKeywordToken;
2807                 }
2808             }
2809             return processIdent(ch);
2810         }
2811         OpCode op = detectOp(ch);
2812         if (op != OpCode.NONE) {
2813             _sharedOpToken.setPos(_startLine, _startPos);
2814             _sharedOpToken.opCode = op;
2815             return _sharedOpToken;
2816         }
2817         return parserError("Invalid token", _line, _pos);
2818     }
2819 
2820     /// tokenize all
2821     Token[] allTokens() {
2822         Token[] res;
2823         res.assumeSafeAppend;
2824         for(;;) {
2825             Token tok = nextToken();
2826             if (!tok || tok.type == TokenType.EOF)
2827                 break;
2828             res ~= tok.clone();
2829         }
2830         return res;
2831     }
2832 }
2833 
// Manual lexer smoke test: dumps every token of a test file to stdout.
// The body is compiled only when version DisableLexerTest is defined.
unittest {
    version(DisableLexerTest) {
    import std.stdio;
    import std.conv;
    import std.utf;
    import dlangui.core.linestream;
    string testFile = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
    writeln("opening file");
    try {
        std.stream.File input = new std.stream.File(testFile);
        scope(exit) { input.close(); }
        try {
            LineStream lineStream = LineStream.create(input, testFile);
            Tokenizer lexer = new Tokenizer(lineStream);
            bool finished = false;
            while (!finished) {
                Token current = lexer.nextToken();
                if (current is null) {
                    writeln("Null token returned");
                    finished = true;
                } else if (current.type == TokenType.EOF) {
                    writeln("EOF token");
                    finished = true;
                } else {
                    // line:pos <tab> token text
                    writeln("", current.line, ":", current.pos, "\t", current.toString);
                }
            }
        } catch (Exception e) {
            writeln("Exception " ~ e.toString);
        }
    } catch (Exception e) {
        writeln("Exception " ~ e.toString);
    }
    }
}
2868 
/// Looks up the character registered for a named entity; returns 0 if the name is unknown.
dchar entityToChar(string name) {
    auto found = name in entityToCharMap;
    if (found is null)
        return 0;
    return *found;
}
2876 
/// Finds the entity name registered for a character; returns null if there is none.
string charToEntity(dchar ch) {
    auto found = ch in charToEntityMap;
    if (found is null)
        return null;
    return *found;
}
2884 
/// entity name -> character
private __gshared dchar[string] entityToCharMap;
/// character -> entity name
private __gshared string[dchar] charToEntityMap;

/// Registers an entity in both lookup tables; a later registration overwrites an earlier one for the same key.
private void addEntity(string name, dchar ch) {
    charToEntityMap[ch] = name;
    entityToCharMap[name] = ch;
}
2891 __gshared static this() {
2892     addEntity("quot", 34);
2893     addEntity("amp",    38);
2894     addEntity("lt",    60);
2895     addEntity("gt",    62);
2896     addEntity("OElig",    338);
2897     addEntity("oelig",    339);
2898     addEntity("Scaron",    352);
2899     addEntity("scaron",    353);
2900     addEntity("Yuml",    376);
2901     addEntity("circ",    710);
2902     addEntity("tilde",    732);
2903     addEntity("ensp",    8194);
2904     addEntity("emsp",    8195);
2905     addEntity("thinsp",    8201);
2906     addEntity("zwnj",    8204);
2907     addEntity("zwj",    8205);
2908     addEntity("lrm",    8206);
2909     addEntity("rlm",    8207);
2910     addEntity("ndash",    8211);
2911     addEntity("mdash",    8212);
2912     addEntity("lsquo",    8216);
2913     addEntity("rsquo",    8217);
2914     addEntity("sbquo",    8218);
2915     addEntity("ldquo",    8220);
2916     addEntity("rdquo",    8221);
2917     addEntity("bdquo",    8222);
2918     addEntity("dagger",    8224);
2919     addEntity("Dagger",    8225);
2920     addEntity("permil",    8240);
2921     addEntity("lsaquo",    8249);
2922     addEntity("rsaquo",    8250);
2923     addEntity("euro",    8364);
2924     addEntity("nbsp",    160);
2925     addEntity("iexcl",    161);
2926     addEntity("cent",    162);
2927     addEntity("pound",    163);
2928     addEntity("curren",    164);
2929     addEntity("yen",    165);
2930     addEntity("brvbar",    166);
2931     addEntity("sect",    167);
2932     addEntity("uml",    168);
2933     addEntity("copy",    169);
2934     addEntity("ordf",    170);
2935     addEntity("laquo",    171);
2936     addEntity("not",    172);
2937     addEntity("shy",    173);
2938     addEntity("reg",    174);
2939     addEntity("macr",    175);
2940     addEntity("deg",    176);
2941     addEntity("plusmn",    177);
2942     addEntity("sup2",    178);
2943     addEntity("sup3",    179);
2944     addEntity("acute",    180);
2945     addEntity("micro",    181);
2946     addEntity("para",    182);
2947     addEntity("middot",    183);
2948     addEntity("cedil",    184);
2949     addEntity("sup1",    185);
2950     addEntity("ordm",    186);
2951     addEntity("raquo",    187);
2952     addEntity("frac14",    188);
2953     addEntity("frac12",    189);
2954     addEntity("frac34",    190);
2955     addEntity("iquest",    191);
2956     addEntity("Agrave",    192);
2957     addEntity("Aacute",    193);
2958     addEntity("Acirc",    194);
2959     addEntity("Atilde",    195);
2960     addEntity("Auml",    196);
2961     addEntity("Aring",    197);
2962     addEntity("AElig",    198);
2963     addEntity("Ccedil",    199);
2964     addEntity("Egrave",    200);
2965     addEntity("Eacute",    201);
2966     addEntity("Ecirc",    202);
2967     addEntity("Euml",    203);
2968     addEntity("Igrave",    204);
2969     addEntity("Iacute",    205);
2970     addEntity("Icirc",    206);
2971     addEntity("Iuml",    207);
2972     addEntity("ETH",    208);
2973     addEntity("Ntilde",    209);
2974     addEntity("Ograve",    210);
2975     addEntity("Oacute",    211);
2976     addEntity("Ocirc",    212);
2977     addEntity("Otilde",    213);
2978     addEntity("Ouml",    214);
2979     addEntity("times",    215);
2980     addEntity("Oslash",    216);
2981     addEntity("Ugrave",    217);
2982     addEntity("Uacute",    218);
2983     addEntity("Ucirc",    219);
2984     addEntity("Uuml",    220);
2985     addEntity("Yacute",    221);
2986     addEntity("THORN",    222);
2987     addEntity("szlig",    223);
2988     addEntity("agrave",    224);
2989     addEntity("aacute",    225);
2990     addEntity("acirc",    226);
2991     addEntity("atilde",    227);
2992     addEntity("auml",    228);
2993     addEntity("aring",    229);
2994     addEntity("aelig",    230);
2995     addEntity("ccedil",    231);
2996     addEntity("egrave",    232);
2997     addEntity("eacute",    233);
2998     addEntity("ecirc",    234);
2999     addEntity("euml",    235);
3000     addEntity("igrave",    236);
3001     addEntity("iacute",    237);
3002     addEntity("icirc",    238);
3003     addEntity("iuml",    239);
3004     addEntity("eth",    240);
3005     addEntity("ntilde",    241);
3006     addEntity("ograve",    242);
3007     addEntity("oacute",    243);
3008     addEntity("ocirc",    244);
3009     addEntity("otilde",    245);
3010     addEntity("ouml",    246);
3011     addEntity("divide",    247);
3012     addEntity("oslash",    248);
3013     addEntity("ugrave",    249);
3014     addEntity("uacute",    250);
3015     addEntity("ucirc",    251);
3016     addEntity("uuml",    252);
3017     addEntity("yacute",    253);
3018     addEntity("thorn",    254);
3019     addEntity("yuml",    255);
3020     addEntity("fnof",    402);
3021     addEntity("Alpha",    913);
3022     addEntity("Beta",    914);
3023     addEntity("Gamma",    915);
3024     addEntity("Delta",    916);
3025     addEntity("Epsilon",    917);
3026     addEntity("Zeta",    918);
3027     addEntity("Eta",    919);
3028     addEntity("Theta",    920);
3029     addEntity("Iota",    921);
3030     addEntity("Kappa",    922);
3031     addEntity("Lambda",    923);
3032     addEntity("Mu",    924);
3033     addEntity("Nu",    925);
3034     addEntity("Xi",    926);
3035     addEntity("Omicron",    927);
3036     addEntity("Pi",    928);
3037     addEntity("Rho",    929);
3038     addEntity("Sigma",    931);
3039     addEntity("Tau",    932);
3040     addEntity("Upsilon",    933);
3041     addEntity("Phi",    934);
3042     addEntity("Chi",    935);
3043     addEntity("Psi",    936);
3044     addEntity("Omega",    937);
3045     addEntity("alpha",    945);
3046     addEntity("beta",    946);
3047     addEntity("gamma",    947);
3048     addEntity("delta",    948);
3049     addEntity("epsilon",    949);
3050     addEntity("zeta",    950);
3051     addEntity("eta",    951);
3052     addEntity("theta",    952);
3053     addEntity("iota",    953);
3054     addEntity("kappa",    954);
3055     addEntity("lambda",    955);
3056     addEntity("mu",    956);
3057     addEntity("nu",    957);
3058     addEntity("xi",    958);
3059     addEntity("omicron",    959);
3060     addEntity("pi",    960);
3061     addEntity("rho",    961);
3062     addEntity("sigmaf",    962);
3063     addEntity("sigma",    963);
3064     addEntity("tau",    964);
3065     addEntity("upsilon",    965);
3066     addEntity("phi",    966);
3067     addEntity("chi",    967);
3068     addEntity("psi",    968);
3069     addEntity("omega",    969);
3070     addEntity("thetasym",    977);
3071     addEntity("upsih",    978);
3072     addEntity("piv",    982);
3073     addEntity("bull",    8226);
3074     addEntity("hellip",    8230);
3075     addEntity("prime",    8242);
3076     addEntity("Prime",    8243);
3077     addEntity("oline",    8254);
3078     addEntity("frasl",    8260);
3079     addEntity("weierp",    8472);
3080     addEntity("image",    8465);
3081     addEntity("real",    8476);
3082     addEntity("trade",    8482);
3083     addEntity("alefsym",    8501);
3084     addEntity("larr",    8592);
3085     addEntity("uarr",    8593);
3086     addEntity("rarr",    8594);
3087     addEntity("darr",    8595);
3088     addEntity("harr",    8596);
3089     addEntity("crarr",    8629);
3090     addEntity("lArr",    8656);
3091     addEntity("uArr",    8657);
3092     addEntity("rArr",    8658);
3093     addEntity("dArr",    8659);
3094     addEntity("hArr",    8660);
3095     addEntity("forall",    8704);
3096     addEntity("part",    8706);
3097     addEntity("exist",    8707);
3098     addEntity("empty",    8709);
3099     addEntity("nabla",    8711);
3100     addEntity("isin",    8712);
3101     addEntity("notin",    8713);
3102     addEntity("ni",    8715);
3103     addEntity("prod",    8719);
3104     addEntity("sum",    8721);
3105     addEntity("minus",    8722);
3106     addEntity("lowast",    8727);
3107     addEntity("radic",    8730);
3108     addEntity("prop",    8733);
3109     addEntity("infin",    8734);
3110     addEntity("ang",    8736);
3111     addEntity("and",    8743);
3112     addEntity("or",    8744);
3113     addEntity("cap",    8745);
3114     addEntity("cup",    8746);
3115     addEntity("int",    8747);
3116     addEntity("there4",    8756);
3117     addEntity("sim",    8764);
3118     addEntity("cong",    8773);
3119     addEntity("asymp",    8776);
3120     addEntity("ne",    8800);
3121     addEntity("equiv",    8801);
3122     addEntity("le",    8804);
3123     addEntity("ge",    8805);
3124     addEntity("sub",    8834);
3125     addEntity("sup",    8835);
3126     addEntity("nsub",    8836);
3127     addEntity("sube",    8838);
3128     addEntity("supe",    8839);
3129     addEntity("oplus",    8853);
3130     addEntity("otimes",    8855);
3131     addEntity("perp",    8869);
3132     addEntity("sdot",    8901);
3133     addEntity("lceil",    8968);
3134     addEntity("rceil",    8969);
3135     addEntity("lfloor",    8970);
3136     addEntity("rfloor",    8971);
3137     addEntity("loz",    9674);
3138     addEntity("spades",    9824);
3139     addEntity("clubs",    9827);
3140     addEntity("hearts",    9829);
3141     addEntity("diams",    9830);
3142     addEntity("lang",    10216);
3143     addEntity("rang",    10217);
3144 }
3145 
3146 
3147 
3148 //void runTokenizerTest()
3149 unittest 
3150 {
3151     import std.algorithm;
3152     class TokenTest {
3153         int _line;
3154         string _file;
3155         this(string file, int line) {
3156             _file = file;
3157             _line = line;
3158         }
3159         bool doTest(Token token) {
3160             return true;
3161         }        
3162         void execute(Tokenizer tokenizer) {
3163             Token token = tokenizer.nextToken();
3164             if (!doTest(token)) {
3165                 assert(false, "    token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ "  foundToken: " ~ token.toString ~ " expected: " ~ toString);
3166             }
3167         }
3168         public override @property string toString() {
3169             return "TokenTest";
3170         }
3171     }
3172     void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
3173         Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
3174         for (int i = 0; i < tokens.length; i++) {
3175             tokens[i].execute(tokenizer);
3176         }
3177     }
3178     class KeywordTest : TokenTest {
3179         Keyword _code;
3180         this(Keyword code, string file = __FILE__, uint line = __LINE__) {
3181             super(file, line);
3182             _code = code;
3183         }
3184         override bool doTest(Token token) {
3185             if (token.type != TokenType.KEYWORD)
3186                 return false;
3187             if (token.keyword != _code)
3188                 return false;
3189             return true;
3190         }        
3191         public override @property string toString() {
3192             return "Keyword:" ~ to!string(_code);
3193         }
3194     }
3195     class OpTest : TokenTest {
3196         OpCode _code;
3197         this(OpCode code, string file = __FILE__, uint line = __LINE__) {
3198             super(file, line);
3199             _code = code;
3200         }
3201         override bool doTest(Token token) {
3202             if (token.type != TokenType.OP)
3203                 return false;
3204             if (token.opCode != _code)
3205                 return false;
3206             return true;
3207         }        
3208         public override @property string toString() {
3209             return "Op:" ~ to!string(_code);
3210         }
3211     }
3212     class StringTest : TokenTest {
3213         dstring _value;
3214         dchar _literalType;
3215         this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
3216             super(file, line);
3217             _value = value;
3218             _literalType = literalType;
3219         }
3220         override bool doTest(Token token) {
3221             if (token.type != TokenType.STRING)
3222                 return false;
3223             if (!token.text.equal(_value))
3224                 return false;
3225             if (token.literalType != _literalType)
3226                 return false;
3227             return true;
3228         }        
3229         public override @property string toString() {
3230             return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
3231         }
3232     }
3233     class IntegerTest : TokenTest {
3234         ulong _value;
3235         bool _unsigned;
3236         bool _long;
3237         this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
3238             super(file, line);
3239             _value = value;
3240             _unsigned = unsignedFlag;
3241             _long = longFlag;
3242         }
3243         override bool doTest(Token token) {
3244             if (token.type != TokenType.INTEGER)
3245                 return false;
3246             if (token.intValue != _value)
3247                 return false;
3248             if (token.isUnsigned != _unsigned)
3249                 return false;
3250             if (token.isLong != _long)
3251                 return false;
3252             return true;
3253         }        
3254         public override @property string toString() {
3255             return "Integer:" ~ to!string(_value);
3256         }
3257     }
3258     class RealTest : TokenTest {
3259         real _value;
3260         ubyte _precision;
3261         bool _imaginary;
3262         this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
3263             super(file, line);
3264             _value = value;
3265             _precision = precision;
3266             _imaginary = imaginary;
3267         }
3268         override bool doTest(Token token) {
3269             if (token.type != TokenType.FLOAT)
3270                 return false;
3271             real diff = token.realValue - _value;
3272             real maxerr = _value / 1000000;
3273             if (diff < 0) diff = -diff;
3274             if (maxerr < 0) maxerr = -maxerr;
3275             if (diff > maxerr)
3276                 return false;
3277             if (token.precision != _precision)
3278                 return false;
3279             if (token.isImaginary != _imaginary)
3280                 return false;
3281             return true;
3282         }        
3283         public override @property string toString() {
3284             return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
3285         }
3286     }
3287     class IdentTest : TokenTest {
3288         string _value;
3289         this(string value, string file = __FILE__, uint line = __LINE__) {
3290             super(file, line);
3291             _value = value;
3292         }
3293         override bool doTest(Token token) {
3294             if (token.type != TokenType.IDENTIFIER)
3295                 return false;
3296             if (! to!string(token.text).equal(_value))
3297                 return false;
3298             return true;
3299         }        
3300         public override @property string toString() {
3301             return "Ident:" ~ _value;
3302         }
3303     }
3304     class CommentTest : TokenTest {
3305         this(string file = __FILE__, uint line = __LINE__) {
3306             super(file, line);
3307         }
3308         override bool doTest(Token token) {
3309             if (token.type != TokenType.COMMENT)
3310                 return false;
3311             return true;
3312         }        
3313         public override @property string toString() {
3314             return "Comment";
3315         }
3316     }
3317     class EOFTest : TokenTest {
3318         this(string file = __FILE__, uint line = __LINE__) {
3319             super(file, line);
3320         }
3321         override bool doTest(Token token) {
3322             if (token.type != TokenType.EOF)
3323                 return false;
3324             return true;
3325         }        
3326         public override @property string toString() {
3327             return "EOF";
3328         }
3329     }
3330     class WhiteSpaceTest : TokenTest {
3331         this(string file = __FILE__, uint line = __LINE__) {
3332             super(file, line);
3333         }
3334         override bool doTest(Token token) {
3335             if (token.type != TokenType.WHITESPACE)
3336                 return false;
3337             return true;
3338         }        
3339         public override @property string toString() {
3340             return "whiteSpace";
3341         }
3342     }
3343     TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) { 
3344         return new StringTest(value, literalType, file, line);
3345     }
3346     TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 
3347         return new IntegerTest(value, unsignedFlag, longFlag, file, line);
3348     }
3349     TokenTest checkReal(real value, byte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 
3350         return new RealTest(value, precision, imaginary, file, line);
3351     }
3352     TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) { 
3353         return new IdentTest(value, file, line);
3354     }
3355     TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) { 
3356         return new KeywordTest(value, file, line);
3357     }
3358     TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) { 
3359         return new OpTest(value, file, line);
3360     }
3361     TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) { 
3362         return new WhiteSpaceTest(file, line);
3363     }
3364     TokenTest checkComment(string file = __FILE__, uint line = __LINE__) { 
3365         return new CommentTest(file, line);
3366     }
3367     TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) { 
3368         return new EOFTest(file, line);
3369     }
3370 
    // test wysiwyg (r"...") string: the tokenizer input is r"simple\nstring" and the
    // expected token text keeps the backslash and 'n' as two separate characters
    testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);

    // test string literal forms, one per input line: plain double-quoted, with an
    // escape sequence, backquoted, with the d / c / w suffixes, with a named
    // character entity, and with a mix of single-character escapes.
    // Each literal is expected to be followed by one whitespace token (the line
    // break); tokens after the last literal are not checked.
    testTokenizer(q"TEST
"simple string"
"simple\nstring"
`simple string`
"simple string"d
"simple string"c
"simple string"w
"simple\&quot;string"
"\r\n\f\t\\\"\'&"
TEST"
                  , [
                      checkString("simple string"),
                      checkSpace(),
                      checkString("simple\nstring"), // escape decoded to a real newline
                      checkSpace(),
                      checkString("simple string"), // backquoted literal
                      checkSpace(),
                      checkString("simple string", 'd'),
                      checkSpace(),
                      checkString("simple string", 'c'),
                      checkSpace(),
                      checkString("simple string", 'w'),
                      checkSpace(),
                      checkString("simple\&quot;string"), // named entity decoded to a quote character
                      checkSpace(),
                      checkString("\r\n\f\t\\\"\'&"),
    ]);
    // basic test: keyword, whitespace, identifier, operator, end of input
    // NOTE(review): the heredoc body ends with a line break, yet EOF is expected
    // directly after ';' -- presumably the tokenizer emits no whitespace token
    // for it; confirm against Tokenizer.nextToken.
    testTokenizer(q"TEST
int i;
TEST"
                  , [
                      checkKeyword(Keyword.INT),
                      checkSpace(),
                      checkIdent("i"),
                      checkOp(OpCode.SEMICOLON),
                      checkEOF()
                  ]);
    // test numbers: integer literals in binary, hex (with U and L suffixes), octal
    // and decimal (with and without '_' separators), then floating-point literals
    // with f / L / i suffixes and exponents
    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
                  , [
                      checkInteger(13), // 0b1101
                      checkSpace(),
                      checkInteger(0x123abcd, true, false), // U suffix: unsigned
                      checkSpace(),
                      checkInteger(0xabc, false, true), // L suffix: long
                      checkSpace(),
                      checkInteger(std.conv.octal!743), // 0743 (leading zero) read as octal
                      checkSpace(),
                      checkInteger(192_837_465),
                      checkSpace(),
                      checkInteger(0),
                      checkSpace(),
                      checkInteger(192837465), // '_' separators in the input are ignored
                      checkSpace(),
                      checkReal(5.25),
                      checkSpace(),
                      checkReal(12.3f, 0), // f suffix: precision 0
                      checkSpace(),
                      checkReal(54.1L, 2), // L suffix: precision 2
                      checkSpace(),
                      checkReal(67.1, 1, true), // i suffix: imaginary
                      checkSpace(),
                      checkReal(3e3),
                      checkSpace(),
                      checkReal(25.67e-5f, 0),
                      checkEOF()
                  ]);
    // keyword-prefix check: an identifier that merely starts with a keyword
    // (e.g. `for` in `fork`, `in` in `ind`) must come out as a single identifier
    // token rather than being recognized as a keyword
    testTokenizer("fork;", [checkIdent("fork"),checkOp(OpCode.SEMICOLON),checkEOF()]);
3445 
3446 }
3447