1 module ddc.lexer.tokenizer;
2 
3 import ddc.lexer.textsource;
4 import ddc.lexer.exceptions;
5 
6 import std.stdio;
7 import std.datetime;
8 import std.conv;
9 import std.utf;
10 import std.math;
11 
/// Kinds of token distinguished by this module.
///
/// The enum is backed by `ubyte`. Members are numbered consecutively from 0
/// in the order listed; the ordinals are spelled out so the numbering is
/// visible at a glance.
enum TokenType : ubyte {
    EOF        = 0,
    // EOL has no token type of its own; its entry is kept commented out:
    //EOL,
    WHITESPACE = 1,
    COMMENT    = 2,
    IDENTIFIER = 3,
    STRING     = 4,
    CHARACTER  = 5,
    INTEGER    = 6,
    FLOAT      = 7,
    KEYWORD    = 8,
    OP         = 9,
    INVALID    = 10,
}
26 
/**
 * Bit set for fast identifier-character classification.
 *
 * A set bit marks a code point that is either a UniversalAlpha character
 * (ISO/IEC 9899:1999 Annex D, "Universal character names for identifiers")
 * or one of a..z, A..Z, '_'.
 *
 * Layout: 1728 words * 32 bits = one bit for every code point in
 * U+0000..U+D7FF (0xd7ff is the highest code point covered). Code point `c`
 * lives in bit `(c & 31)` of element `[c >> 5]`. Each source row below holds
 * 8 words, i.e. 256 code points; the trailing comment names that range.
 */
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
    0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// U+0000-U+00FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// U+0100-U+01FF
    0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// U+0200-U+02FF
    0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// U+0300-U+03FF
    0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// U+0400-U+04FF
    0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// U+0500-U+05FF
    0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// U+0600-U+06FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+0700-U+07FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+0800-U+08FF
    0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// U+0900-U+09FF
    0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// U+0A00-U+0AFF
    0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// U+0B00-U+0BFF
    0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// U+0C00-U+0CFF
    0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// U+0D00-U+0DFF
    0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// U+0E00-U+0EFF
    0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// U+0F00-U+0FFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// U+1000-U+10FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1100-U+11FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1200-U+12FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1300-U+13FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1400-U+14FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1500-U+15FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1600-U+16FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1700-U+17FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1800-U+18FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1900-U+19FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1A00-U+1AFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1B00-U+1BFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1C00-U+1CFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+1D00-U+1DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// U+1E00-U+1EFF
    0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// U+1F00-U+1FFF
    0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2000-U+20FF
    0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// U+2100-U+21FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2200-U+22FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2300-U+23FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2400-U+24FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2500-U+25FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2600-U+26FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2700-U+27FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2800-U+28FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2900-U+29FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2A00-U+2AFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2B00-U+2BFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2C00-U+2CFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2D00-U+2DFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2E00-U+2EFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+2F00-U+2FFF
    0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// U+3000-U+30FF
    0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3100-U+31FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3200-U+32FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3300-U+33FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3400-U+34FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3500-U+35FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3600-U+36FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3700-U+37FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3800-U+38FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3900-U+39FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3A00-U+3AFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3B00-U+3BFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3C00-U+3CFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3D00-U+3DFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3E00-U+3EFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+3F00-U+3FFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4000-U+40FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4100-U+41FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4200-U+42FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4300-U+43FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4400-U+44FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4500-U+45FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4600-U+46FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4700-U+47FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4800-U+48FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4900-U+49FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4A00-U+4AFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4B00-U+4BFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4C00-U+4CFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+4D00-U+4DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+4E00-U+4EFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+4F00-U+4FFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5000-U+50FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5100-U+51FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5200-U+52FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5300-U+53FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5400-U+54FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5500-U+55FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5600-U+56FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5700-U+57FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5800-U+58FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5900-U+59FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5A00-U+5AFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5B00-U+5BFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5C00-U+5CFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5D00-U+5DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5E00-U+5EFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+5F00-U+5FFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6000-U+60FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6100-U+61FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6200-U+62FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6300-U+63FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6400-U+64FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6500-U+65FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6600-U+66FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6700-U+67FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6800-U+68FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6900-U+69FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6A00-U+6AFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6B00-U+6BFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6C00-U+6CFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6D00-U+6DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6E00-U+6EFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+6F00-U+6FFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7000-U+70FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7100-U+71FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7200-U+72FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7300-U+73FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7400-U+74FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7500-U+75FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7600-U+76FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7700-U+77FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7800-U+78FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7900-U+79FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7A00-U+7AFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7B00-U+7BFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7C00-U+7CFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7D00-U+7DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7E00-U+7EFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+7F00-U+7FFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8000-U+80FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8100-U+81FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8200-U+82FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8300-U+83FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8400-U+84FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8500-U+85FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8600-U+86FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8700-U+87FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8800-U+88FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8900-U+89FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8A00-U+8AFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8B00-U+8BFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8C00-U+8CFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8D00-U+8DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8E00-U+8EFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+8F00-U+8FFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9000-U+90FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9100-U+91FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9200-U+92FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9300-U+93FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9400-U+94FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9500-U+95FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9600-U+96FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9700-U+97FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9800-U+98FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9900-U+99FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9A00-U+9AFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9B00-U+9BFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9C00-U+9CFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9D00-U+9DFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+9E00-U+9EFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// U+9F00-U+9FFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A000-U+A0FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A100-U+A1FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A200-U+A2FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A300-U+A3FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A400-U+A4FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A500-U+A5FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A600-U+A6FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A700-U+A7FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A800-U+A8FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+A900-U+A9FF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+AA00-U+AAFF
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// U+AB00-U+ABFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+AC00-U+ACFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+AD00-U+ADFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+AE00-U+AEFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+AF00-U+AFFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B000-U+B0FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B100-U+B1FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B200-U+B2FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B300-U+B3FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B400-U+B4FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B500-U+B5FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B600-U+B6FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B700-U+B7FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B800-U+B8FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+B900-U+B9FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+BA00-U+BAFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+BB00-U+BBFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+BC00-U+BCFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+BD00-U+BDFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+BE00-U+BEFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+BF00-U+BFFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C000-U+C0FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C100-U+C1FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C200-U+C2FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C300-U+C3FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C400-U+C4FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C500-U+C5FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C600-U+C6FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C700-U+C7FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C800-U+C8FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+C900-U+C9FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+CA00-U+CAFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+CB00-U+CBFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+CC00-U+CCFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+CD00-U+CDFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+CE00-U+CEFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+CF00-U+CFFF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D000-U+D0FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D100-U+D1FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D200-U+D2FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D300-U+D3FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D400-U+D4FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D500-U+D5FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// U+D600-U+D6FF
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// U+D700-U+D7FF
];
248 
/// Reports whether `ch` is A..Z, a..z, '_' or a universal alpha character,
/// by testing its bit in UNIVERSAL_ALPHA_FLAGS.
///
/// Code points above 0xd7ff lie outside the table and always yield false.
bool isUniversalAlpha(dchar ch) pure nothrow {
    // Reject anything past the last code point the table describes.
    if (ch > 0xd7ff)
        return false;
    // Each table word packs 32 code points: the high bits of ch select the
    // word, the low five bits select the bit inside it.
    immutable uint word = UNIVERSAL_ALPHA_FLAGS[ch >> 5];
    immutable uint mask = 1u << (ch & 31);
    return (word & mask) != 0;
}
253 
/// Reports whether `ch` may appear as the first character of an identifier.
///
/// This is exactly the isUniversalAlpha test; unlike isIdentMiddleChar there
/// is no additional allowance for '0'..'9'.
bool isIdentStartChar(dchar ch) pure nothrow {
    return ch.isUniversalAlpha();
}
258 
/// Reports whether `ch` may appear after the first character of an identifier:
/// either an ASCII digit '0'..'9' or anything isUniversalAlpha accepts.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    // Digits are handled here because the start-character test does not add them.
    if ('0' <= ch && ch <= '9')
        return true;
    return ch.isUniversalAlpha();
}
263     
/// Compile-time switch for the `static if` section that follows. When true,
/// that section (the `r` range helpers and `isUniversalAlphaSlow`) is compiled
/// in; it is off by default.
/// NOTE(review): judging by the name, the section is presumably used to dump
/// or regenerate UNIVERSAL_ALPHA_FLAGS - confirm against the rest of the block.
immutable ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
265 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
266         bool r(dchar ch, wchar v) pure nothrow {
267             return ch == v;
268         }
269         
270         bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
271             return ch >= v1 && ch <= v2;
272         }
273 
274         bool isUniversalAlphaSlow(dchar c)  pure nothrow {
275             return 
276                 // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
277                 // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
278                 r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
279                 || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
280                 //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
281                 //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
282                 //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
283                 //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
284                 //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
285                 || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
286                 || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
287                 || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
288                 || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
289                 || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
290                 //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
291                 //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
292                 || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
293                 || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
294                 //Armenian: 0531−0556, 0561−0587
295                 || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
296                 //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
297                 //05F0−05F2
298                 || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
299                 || r(c, 0x05F0,0x05F2)
300                 //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
301                 //06D0−06DC, 06E5−06E8, 06EA−06ED
302                 || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
303                 || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
304                 //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
305                 || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
306                 //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
307                 //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
308                 //09DC−09DD, 09DF−09E3, 09F0−09F1
309                 || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
310                 || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
311                 || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
312                 //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
313                 //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
314                 //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
315                 || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
316                 || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
317                 || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
318                 //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
319                 //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
320                 //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
321                 || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
322                 || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
323                 || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
324                 // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
325                 //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
326                 //0B5C−0B5D, 0B5F−0B61
327                 || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
328                 || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
329                 || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
330                 //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
331                 //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
332                 //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
333                 || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
334                 || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
335                 || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
336                 //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
337                 //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
338                 || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
339                 || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
340                 //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
341                 //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
342                 //0CE0−0CE1
343                 || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
344                 || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
345                 || r(c, 0x0CE0,0x0CE1)
346                 //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
347                 //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
348                 || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
349                 || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
350                 //Thai: 0E01−0E3A, 0E40−0E5B
351                 || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
352                 //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
353                 //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
354                 //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
355                 //0EC8−0ECD, 0EDC−0EDD
356                 || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
357                 || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
358                 || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
359                 || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
360                 //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
361                 //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
362                 //0FB1−0FB7, 0FB9
363                 || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
364                 || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
365                 || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
366                 //Georgian: 10A0−10C5, 10D0−10F6
367                 || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
368                 //Hiragana: 3041−3093, 309B−309C
369                 || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
370                 //Katakana: 30A1−30F6, 30FB−30FC
371                 || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
372                 //Bopomofo: 3105−312C
373                 || r(c, 0x3105,0x312C)
374                 //CJK Unified Ideographs: 4E00−9FA5
375                 || r(c, 0x4E00,0x9FA5)
376                 //Hangul: AC00−D7A3
377                 || r(c, 0xAC00,0xD7A3)
378                 //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
379                 //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
380                 //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
381                 || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
382                 || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
383                 || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
384                 //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
385                 //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
386                 //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
387                 //2133−2138, 2160−2182, 3005−3007, 3021−3029
388                 || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
389                 || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
390                 || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
391                 || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
392                 ;
393         }
394 
395 }
396 
unittest {

    // Generator and self-check for the UNIVERSAL_ALPHA_FLAGS bitmap.
    // Compiled in only when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true. It
    //   1) finds the highest code point below 0x10000 accepted by the reference predicate,
    //   2) prints a D array literal holding one bit per code point (32 per uint),
    //   3) checks that isUniversalAlpha() agrees with the reference predicate.
    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        immutable uint itemsInRow = 8;
        immutable uint bitsPerRow = itemsInRow * 32;

        // Reference predicate: slow range-based check OR a..z OR A..Z OR '_'.
        static bool referenceAlpha(uint ch) {
            return isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        }

        // highest accepted code point in 0..0xFFFF
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            if (referenceAlpha(i))
                maxAlpha = i;
        }
        // round up so the table ends on a full printed row; maxAlpha becomes the last covered code
        maxAlpha = (maxAlpha + bitsPerRow - 1) / bitsPerRow * bitsPerRow - 1;
        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            immutable uint column = (i / 32) % itemsInRow;
            if (column == 0)
                write("    ");
            // bit j of the word is set when code point i + j is accepted
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                if (referenceAlpha(i + j))
                    flags |= (1u << j);
            }
            writef("0x%08x", flags);
            // no comma after the last word
            if (i != maxAlpha / 32 * 32)
                write(",");
            // end of row: print the code point range covered by this row
            if (column == itemsInRow - 1)
                writefln("// %04x-%04x", i + 32 - bitsPerRow, i + 31);
        }
        writeln("];");

        // cross-check the fast implementation against the reference
        for (uint ch = 0; ch < 0x100000; ch++) {
            bool flag = referenceAlpha(ch);
            bool flag2 = isUniversalAlpha(ch);
            if (flag2 != flag) {
                // NOTE(review): repeated call kept from the original; presumably a convenient
                // place to step into isUniversalAlpha() with a debugger.
                isUniversalAlpha(ch);
                writefln("universalAlpha test failed for char %06x expected %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
            }
            assert(flag2 == flag);
        }
    }
}
442 
/**
 * Operator codes produced by the tokenizer.
 *
 * The ordinal of each member is used by getOpNameD() as an index into
 * OP_CODE_STRINGS, so the two lists must stay in exactly the same order.
 * The trailing comment on each member shows the operator's source text.
 */
enum OpCode : ubyte {
    NONE,       //    no op
    DIV,         //    /
    DIV_EQ,     //    /=
    DOT,         //    .
    DOT_DOT,     //    ..
    DOT_DOT_DOT,//    ...
    AND,         //    &
    AND_EQ,     //    &=
    LOG_AND,     //    &&
    OR,         //    |
    OR_EQ,         //    |=
    LOG_OR,     //    ||
    MINUS,         //    -
    MINUS_EQ,     //    -=
    MINUS_MINUS,//    --
    PLUS,         //    +
    PLUS_EQ,     //    +=
    PLUS_PLUS,     //    ++
    LT,         //    <
    LT_EQ,         //    <=
    SHL,         //    <<
    SHL_EQ,     //    <<=
    LT_GT,         //    <>
    NE_EQ,         //    <>=
    GT,         //    >
    GT_EQ,         //    >=
    SHR_EQ,        //    >>=
    ASR_EQ,     //    >>>=
    SHR,         //    >>
    ASR,         //    >>>
    NOT,         //    !
    NOT_EQ,        //    !=
    NOT_LT_GT,     //    !<>
    NOT_LT_GT_EQ, //    !<>=
    NOT_LT,     //    !<
    NOT_LT_EQ,     //    !<=
    NOT_GT,     //    !>
    NOT_GT_EQ,     //    !>=
    PAR_OPEN,     //    (
    PAR_CLOSE,     //    )
    SQ_OPEN,     //    [
    SQ_CLOSE,     //    ]
    CURL_OPEN,     //    {
    CURL_CLOSE, //    }
    QUEST,         //    ?
    COMMA,         //    ,
    SEMICOLON,  //    ;
    COLON,         //    :
    DOLLAR,     //    $
    EQ,         //    =
    // NOTE(review): QE_EQ looks like a typo for EQ_EQ; renaming would affect users of this enum.
    QE_EQ,         //    ==
    MUL,         //    *
    MUL_EQ,     //    *=
    MOD,     //    %
    MOD_EQ, //    %=
    XOR,         //    ^
    XOR_EQ,     //    ^=
    LOG_XOR,     //    ^^
    LOG_XOR_EQ, //    ^^=
    INV,         //    ~
    INV_EQ,     //    ~=
    AT,         //    @
    EQ_GT,         //    =>
    SHARP         //    #
};
509 
/**
 * Source text of every operator, indexed by the ordinal of the matching OpCode
 * member (see getOpNameD). Entry 0 (OpCode.NONE) is the empty string.
 * Entries must be kept in the same order as the OpCode enum.
 */
immutable dstring[] OP_CODE_STRINGS = [
    "",
    "/",
    "/=",
    ".",
    "..",
    "...",
    "&",
    "&=",
    "&&",
    "|",
    "|=",
    "||",
    "-",
    "-=",
    "--",
    "+",
    "+=",
    "++",
    "<",
    "<=",
    "<<",
    "<<=",
    "<>",
    "<>=",
    ">",
    ">=",
    ">>=",
    ">>>=",
    ">>",
    ">>>",
    "!",
    "!=",
    "!<>",
    "!<>=",
    "!<",
    "!<=",
    "!>",
    "!>=",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
    "?",
    ",",
    ";",
    ":",
    "$",
    "=",
    "==",
    "*",
    "*=",
    "%",
    "%=",
    "^",
    "^=",
    "^^",
    "^^=",
    "~",
    "~=",
    "@",
    "=>",
    "#"
];
576 
/// Returns the source text of operator `op`, looked up in OP_CODE_STRINGS by the enum ordinal.
dstring getOpNameD(OpCode op) pure nothrow {
    immutable size_t index = op;
    return OP_CODE_STRINGS[index];
}
580 
/**
 * Keyword codes produced by the tokenizer.
 *
 * The ordinal of each member is used as an index into KEYWORD_STRINGS
 * (see getKeywordNameD and findKeyword), so the two lists must stay in
 * exactly the same order.
 */
enum Keyword : ubyte {
    NONE, // not a keyword; maps to the empty string

    AT_DISABLE, //"@disable",
    AT_NOGC, //"@nogc",
    AT_PROPERTY, //"@property",

    ABSTRACT,
    ALIAS,
    ALIGN,
    ASM,
    ASSERT,
    AUTO,

    BODY,
    BOOL,
    BREAK,
    BYTE,

    CASE,
    CAST,
    CATCH,
    CDOUBLE,
    CENT,
    CFLOAT,
    CHAR,
    CLASS,
    CONST,
    CONTINUE,
    CREAL,

    DCHAR,
    DEBUG,
    DEFAULT,
    DELEGATE,
    DELETE,
    DEPRECATED,
    DO,
    DOUBLE,

    ELSE,
    ENUM,
    EXPORT,
    EXTERN,

    FALSE,
    FINAL,
    FINALLY,
    FLOAT,
    FOR,
    FOREACH,
    FOREACH_REVERSE,
    FUNCTION,

    GOTO,

    IDOUBLE,
    IF,
    IFLOAT,
    IMMUTABLE,
    IMPORT,
    IN,
    INOUT,
    INT,
    INTERFACE,
    INVARIANT,
    IREAL,
    IS,

    LAZY,
    LONG,

    MACRO,
    MIXIN,
    MODULE,

    NEW,
    NOTHROW,
    NULL,

    OUT,
    OVERRIDE,

    PACKAGE,
    PRAGMA,
    PRIVATE,
    PROTECTED,
    PUBLIC,
    PURE,

    REAL,
    REF,
    RETURN,

    SAFE,
    SCOPE,
    SHARED,
    SHORT,
    STATIC,
    STRUCT,
    SUPER,
    SWITCH,
    SYNCHRONIZED,
    SYSTEM,

    TEMPLATE,
    THIS,
    THROW,
    TRUE,
    TRUSTED,
    TRY,
    TYPEDEF,
    TYPEID,
    TYPEOF,

    UBYTE,
    UCENT,
    UINT,
    ULONG,
    UNION,
    UNITTEST,
    USHORT,

    VERSION,
    VOID,
    VOLATILE,

    WCHAR,
    WHILE,
    WITH,

    FILE, // "__FILE__"
    MODULE__, // "__MODULE__"
    LINE, // "__LINE__"
    FUNCTION__, // "__FUNCTION__"
    PRETTY_FUNCTION, // "__PRETTY_FUNCTION__"

    //Special Token    Replaced with
    DATE, //    string literal of the date of compilation "mmm dd yyyy"
    EOF, //    sets the scanner to the end of the file
    TIME, //    string literal of the time of compilation "hh:mm:ss"
    TIMESTAMP, //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    VENDOR, //    Compiler vendor string, such as "Digital Mars D"
    VERSION_, //    Compiler version as an integer, such as 2001
    
    GSHARED, // "__gshared"
    TRAITS, // "__traits"
    VECTOR, // "__vector"
    PARAMETERS, // "__parameters"

}
732 
/**
 * Source text of every keyword, indexed by the ordinal of the matching Keyword
 * member (see getKeywordNameD and findKeyword). Entry 0 (Keyword.NONE) is the
 * empty string. Entries must be kept in the same order as the Keyword enum.
 */
immutable dstring[] KEYWORD_STRINGS = [
    "",

    "@disable",
    "@nogc",
    "@property",

    "abstract",
    "alias",
    "align",
    "asm",
    "assert",
    "auto",

    "body",
    "bool",
    "break",
    "byte",

    "case",
    "cast",
    "catch",
    "cdouble",
    "cent",
    "cfloat",
    "char",
    "class",
    "const",
    "continue",
    "creal",

    "dchar",
    "debug",
    "default",
    "delegate",
    "delete",
    "deprecated",
    "do",
    "double",

    "else",
    "enum",
    "export",
    "extern",

    "false",
    "final",
    "finally",
    "float",
    "for",
    "foreach",
    "foreach_reverse",
    "function",

    "goto",

    "idouble",
    "if",
    "ifloat",
    "immutable",
    "import",
    "in",
    "inout", 
    "int",
    "interface",
    "invariant",
    "ireal",
    "is",

    "lazy",
    "long",

    "macro",
    "mixin",
    "module",

    "new",
    "nothrow",
    "null",

    "out",
    "override",

    "package",
    "pragma",
    "private",
    "protected",
    "public",
    "pure",

    "real",
    "ref",
    "return",

    // NOTE(review): unlike "@disable"/"@nogc"/"@property" above, these three are stored
    // without the leading '@' - confirm this is what the lookup code expects.
    "safe",
    "scope",
    "shared",
    "short",
    "static",
    "struct",
    "super",
    "switch",
    "synchronized",
    "system",

    "template",
    "this",
    "throw",
    "true",
    "trusted",
    "try",
    "typedef",
    "typeid",
    "typeof",

    "ubyte",
    "ucent",
    "uint",
    "ulong",
    "union",
    "unittest",
    "ushort",

    "version",
    "void",
    "volatile",

    "wchar",
    "while",
    "with",

    "__FILE__",
    "__MODULE__",
    "__LINE__",
    "__FUNCTION__",
    "__PRETTY_FUNCTION__",

    //Special Token    Replaced with
    "__DATE__", //    string literal of the date of compilation "mmm dd yyyy"
    "__EOF__", //    sets the scanner to the end of the file
    "__TIME__", //    string literal of the time of compilation "hh:mm:ss"
    "__TIMESTAMP__", //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    "__VENDOR__", //    Compiler vendor string, such as "Digital Mars D"
    "__VERSION__", //    Compiler version as an integer, such as 2001

        
    "__gshared",
    "__traits",
    "__vector",
    "__parameters"
];
884 
/// Returns the source text of `keyword`, looked up in KEYWORD_STRINGS by the enum ordinal.
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    immutable size_t index = keyword;
    return KEYWORD_STRINGS[index];
}
888 
/**
 * Searches the keywords `start` .. `end` (inclusive) for one matching the text at `name`.
 *
 * The first character of each candidate keyword is never compared: candidate
 * character j (j >= 1) is matched against name[j - 1], so `name` is expected to
 * point just past an already examined first character.
 *
 * Params:
 *   start = first keyword to try
 *   end   = last keyword to try (inclusive)
 *   name  = pointer to the text following the first character
 *   len   = character count used for the length checks (a keyword longer than len + 1 is skipped)
 *   pos   = advanced by (keyword length - 1) when a keyword is found
 *
 * Returns: the matched keyword, or Keyword.NONE when nothing matches.
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    for (Keyword candidate = start; candidate <= end; candidate++) {
        dstring word = KEYWORD_STRINGS[candidate];
        if (word.length > len + 1)
            continue; // too long
        // scan until the first mismatch or the end of the keyword
        uint idx = 1;
        while (idx < word.length && word[idx] == name[idx - 1])
            idx++;
        if (idx < word.length)
            continue; // stopped on a mismatch
        // accept only if the keyword uses up all of the text or is not followed by an identifier character
        if (word.length == len + 1 || !isIdentMiddleChar(name[word.length - 1])) {
            pos += word.length - 1;
            return candidate;
        }
    }
    return Keyword.NONE;
}
911 
912 /**
913  * Token.
914  */
915 class Token {
916     //                                 32bit      64bit platform
917     //                    vtable       4 bytes    8 bytes
918     protected SourceFile _file;   //   4 bytes    8 bytes
919     protected int _line;          //   4 bytes    4 bytes
920     protected int _pos;           //   4 bytes    4 bytes
921     protected TokenType _type;    //   1 byte     1 byte
922     //                    total        17 bytes   25 bytes
923     /// returns token type
924     @property TokenType type() { return _type; }
925     /// returns file info for source
926     @property SourceFile filename() { return _file; }
927     /// returns 1-based source line number of token start
928     @property int line() { return _line; }
929     /// returns 1-based source line position of token start
930     @property int pos() { return _pos; }
931     /// returns token text
932     @property dstring text() { return null; }
933 
934     // number token properties
935     @property dchar literalType() { return 0; }
936     @property ulong intValue() { return 0; }
937     @property bool isUnsigned() { return false; }
938     @property ulong isLong() { return false; }
939     @property real realValue() { return 0; }
940     @property double doubleValue() { return 0; }
941     @property float floatValue() { return 0; }
942     @property byte precision() { return 0; }
943     @property bool isImaginary() { return false; }
944     @property bool isBracket() {
945         OpCode op = opCode; 
946         return op == OpCode.PAR_OPEN 
947             || op == OpCode.PAR_CLOSE 
948             || op == OpCode.SQ_OPEN 
949             || op == OpCode.SQ_CLOSE 
950             || op == OpCode.CURL_OPEN 
951             || op == OpCode.CURL_CLOSE; 
952     }
953     @property bool isOpenBracket() {
954         OpCode op = opCode;
955         return op == OpCode.PAR_OPEN
956             || op == OpCode.SQ_OPEN
957             || op == OpCode.CURL_OPEN;
958     }
959     @property bool isCloseBracket() {
960         OpCode op = opCode; 
961         return op == OpCode.PAR_CLOSE
962             || op == OpCode.SQ_CLOSE
963             || op == OpCode.CURL_CLOSE;
964     }
965     @property bool isEof() { return type == TokenType.EOF; }
966 
967     /// returns opcode ID - for opcode tokens
968     @property OpCode opCode() { return OpCode.NONE; }
969     /// returns keyword ID - for keyword tokens
970     @property Keyword keyword() { return Keyword.NONE; }
971     /// returns true if this is documentation comment token
972     @property bool isDocumentationComment() { return false; }
973     /// returns true if this is multiline
974     @property bool isMultilineComment() { return false; }
975 
976     // error handling
977 
978     /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
979     @property bool isError() { return type == TokenType.INVALID; }
980     /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
981     @property string errorMessage() { return null; }
982     /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
983     @property int errorCode() { return 0; }
984     /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
985     @property TokenType invalidTokenType() { return TokenType.INVALID; }
986 
987 
988     this(TokenType type) {
989         _type = type;
990     }
991 
992     this(TokenType type, SourceFile file, int line, int pos) {
993         _type = type;
994         _file = file;
995         _line = line;
996         _pos = pos;
997     }
998     /// set start position for token (line is 1-based, pos is 0-based)
999     void setPos(SourceFile file, int line, int pos) {
1000         _file = file;
1001         _line = line;
1002         _pos = pos + 1;
1003     }
1004     /// set source file information for token
1005     void setFile(SourceFile file) {
1006         _file = file;
1007     }
1008     /// set start position for token (line is 1-based, pos is 0-based)
1009     void setPos(int line, int pos) {
1010         _line = line;
1011         _pos = pos + 1;
1012     }
1013 
1014     public abstract Token clone();
1015     public override @property string toString() {
1016         return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword) 
1017             ~" \"" ~ toUTF8(text()) ~ "\"";
1018     }
1019 }
1020 
/// Token of type EOF.
class EofToken : Token {
    /// creates an EOF token without location information
    this() {
        super(TokenType.EOF);
    }
    /// creates an EOF token for the given file, line and position
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.EOF, file, line, pos);
    }
    /// returns a new EofToken with the same file, line and position
    override public Token clone() {
        Token copy = new EofToken(_file, _line, _pos);
        return copy;
    }
    /// always "EOF"
    public override @property string toString() {
        return "EOF";
    }
}
1035 
1036 // treat as white space
1037 //class EolToken : Token {
1038 //    this(string file, uint line, uint pos) {
1039 //        super(TokenType.EOL, file, line, pos);
1040 //    }
1041 //}
1042 
/// Token of type WHITESPACE.
class WhiteSpaceToken : Token {
    /// creates a whitespace token without location information
    this() {
        super(TokenType.WHITESPACE);
    }
    /// creates a whitespace token for the given file, line and position
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.WHITESPACE, file, line, pos);
    }
    /// returns a new WhiteSpaceToken with the same file, line and position
    override public Token clone() {
        Token copy = new WhiteSpaceToken(_file, _line, _pos);
        return copy;
    }
    /// always "WhiteSpace"
    public override @property string toString() {
        return "WhiteSpace";
    }
}
1058 
/// Operator token; carries an OpCode.
class OpToken : Token {
    OpCode _op;

    /// creates an operator token without location information
    this() {
        super(TokenType.OP);
    }
    /// creates an operator token for the given file, line and position
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.OP, file, line, pos);
    }

    /// operator code of this token
    public @property override OpCode opCode() {
        return _op;
    }
    /// sets operator code
    public @property void opCode(OpCode op) {
        _op = op;
    }
    /// operator source text, as returned by getOpNameD
    public @property override dstring text() {
        return getOpNameD(_op);
    }

    /// returns a new OpToken with the same location and operator code
    override public Token clone() {
        auto copy = new OpToken(_file, _line, _pos);
        copy._op = _op;
        return copy;
    }
    /// "Op:" followed by the OpCode member name
    public override @property string toString() {
        string opName = to!string(_op);
        return "Op:" ~ opName;
    }
}
1079 
/// Keyword token; carries a Keyword code.
class KeywordToken : Token {
    Keyword _keyword;

    /// creates a keyword token without location information
    this() {
        super(TokenType.KEYWORD);
    }
    /// creates a keyword token for the given file, line and position
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.KEYWORD, file, line, pos);
    }

    /// keyword code of this token
    public @property override Keyword keyword() {
        return _keyword;
    }
    /// sets keyword code
    public @property void keyword(Keyword keyword) {
        _keyword = keyword;
    }
    /// keyword source text, as returned by getKeywordNameD
    public @property override dstring text() {
        return getKeywordNameD(_keyword);
    }

    /// returns a new KeywordToken with the same location and keyword code
    override public Token clone() {
        auto copy = new KeywordToken(_file, _line, _pos);
        copy._keyword = _keyword;
        return copy;
    }
    /// "Keyword:" followed by the Keyword member name
    public override @property string toString() {
        string keywordName = to!string(_keyword);
        return "Keyword:" ~ keywordName;
    }
}
1100 
/// Comment token; carries the comment text plus documentation / multiline flags.
class CommentToken : Token {
    protected dstring _text;
    protected bool _isDocumentationComment;
    protected bool _isMultilineComment;

    /// creates a comment token without location information or text
    this() {
        super(TokenType.COMMENT);
    }
    /// creates a comment token; `text` is stored via a cast to dstring, without copying
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.COMMENT, file, line, pos);
        _text = cast(dstring)text;
    }

    /// true if this is a documentation comment
    override @property bool isDocumentationComment() {
        return _isDocumentationComment;
    }

    /// sets documentation comment flag
    @property void isDocumentationComment(bool f) {
        _isDocumentationComment = f;
    }

    /// true if this is a multiline comment
    override @property bool isMultilineComment() {
        return _isMultilineComment;
    }

    /// sets multiline comment flag
    @property void isMultilineComment(bool f) {
        _isMultilineComment = f;
    }

    /// comment text
    @property override dstring text() {
        return _text;
    }
    /// sets comment text; the array is stored via a cast to dstring, without copying
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    /// returns a new CommentToken with a duplicated text and the same location and flags
    override public Token clone() {
        auto copy = new CommentToken(_file, _line, _pos, _text.dup);
        copy._isMultilineComment = _isMultilineComment;
        copy._isDocumentationComment = _isDocumentationComment;
        return copy;
    }
    /// "Comment:" followed by the comment text
    public override @property string toString() {
        string body_ = to!string(_text);
        return "Comment:" ~ body_;
    }
}
1144 
/// Holder for a token which failed to parse - used for error tolerant parsing.
class InvalidToken : Token {
    protected dstring _text;
    protected TokenType _invalidTokenType;
    protected int _errorCode;
    protected string _errorMessage;

    /// creates an invalid token without location information or text
    this() {
        super(TokenType.INVALID);
    }
    /// creates an invalid token; `text` is stored via a cast to dstring, without copying
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.INVALID, file, line, pos);
        _text = cast(dstring)text;
    }

    /// error message
    override @property string errorMessage() {
        return _errorMessage;
    }
    /// sets error message
    @property void errorMessage(string s) {
        _errorMessage = s;
    }
    /// error code
    override @property int errorCode() {
        return _errorCode;
    }
    /// sets error code
    @property void errorCode(int c) {
        _errorCode = c;
    }
    /// type of the token whose parsing failed
    override @property TokenType invalidTokenType() {
        return _invalidTokenType;
    }
    /// sets type of the token whose parsing failed
    @property void invalidTokenType(TokenType t) {
        _invalidTokenType = t;
    }

    /// text of invalid token
    @property override dstring text() {
        return _text;
    }
    /// sets text of invalid token; the array is stored via a cast to dstring, without copying
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    /// returns a new InvalidToken with duplicated text and message and the same location, code and failed type
    override Token clone() {
        auto copy = new InvalidToken(_file, _line, _pos, _text.dup);
        copy._invalidTokenType = _invalidTokenType;
        copy._errorCode = _errorCode;
        copy._errorMessage = _errorMessage.idup;
        return copy;
    }
    /// "Invalid:" followed by the token text
    override @property string toString() {
        string body_ = to!string(_text);
        return "Invalid:" ~ body_;
    }
}
1188 
/// Numeric identifier ID type, as handed out by IdentHolder.
alias tokenizer_ident_t = uint;
/// Identifier name type.
alias tokenizer_ident_name_t = dstring;

/// Reserved identifier IDs.
enum : tokenizer_ident_t {
    /// "not found" value; IdentHolder starts assigning real IDs at NO_IDENT + 1
    NO_IDENT = 0
}
1195 
1196 /**
1197  * Global storage for identifier strings.
1198  */
1199 class IdentHolder {
1200     protected tokenizer_ident_t _nextId;
1201     protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
1202     protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;
1203 
1204     public this() {
1205         _nextId = NO_IDENT + 1;
1206     }
1207 
1208     /**
1209     * Search for id by name, return NO_IDENT if not found.
1210     */
1211     uint findByName(tokenizer_ident_name_t name) {
1212         tokenizer_ident_t * found = (name in _nameToId);
1213         if (found)
1214             return *found; 
1215         return NO_IDENT;
1216     }
1217 
1218     /**
1219     * Search for name by id, return null if not found.
1220     */
1221     tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
1222         auto found = (id in _idToName);
1223         if (found)
1224             return *found;
1225         return null;
1226     }
1227 
1228     /**
1229      * Search for ident id by name, create new entry if not found.
1230      */
1231     tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
1232         uint * found = (name in _nameToId);
1233         if (found)
1234             return *found; 
1235         uint newid = _nextId++;
1236         immutable tokenizer_ident_name_t nameCopy = name.dup;
1237         _nameToId[nameCopy] = newid;
1238         _idToName[newid] = nameCopy;
1239         return newid;
1240     }
1241 }
1242 
1243 /**
1244 * Thread local storage for IDs.
1245 */
1246 IdentHolder identMap;
1247 
1248 static this() {
1249     // init ID storage
1250     identMap = new IdentHolder();
1251 }
1252 
/// String literal token; carries the literal text and a literal type character.
class StringLiteralToken : Token {
    dstring _text;
    dchar _literalType;

    /// creates a string literal token without location information or text
    this() {
        super(TokenType.STRING);
    }
    /// creates a string literal token; `text` is stored via a cast to dstring, without copying
    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
        super(TokenType.STRING, file, line, pos);
        _literalType = type;
        _text = cast(dstring)text;
    }

    /// literal type character
    public @property override dchar literalType() {
        return _literalType;
    }
    /// literal text
    public @property override dstring text() {
        return _text;
    }
    /// sets text (stored via a cast to dstring, without copying) and literal type
    public void setText(dchar[] text, dchar type) {
        _literalType = type;
        _text = cast(dstring)text;
    }

    /// returns a new StringLiteralToken with a duplicated text and the same location and type
    override public Token clone() {
        Token copy = new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
        return copy;
    }
    /// String:"text" followed by the literal type character, or a space when the type is 0
    public override @property string toString() {
        dchar suffix = _literalType ? _literalType : ' ';
        dstring rendered = "String:\"" ~ _text ~ "\"";
        rendered ~= suffix;
        return toUTF8(rendered);
    }
}
1274 
/// Character literal token; carries a single character and a literal type character.
class CharacterLiteralToken : Token {
    dchar _character;
    dchar _literalType;

    /// creates a character literal token without location information
    this() {
        super(TokenType.CHARACTER);
    }
    /// creates a character literal token for the given location, character and literal type
    this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
        super(TokenType.CHARACTER, file, line, pos);
        _literalType = type;
        _character = character;
    }

    /// literal type character
    @property override dchar literalType() {
        return _literalType;
    }
    /// the character value
    @property dchar character() {
        return _character;
    }
    /// one-character string holding the character value
    @property override dstring text() {
        return [_character];
    }
    /// sets character value and literal type
    void setCharacter(dchar ch, dchar type) {
        _literalType = type;
        _character = ch;
    }

    /// returns a new CharacterLiteralToken with the same location, character and type
    override public Token clone() {
        Token copy = new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
        return copy;
    }
    /// "Char:" followed by the character encoded as UTF-8
    public override @property string toString() {
        string encoded = toUTF8([_character]);
        return "Char:" ~ encoded;
    }
}
1297 
/// Integer literal token; carries the value plus unsigned / long flags.
class IntegerLiteralToken : Token {
    ulong _value;
    bool _unsigned;
    bool _long;

    /// creates an integer literal token without location information
    this() {
        super(TokenType.INTEGER);
    }
    /// creates an integer literal token for the given location, value and flags
    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
        super(TokenType.INTEGER, file, line, pos);
        setValue(value, unsignedFlag, longFlag);
    }

    /// literal value
    public @property override ulong intValue() {
        return _value;
    }
    /// unsigned flag
    public @property override bool isUnsigned() {
        return _unsigned;
    }
    /// long flag (declared as ulong to match the base class signature)
    public @property override ulong isLong() {
        return _long;
    }
    /// decimal representation of the value
    public @property override dstring text() {
        return to!dstring(_value);
    }
    /// sets value and both flags
    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
        _value = value;
        _long = longFlag;
        _unsigned = unsignedFlag;
    }
    /// sets both flags, leaving the value untouched
    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
        _long = longFlag;
        _unsigned = unsignedFlag;
    }

    /// returns a new IntegerLiteralToken with the same location, value and flags
    override public Token clone() {
        Token copy = new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
        return copy;
    }
    /// "Integer:" + value, then "L" if long, then "U" if unsigned
    public override @property string toString() {
        string rendered = "Integer:" ~ to!string(_value);
        if (_long)
            rendered ~= "L";
        if (_unsigned)
            rendered ~= "U";
        return rendered;
    }
}
1331 
/// Floating point literal token; carries the value, a precision code and an imaginary flag.
class RealLiteralToken : Token {
    real _value;
    byte _precision;
    bool _imaginary;

    /// creates a floating point literal token without location information
    this() {
        super(TokenType.FLOAT);
    }
    /// creates a floating point literal token for the given location, value, precision and imaginary flag
    this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
        super(TokenType.FLOAT, file, line, pos);
        _imaginary = imaginary;
        _precision = precision;
        _value = value;
    }

    /// value converted with to!long
    public @property override ulong intValue() {
        return to!long(_value);
    }
    /// value as real
    public @property override real realValue() {
        return _value;
    }
    /// value cast to double
    public @property override double doubleValue() {
        return cast(double)_value;
    }
    /// value cast to float
    public @property override float floatValue() {
        return cast(float)_value;
    }
    /// precision code (toString prints 0 as suffix "f" and 2 as suffix "L")
    public @property override byte precision() {
        return _precision;
    }
    /// imaginary flag
    public @property override bool isImaginary() {
        return _imaginary;
    }
    /// textual representation of the value
    public @property override dstring text() {
        return to!dstring(_value);
    }
    /// sets value, precision code and imaginary flag
    public void setValue(real value, byte precision = 1, bool imaginary = false) {
        _value = value;
        _imaginary = imaginary;
        _precision = precision;
    }
    /// sets precision code and imaginary flag, leaving the value untouched
    public void setFlags(byte precision = 1, bool imaginary = false) {
        _imaginary = imaginary;
        _precision = precision;
    }

    /// returns a new RealLiteralToken with the same location, value, precision and imaginary flag
    override public Token clone() {
        Token copy = new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
        return copy;
    }
    /// "Real:" + value, then "f" (precision 0) or "L" (precision 2), then "i" if imaginary
    public override @property string toString() {
        string rendered = "Real:" ~ to!string(_value);
        if (_precision == 0)
            rendered ~= "f";
        else if (_precision == 2)
            rendered ~= "L";
        if (_imaginary)
            rendered ~= "i";
        return rendered;
    }
}
1368 
/// Identifier token: stores the id that identMap assigns to the name rather than the name itself.
class IdentToken : Token {
    /// id of the identifier name inside identMap
    tokenizer_ident_t _id;

    /// Creates a token with only the token type assigned.
    this() {
        super(TokenType.IDENTIFIER);
    }
    /// Creates a token from identifier text; the text is translated to an id through identMap.
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.IDENTIFIER, file, line, pos);
        // NOTE(review): the array is cast to immutable without copying - confirm identMap does not retain a reference to a reusable buffer
        _id = identMap.idByName(cast(immutable)text);
    }
    /// Creates a token from an id that was already obtained from identMap.
    this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = id;
    }

    /// Identifier name looked up in identMap by the stored id.
    public @property override dstring text() {
        return identMap.nameById(_id);
    }
    /// Replaces the identifier: the text is translated to an id through identMap.
    public void setText(dchar[] text) {
        _id = identMap.idByName(cast(immutable)text);
    }

    /// Returns a new IdentToken with the same file, position and id.
    override public Token clone() {
        return new IdentToken(_file, _line, _pos, _id);
    }
    /// Debug representation: "Ident:" followed by the identifier name.
    public override @property string toString() {
        dstring name = this.text;
        return "Ident:" ~ to!string(name);
    }
}
1395 
1396 // shared appender buffer, to avoid extra heap allocations
/// Growable dchar buffer that is reused between tokens, with in-place decoding of escape sequences.
struct StringAppender {
    /// backing storage; only the first `len` elements hold appended data
    dchar[] buf;
    /// number of valid characters in buf
    uint len;
    /// Returns the appended characters as a slice of the internal buffer (no copy is made).
    dchar[] get() {
        return buf[0 .. len];
    }
    /// Appends a single '\n', growing the buffer when it is full.
    void appendEol() {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)((len + 1 + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len] = '\n';
        len++;
    }
    /// Appends all characters of s; an empty s is a no-op.
    void append(dchar[] s) {
        if (s.length == 0)
            return;
        if (len + s.length > buf.length) {
            uint newsize = cast(uint)((len + s.length + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len .. len + s.length] = s;
        len += s.length;
    }
    /// Appends one character, doubling the buffer (minimum 128 elements) when it is full.
    void append(dchar ch) {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)(buf.length * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len++] = ch;
    }
    /// Forgets the content but keeps the allocated storage for reuse.
    void reset() {
        len = 0;
    }
    /// Returns the value (0..15) of a hexadecimal digit, or -1 when ch is not a hex digit.
    static int parseHexDigit(dchar ch) {
        if (ch >= '0' && ch <='9')
            return ch - '0';
        if (ch >= 'a' && ch <='f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <='F')
            return ch - 'A' + 10;
        return -1;
    }
    /// set by the decode helpers when an invalid sequence is met; cleared at the start of processEscapeSequences()
    bool errorFlag = false;
    /// Decodes `count` hex digits that follow buf[pos], advancing pos to the last digit consumed.
    /// Running out of input sets errorFlag and returns what was decoded so far;
    /// a non-hex character sets errorFlag and is treated as digit 0.
    dchar decodeHex(ref int pos, int count) {
        dchar res = 0;
        for (int i = 0; i < count; i++) {
            if (pos >= len - 1) {
                errorFlag = true;
                return res;
            }
            dchar ch = buf[++pos];
            int digit = parseHexDigit(ch);
            if (digit < 0) {
                errorFlag = true;
                digit = 0;
            }
            res = (res << 4) | digit;
        }
        return res;
    }
    /// Decodes an octal escape of one to three digits: firstChar is the digit already read,
    /// up to two more octal digits are consumed from buf after pos.
    dchar decodeOct(dchar firstChar, ref int pos) {
        dchar res = 0;
        res = firstChar - '0';
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        return res;
    }

    /// scratch buffer for the entity name collected by decodeCharacterEntity()
    char[] entityNameBuf;
    /// number of valid chars in entityNameBuf
    int entityNameLen;

    /// Decodes a named character entity; on entry buf[pos] is the '&' that follows the backslash.
    /// Characters up to ';' are collected as the name and resolved with entityToChar().
    /// Returns '?' and sets errorFlag when the terminator is missing or the lookup yields 0.
    dchar decodeCharacterEntity(ref int pos) {
        entityNameLen = 0;
        pos++;
        for(; pos < len && buf[pos] != ';'; pos++) {
            dchar ch = buf[pos];
            if (ch >= 0x80)
                errorFlag = true;
            if (entityNameBuf.length < entityNameLen + 4)
                entityNameBuf.length += 32;
            entityNameBuf[entityNameLen++] = cast(char)ch;
        }
        if (pos < len && buf[pos] == ';') {
            dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /// Replaces backslash escape sequences in the buffer with the characters they denote.
    /// The rewrite happens in place (the output never gets ahead of the input) and `len` shrinks accordingly.
    /// A backslash that is the very last character is dropped.
    /// Returns: errorFlag, i.e. true when at least one invalid sequence was met.
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0;
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch == '\\') {
                if (src == len - 1)
                    break; // INVALID
                ch = buf[++src];
                switch (ch) {
                    case '\'':
                    case '\"':
                    case '?':
                    case '\\':
                        buf[dst++] = ch;
                        break;
                    // NOTE: there is intentionally no dedicated case for '0'. A separate
                    // `case '0'` would shadow the octal branch below, turning "\012" into
                    // NUL followed by the literal digits "12". decodeOct() yields 0 for a
                    // lone "\0" anyway, so plain "\0" still decodes to NUL.
                    case 'a':
                        buf[dst++] = '\a';
                        break;
                    case 'b':
                        buf[dst++] = '\b';
                        break;
                    case 'f':
                        buf[dst++] = '\f';
                        break;
                    case 'n':
                        buf[dst++] = '\n';
                        break;
                    case 'r':
                        buf[dst++] = '\r';
                        break;
                    case 't':
                        buf[dst++] = '\t';
                        break;
                    case 'v':
                        buf[dst++] = '\v';
                        break;
                    case 'x':
                        buf[dst++] = decodeHex(src, 2);
                        break;
                    case 'u':
                        buf[dst++] = decodeHex(src, 4);
                        break;
                    case 'U':
                        buf[dst++] = decodeHex(src, 8);
                        break;
                    default:
                        if (ch >= '0' && ch <= '7') {
                            // octal X XX or XXX (including the ones starting with 0)
                            buf[dst++] = decodeOct(ch, src);
                        } else if (ch == '&') {
                            // named character entity
                            buf[dst++] = decodeCharacterEntity(src);
                        } else {
                            // unknown escape: keep the character as is and report an error
                            buf[dst++] = ch;
                            errorFlag = true;
                        }
                        break;
                }
            } else {
                buf[dst++] = ch;
            }
        }
        len = dst;
        return errorFlag;
    }
}
1570 
1571 class Tokenizer
1572 {
    /// stream the source lines are read from (see nextLine())
    protected SourceLines _lineStream;
    /// text of the line currently being tokenized, as returned by _lineStream.readLine()
    protected dchar[] _lineText;
    protected int _line; // current line number
    protected int _len; // current line length
    protected int _pos; // current line read position
    protected int _prevLineLength; // previous line length
    protected uint _state; // tokenizer state
    
    /// special characters returned by nextChar()/peekChar() at end of input and at line boundaries
    enum : int {
        EOF_CHAR = 0x001A,
        EOL_CHAR = 0x000A
    };
    
    // One reusable instance per token kind: the process* methods fill these in and return them
    // instead of allocating a new token for every lexeme.
    protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
    protected CommentToken _sharedCommentToken = new CommentToken();
    protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
    protected IdentToken _sharedIdentToken = new IdentToken();
    protected OpToken _sharedOpToken = new OpToken();
    protected KeywordToken _sharedKeywordToken = new KeywordToken();
    protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
    protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
    protected InvalidToken _sharedInvalidToken = new InvalidToken();
    protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
    // Reusable text buffers; the comment and identifier ones are filled by the comment and
    // identifier processing methods.
    protected StringAppender _stringLiteralAppender;
    protected StringAppender _commentAppender;
    protected StringAppender _identAppender;
1599     
    /// backing field of the enableCommentText property; comment text is collected by default
    protected bool _enableCommentText = true;
    /// Setter: when false, comment text is not put into the comment token (fewer allocations).
    @property void enableCommentText(bool enabled) {
        _enableCommentText = enabled;
    }
    /// Getter: false means comment text is not put into the comment token (fewer allocations).
    @property bool enableCommentText() {
        return _enableCommentText;
    }
1609 
    /// backing field of the errorTolerant property; errors throw by default
    protected bool _errorTolerant = false;
    /// Setter: when true, parserError() returns the shared InvalidToken instead of throwing ParserException.
    @property void errorTolerant(bool enabled) {
        _errorTolerant = enabled;
    }
    /// Getter: true means parserError() returns the shared InvalidToken instead of throwing ParserException.
    @property bool errorTolerant() {
        return _errorTolerant;
    }
1619 
    /// Creates a tokenizer reading from the given line stream; all setup is delegated to initialize().
    this(SourceLines lineStream) {
        initialize(lineStream);
    }
1623 
1624     void initialize(SourceLines lineStream, int pos = 0) {
1625         _lineStream = lineStream;
1626         SourceFile file = _lineStream.file;
1627         _sharedWhiteSpaceToken.setFile(file);
1628         _sharedCommentToken.setFile(file);
1629         _sharedStringLiteralToken.setFile(file);
1630         _sharedIdentToken.setFile(file);
1631         _sharedOpToken.setFile(file);
1632         _sharedKeywordToken.setFile(file);
1633         _sharedIntegerToken.setFile(file);
1634         _sharedRealToken.setFile(file);
1635         _sharedInvalidToken.setFile(file);
1636         _sharedCharacterLiteralToken.setFile(file);
1637         buildTime = Clock.currTime();
1638         _line = lineStream.line;
1639         _pos = 0;
1640         _prevLineLength = 0;
1641         _lineText = null;
1642         nextLine();
1643         _pos = pos;
1644     }
1645     
    /// Creates a tokenizer over in-memory source code by wrapping it into an ArraySourceLines.
    /// Params:
    ///     code = source text to tokenize
    ///     filename = name passed to ArraySourceLines (defaults to an empty string)
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }
1649     
    /// Fetches the next line from the source stream and makes it the current line.
    /// The length of the line being left is remembered in _prevLineLength.
    /// Returns: true when a line (possibly empty) was loaded; false at end of file,
    ///          in which case _pos and _len are reset to 0.
    /// Throws: SourceEncodingException when the stream reports a non-zero error code.
    protected bool nextLine() {
        _prevLineLength = cast(int)_lineText.length;
        _lineText = _lineStream.readLine();
        // a false-y result means: read error, end of file, or simply an empty line
        if (!_lineText) {
            if (_lineStream.errorCode != 0)
                throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
            if (_lineStream.eof) {
                // end of file
                _pos = 0;
                _len = 0;
                return false;
            }
            // just an empty line
        }
        _line = _lineStream.line;
        _pos = 0;
        _len = cast(int)_lineText.length; // the length is truncated to int, so extremely long lines are not supported
        return true;
    }
1670     
    /// Consumes and returns the next character.
    /// When the read position is already at the end of the current line, the next line is loaded
    /// and EOL_CHAR is returned - or EOF_CHAR when there are no more lines (then _pos is set to
    /// one past the previous line length).
    /// After the last character of a line has been consumed, the following line is loaded
    /// right away, so _line/_pos may already refer to the next line when this method returns.
    protected dchar nextChar() {
        if (_pos >= _len) {
            if (!nextLine()) {
                _pos = _prevLineLength + 1;
                return EOF_CHAR;
            }
            return EOL_CHAR;
        }
        dchar res = _lineText[_pos++];
        // eagerly move to the next line once the current one is exhausted
        if (_pos >= _len)
            nextLine();
        return res;
    }
1684     
1685     protected dchar peekChar() {
1686         if (_lineText is null) {
1687             if (!nextLine()) {
1688                 return EOF_CHAR;
1689             }
1690         }
1691         if (_pos >= _len)
1692             return EOL_CHAR;
1693         return _lineText[_pos++];
1694     }
1695     
    /// Creates the end-of-file token: a newly allocated EofToken positioned at the
    /// current token start line and at _startPos + 2.
    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _startLine, _startPos + 2);
    }
1700     
1701     protected Token processWhiteSpace(dchar firstChar) {
1702         // reuse the same token instance, to avoid extra heap spamming
1703         _sharedWhiteSpaceToken.setPos(_startLine, _startPos);
1704         for (;;) {
1705             int i = _pos;
1706             for (; i < _len; i++) {
1707                 dchar ch = _lineText[i];
1708                 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
1709                     break;
1710             }
1711             _pos = i;
1712             if (_pos < _len)
1713                 break;
1714             // go to next line
1715             if (!nextLine())
1716                 break;
1717         }
1718         return _sharedWhiteSpaceToken;
1719     }
1720     
1721     protected Token processOneLineComment() {
1722         _sharedCommentToken.setPos(_startLine, _startPos);
1723         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
1724         _sharedCommentToken.isMultilineComment = false;
1725         if (_enableCommentText) {
1726             _sharedCommentToken.text = _lineText[_pos + 1 .. $];
1727         }
1728         _pos = _len;
1729         nextChar();
1730         return _sharedCommentToken;
1731     }
1732 
1733     protected Token processOneLineSharpComment() {
1734         _sharedCommentToken.setPos(_startLine, _startPos);
1735         if (_enableCommentText) {
1736             _sharedCommentToken.text = _lineText[_pos .. $];
1737         }
1738         _pos = _len;
1739         return _sharedCommentToken;
1740     }
1741 
1742     // Comment /*   */    
1743     protected Token processMultilineComment() {
1744         _sharedCommentToken.setPos(_startLine, _startPos);
1745         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
1746         _sharedCommentToken.isMultilineComment = true;
1747         _commentAppender.reset();
1748         int textStart = _pos + 1;
1749         for (;;) {
1750             int textEnd = int.max;
1751             int i = textStart;
1752             for (; i < _len - 1; i++) {
1753                 if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
1754                     textEnd = i;
1755                     break;
1756                 }
1757             }
1758             if (textEnd != int.max) {
1759                 if (_enableCommentText)
1760                     _commentAppender.append(_lineText[textStart .. textEnd]);
1761                 _pos = textEnd + 2;
1762                 break;
1763             }
1764             if (!nextLine()) {
1765                 // TODO: do we need throw exception if comment not closed by end of file?
1766                 _pos = _len;
1767                 break;
1768             }
1769             textStart = 0;
1770         }
1771         if (_enableCommentText) {
1772             _sharedCommentToken.text = _commentAppender.get();
1773         }
1774         return _sharedCommentToken;
1775     }
1776     
1777     // Comment /+   +/    
1778     protected Token processNestedComment() {
1779         _sharedCommentToken.setPos(_startLine, _startPos);
1780         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
1781         _sharedCommentToken.isMultilineComment = true;
1782         _commentAppender.reset();
1783         dchar[] text;
1784         int textStart = _pos + 1;
1785         int level = 1;
1786         for (;;) {
1787             int textEnd = int.max;
1788             int i = textStart;
1789             for (; i < _len - 1; i++) {
1790                 if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
1791                     level++;
1792                     i++;
1793                 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
1794                     if (--level == 0) {
1795                         textEnd = i;
1796                         break;
1797                     }
1798                 }
1799             }
1800             if (textEnd != int.max) {
1801                 if (_enableCommentText)
1802                     _commentAppender.append(_lineText[textStart .. textEnd]);
1803                 _pos = textEnd + 2;
1804                 break;
1805             }
1806             if (!nextLine()) {
1807                 // TODO: do we need throw exception if comment not closed by end of file?
1808                 _pos = _len;
1809                 break;
1810             }
1811             if (_enableCommentText)
1812                 _commentAppender.appendEol();
1813             textStart = 0;
1814         }
1815         if (_enableCommentText) {
1816             _sharedCommentToken.text = _commentAppender.get();
1817         }
1818         return _sharedCommentToken;
1819     }
1820     
    /// Placeholder for hex string literals: not implemented yet.
    /// Advances the read position by one character and returns null.
    protected Token processHexString() {
        _pos++;
        // TODO:
        return null;
    }
1826     
    /// Placeholder for delimited string literals: not implemented yet.
    /// Advances the read position by one character and returns null.
    protected Token processDelimitedString() {
        _pos++;
        // TODO:
        return null;
    }
1832     
    // r"string"   or    `string`
    /// Placeholder for wysiwyg string literals: not implemented yet.
    /// Advances the read position by one character and returns null; the ch argument is not used.
    protected Token processWysiwygString(dchar ch) {
        _pos++;
        // TODO:
        return null;
    }
1839     
1840     protected Token processIdent(dchar firstChar) {
1841         _sharedIdentToken.setPos(_startLine, _startPos);
1842         _identAppender.reset();
1843         _identAppender.append(firstChar);
1844         for (; _pos < _len; ) {
1845             dchar ch = _lineText[_pos];
1846             if (!isIdentMiddleChar(ch)) {
1847                 break;
1848             }
1849             _identAppender.append(ch);
1850             _pos++;
1851         }
1852         _sharedIdentToken.setText(_identAppender.get);
1853         return _sharedIdentToken;
1854     }
1855 
1856     protected Token processIntegerSuffix() {
1857         if (_pos >= _len)
1858             return _sharedIntegerToken;
1859         bool longFlag = false;
1860         bool unsignedFlag = false;
1861         dchar ch = _lineText[_pos];
1862         dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
1863         if (ch == 'l' || ch == 'L') {
1864             longFlag = true;
1865             _pos++;
1866             if (ch2 == 'u' || ch2 == 'U') {
1867                 unsignedFlag = true;
1868                 _pos++;
1869             } 
1870         } else if (ch == 'u' || ch == 'U') {
1871             unsignedFlag = true;
1872             _pos++;
1873             if (ch2 == 'l' || ch2 == 'L') {
1874                 longFlag = true;
1875                 _pos++;
1876             } 
1877         }
1878         _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
1879         ch = _pos < _len ? _lineText[_pos] : 0;
1880         if (isIdentMiddleChar(ch))
1881             return parserError("Unexpected character after number", _sharedIntegerToken);
1882         return _sharedIntegerToken;
1883     }
1884     
1885     protected Token processBinaryNumber() {
1886         _sharedIntegerToken.setPos(_startLine, _startPos);
1887         _pos++;
1888         if (_pos >= _len)
1889             return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
1890         int digits = 0;
1891         ulong number = 0;
1892         int i = _pos;
1893         for (;i < _len; i++) {
1894             dchar ch = _lineText[i];
1895             if (ch != '0' && ch != '1')
1896                 break;
1897             number = (number << 1) | (ch == '1' ? 1 : 0);
1898             digits++;
1899         }
1900         _pos = i;
1901         if (digits > 64)
1902             return parserError("number is too big", _sharedIntegerToken);
1903         _sharedIntegerToken.setValue(number);
1904         return processIntegerSuffix();
1905     }
1906 
1907     protected Token processHexNumber() {
1908         _sharedIntegerToken.setPos(_startLine, _startPos);
1909         _sharedRealToken.setPos(_startLine, _startPos);
1910         _pos++;
1911         if (_pos >= _len)
1912             return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
1913         int digits = 0;
1914         ulong number = 0;
1915         int i = _pos;
1916         for (;i < _len; i++) {
1917             dchar ch = _lineText[i];
1918             uint digit = 0;
1919             if (ch >= '0' && ch <= '9')
1920                 digit = ch - '0';
1921             else if (ch >= 'a' && ch <= 'f')
1922                 digit = ch - 'a' + 10;
1923             else if (ch >= 'A' && ch <= 'F')
1924                 digit = ch - 'A' + 10;
1925             else if (ch == '_')
1926                 continue;
1927             else
1928                 break;
1929             number = (number << 4) | digit;
1930             digits++;
1931         }
1932         _pos = i;
1933         if (digits > 16)
1934             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1935         _sharedIntegerToken.setValue(number);
1936         return processIntegerSuffix();
1937     }
1938     
1939     protected Token processOctNumber() {
1940         _sharedIntegerToken.setPos(_startLine, _startPos);
1941         if (_pos >= _len)
1942             return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
1943         int digits = 0;
1944         ulong number = 0;
1945         int i = _pos;
1946         bool overflow = false;
1947         for (;i < _len; i++) {
1948             dchar ch = _lineText[i];
1949             int digit = 0;
1950             if (ch >= '0' && ch <= '7')
1951                 digit = ch - '0';
1952             else if (ch == '_')
1953                 continue;
1954             else
1955                 break;
1956             number <<= 3;
1957             if (digits >= 20) {
1958                 if ((number >> 3) << 3 != number) {
1959                     overflow = true;
1960                     break;
1961                 }
1962             }
1963             number |= digit;
1964             digits++;
1965         }
1966         _pos = i;
1967         if (overflow)
1968             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1969         _sharedIntegerToken.setValue(number);
1970         return processIntegerSuffix();
1971     }
1972     
1973     // 
1974     protected Token processDecFloatSuffix(real value) {
1975         ubyte precision = 1;
1976         bool imaginary = false;
1977         dchar next = _pos < _len ? _lineText[_pos] : 0;
1978         if (next == 'f') {
1979             _pos++;
1980             precision = 0;
1981         } else if (next == 'L') {
1982             _pos++;
1983             precision = 2;
1984         }
1985         next = _pos < _len ? _lineText[_pos] : 0;
1986         if (next == 'i') {
1987             _pos++;
1988             imaginary = true;
1989         }
1990         next = _pos < _len ? _lineText[_pos] : 0;
1991         if (isIdentMiddleChar(next))
1992             return parserError("invalid suffix for floating point literal", _sharedRealToken);
1993         _sharedRealToken.setValue(value, precision, imaginary);
1994         return _sharedRealToken;
1995     }
1996     
1997     // after E char
1998     protected Token processDecFloatExponent(real value) {
1999         dchar next = _pos < _len ? _lineText[_pos] : 0;
2000         int sign = 1;
2001         if (next == '+') {
2002             _pos++;
2003         } else if (next == '-') {
2004             _pos++;
2005             sign = -1;
2006         }
2007         if (_pos >= _len)
2008             return parserError("Invalid exponent", _sharedRealToken);
2009         ulong digits = 0;
2010         ulong number = 0;
2011         int i = _pos;
2012         bool overflow = false;
2013         for (;i < _len; i++) {
2014             dchar ch = _lineText[i];
2015             uint digit = 0;
2016             if (ch >= '0' && ch <= '9')
2017                 digit = ch - '0';
2018             else if (ch == '_')
2019                 continue;
2020             else
2021                 break;
2022             number *= 10;
2023             if (digits >= 18) {
2024                 if ((number * 10) / 10 != number) {
2025                     overflow = true;
2026                     break;
2027                 }
2028             }
2029             number += digit;
2030             digits++;
2031         }
2032         if (digits == 0)
2033             return parserError("Invalid exponent", _sharedRealToken);
2034         _pos = i;
2035         value *= pow(10., cast(long)number * sign);
2036         return processDecFloatSuffix(value);
2037     }
2038         
2039     protected Token processDecFloatSecondPart(ulong firstPart) {
2040         if (_pos >= _len) {
2041             _sharedRealToken.setValue(cast(real)firstPart);
2042             return _sharedRealToken;
2043         }
2044         ulong divider = 1;
2045         ulong number = 0;
2046         int i = _pos;
2047         bool overflow = false;
2048         for (;i < _len; i++) {
2049             dchar ch = _lineText[i];
2050             uint digit = 0;
2051             if (ch >= '0' && ch <= '9')
2052                 digit = ch - '0';
2053             else if (ch == '_')
2054                 continue;
2055             else
2056                 break;
2057             if (divider * 10 < divider)
2058                 continue; // ignore extra digits
2059             number *= 10;
2060             number += digit;
2061             divider *= 10;
2062         }
2063         _pos = i;
2064         real value = cast(real)firstPart + (cast(real)number / divider);
2065         dchar next = _pos < _len ? _lineText[_pos] : 0;
2066         if (next == 0) {
2067             // neither exponent nor suffix
2068             _sharedRealToken.setValue(value);
2069             return _sharedRealToken;
2070         }
2071            if (next == 'e' || next == 'E') {
2072             _pos++;
2073             return processDecFloatExponent(value);
2074         }
2075         return processDecFloatSuffix(value);
2076     }
2077         
2078     protected Token processDecNumber(dchar c) {
2079         _sharedIntegerToken.setPos(_startLine, _startPos);
2080         _sharedRealToken.setPos(_startLine, _startPos);
2081         //if (_pos >= _len)
2082         //    return parserError("Unexpected end of line in number", _sharedIntegerToken);
2083         int digits = 1;
2084         ulong number = c - '0';
2085         int i = _pos;
2086         bool overflow = false;
2087         if (_line == _startLine) {
2088             for (;i < _len; i++) {
2089                 dchar ch = _lineText[i];
2090                 uint digit = 0;
2091                 if (ch >= '0' && ch <= '9')
2092                     digit = ch - '0';
2093                 else if (ch == '_')
2094                     continue;
2095                 else
2096                     break;
2097                 number *= 10;
2098                 if (digits >= 18) {
2099                     if ((number * 10) / 10 != number) {
2100                         overflow = true;
2101                         break;
2102                     }
2103                 }
2104                 number += digit;
2105                 digits++;
2106             }
2107             _pos = i;
2108         }
2109         if (overflow)
2110             return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
2111         _sharedIntegerToken.setValue(number);
2112         dchar next = _line == _startLine && _pos < _len ? _lineText[_pos] : 0;
2113         if (next == 0)
2114             return _sharedIntegerToken;
2115         if (next == 'e' || next == 'E') {
2116             _pos++;
2117             return processDecFloatExponent(number);
2118         } else if (next == '.') {
2119             _pos++;
2120             return processDecFloatSecondPart(number);
2121         }
2122         return processIntegerSuffix();
2123     }
2124         
    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag.
    /// Convenience overload: the error location and the failed token type are taken from incompleteToken.
    protected Token parserError(string msg, Token incompleteToken) {
        return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
    }
    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag.
    /// In tolerant mode the shared InvalidToken is filled with the message, the failed token type
    /// and the offending source text, and the read position is moved forward to a likely token
    /// boundary on the current line.
    /// Params:
    ///     msg = error message
    ///     startLine = line where the failed token started
    ///     startPos = position where the failed token started (decremented by one before use in tolerant mode)
    ///     failedTokenType = type of the token that could not be parsed
    /// Throws: ParserException (reporting the current line and read position) when not in tolerant mode.
    protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
        if (_errorTolerant) {
            startPos--;
            _sharedInvalidToken.setPos(startLine, startPos);
            _sharedInvalidToken.errorMessage = msg;
            _sharedInvalidToken.errorCode = 1; // for future extension
            _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
            // make invalid source text
            dchar[] invalidText;
            // text that was already consumed: from the token start if it is on the current line,
            // otherwise from the beginning of the current line
            int p = startLine == _line ? startPos : 0;
            for (int i = p; i < _pos && i < _lineText.length; i++)
                invalidText ~= _lineText[i];

            // recover after error: skip up to whitespace or a bracket
            // (for numbers also up to '*' or '/'), collecting the skipped characters
            for (; _pos < _lineText.length; _pos++) {
                dchar ch = _lineText[_pos];
                if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
                    break;
                if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
                    if (ch == '*' || ch == '/')
                        break;
                }
                invalidText ~= ch;
            }
            _sharedInvalidToken.text = invalidText;
            return _sharedInvalidToken;
        }
        throw new ParserException(msg, _lineStream.file, _line, _pos);
    }
2159 
2160     protected Keyword detectKeyword(dchar ch) {
2161         if (ch < '@' || ch > 'z')
2162             return Keyword.NONE;
2163         int len = _len - _pos;
2164         switch (cast(ubyte)ch) {
2165             //    AT_DISABLE
2166             //    AT_NOGC
2167             //    AT_PROPERTY
2168             case '@': return findKeyword(Keyword.AT_DISABLE, Keyword.AT_PROPERTY, _lineText.ptr + _pos, len, _pos);
2169             //    ABSTRACT,
2170             //    ALIAS,
2171             //    ALIGN,
2172             //    ASM,
2173             //    ASSERT,
2174             //    AUTO,
2175             case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos);
2176 
2177             //    BODY,
2178             //    BOOL,
2179             //    BREAK,
2180             //    BYTE,
2181             case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos);
2182                 
2183             //    CASE,
2184             //    CAST,
2185             //    CATCH,
2186             //    CDOUBLE,
2187             //    CENT,
2188             //    CFLOAT,
2189             //    CHAR,
2190             //    CLASS,
2191             //    CONST,
2192             //    CONTINUE,
2193             //    CREAL,
2194             case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos);
2195                 
2196             //    DCHAR,
2197             //    DEBUG,
2198             //    DEFAULT,
2199             //    DELEGATE,
2200             //    DELETE,
2201             //    DEPRECATED,
2202             //    DO,
2203             //    DOUBLE,
2204             case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos);
2205                 
2206             //    ELSE,
2207             //    ENUM,
2208             //    EXPORT,
2209             //    EXTERN,
2210             case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos);
2211                 
2212             //    FALSE,
2213             //    FINAL,
2214             //    FINALLY,
2215             //    FLOAT,
2216             //    FOR,
2217             //    FOREACH,
2218             //    FOREACH_REVERSE,
2219             //    FUNCTION,
2220             case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos);
2221                 
2222             //    GOTO,
2223             case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos);
2224                 
2225             //    IDOUBLE,
2226             //    IF,
2227             //    IFLOAT,
2228             //    IMMUTABLE,
2229             //    IMPORT,
2230             //    IN,
2231             //    INOUT,
2232             //    INT,
2233             //    INTERFACE,
2234             //    INVARIANT,
2235             //    IREAL,
2236             //    IS,
2237             case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos);
2238                 
2239             //    LAZY,
2240             //    LONG,
2241             case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos);
2242                 
2243             //    MACRO,
2244             //    MIXIN,
2245             //    MODULE,
2246             case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos);
2247                 
2248             //    NEW,
2249             //    NOTHROW,
2250             //    NULL,
2251             case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos);
2252                 
2253             //    OUT,
2254             //    OVERRIDE,
2255             case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos);
2256                 
2257             //    PACKAGE,
2258             //    PRAGMA,
2259             //    PRIVATE,
2260             //    PROTECTED,
2261             //    PUBLIC,
2262             //    PURE,
2263             case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos);
2264                 
2265             //    REAL,
2266             //    REF,
2267             //    RETURN,
2268             case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos);
2269 
2270             //    SAFE
2271             //    SCOPE,
2272             //    SHARED,
2273             //    SHORT,
2274             //    STATIC,
2275             //    STRUCT,
2276             //    SUPER,
2277             //    SWITCH,
2278             //    SYNCHRONIZED,
2279             //    SYSTEM
2280             case 's': return findKeyword(Keyword.SAFE, Keyword.SYSTEM, _lineText.ptr + _pos, len, _pos);
2281                 
2282             //    TEMPLATE,
2283             //    THIS,
2284             //    THROW,
2285             //    TRUE,
2286             //    TRY,
2287             //    TYPEDEF,
2288             //    TYPEID,
2289             //    TYPEOF,
2290             case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos);
2291                 
2292             //    UBYTE,
2293             //    UCENT,
2294             //    UINT,
2295             //    ULONG,
2296             //    UNION,
2297             //    UNITTEST,
2298             //    USHORT,
2299             case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos);
2300                 
2301             //    VERSION,
2302             //    VOID,
2303             //    VOLATILE,
2304             case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos);
2305                 
2306             //    WCHAR,
2307             //    WHILE,
2308             //    WITH,
2309             case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos);
2310                 
2311             //    FILE,
2312             //    MODULE,
2313             //    LINE,
2314             //    FUNCTION,
2315             //    PRETTY_FUNCTION,
2316             //
2317             //    GSHARED,
2318             //    TRAITS,
2319             //    VECTOR,
2320             //    PARAMETERS,
2321             case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos);
2322             default: return Keyword.NONE;                
2323         }
2324     }    
2325     protected OpCode detectOp(dchar ch) nothrow {
2326         if (ch >= 128)
2327             return OpCode.NONE;
2328         dchar ch2 = _pos < _len ? _lineText[_pos] : 0;
2329         dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
2330         switch(cast(ubyte)ch) {
2331             //    DIV,         //    /
2332             //    DIV_EQ,     //    /=
2333             case '/':
2334                 if (ch2 == '=') {
2335                     _pos++;
2336                     return OpCode.DIV_EQ;
2337                 }
2338                 return OpCode.DIV;
2339             //    DOT,         //    .
2340             //    DOT_DOT,     //    ..
2341             //    DOT_DOT_DOT,//    ...
2342             case '.':
2343                 if (ch2 == '.') {
2344                     if (ch3 == '.') {
2345                         _pos += 2;
2346                         return OpCode.DOT_DOT_DOT;
2347                     }
2348                     _pos++;
2349                     return OpCode.DOT_DOT;
2350                 }
2351                 return OpCode.DOT;
2352             //    AND,         //    &
2353             //    AND_EQ,     //    &=
2354             //    LOG_AND,     //    &&
2355             case '&':
2356                 if (ch2 == '=') {
2357                     _pos++;
2358                     return OpCode.AND_EQ;
2359                 }
2360                 if (ch2 == '&') {
2361                     _pos++;
2362                     return OpCode.LOG_AND;
2363                 }
2364                 return OpCode.AND;
2365             //    OR,         //    |
2366             //    OR_EQ,         //    |=
2367             //    LOG_OR,     //    ||
2368             case '|':
2369                 if (ch2 == '=') {
2370                     _pos++;
2371                     return OpCode.OR_EQ;
2372                 }
2373                 if (ch2 == '|') {
2374                     _pos++;
2375                     return OpCode.LOG_OR;
2376                 }
2377                 return OpCode.OR;
2378             //    MINUS,         //    -
2379             //    MINUS_EQ,     //    -=
2380             //    MINUS_MINUS,//    --
2381             case '-':
2382                 if (ch2 == '=') {
2383                     _pos++;
2384                     return OpCode.MINUS_EQ;
2385                 }
2386                 if (ch2 == '-') {
2387                     _pos++;
2388                     return OpCode.MINUS_MINUS;
2389                 }
2390                 return OpCode.MINUS;
2391             //    PLUS,         //    +
2392             //    PLUS_EQ,     //    +=
2393             //    PLUS_PLUS,     //    ++
2394             case '+':
2395                 if (ch2 == '=') {
2396                     _pos++;
2397                     return OpCode.PLUS_EQ;
2398                 }
2399                 if (ch2 == '+') {
2400                     _pos++;
2401                     return OpCode.PLUS_PLUS;
2402                 }
2403                 return OpCode.PLUS;
2404             //    LT,         //    <
2405             //    LT_EQ,         //    <=
2406             //    SHL,         //    <<
2407             //    SHL_EQ,     //    <<=
2408             //    LT_GT,         //    <>
2409             //    NE_EQ,         //    <>=
2410             case '<':
2411                 if (ch2 == '<') {
2412                     if (ch3 == '=') {
2413                         _pos += 2;
2414                         return OpCode.SHL_EQ;
2415                     }
2416                     _pos++;
2417                     return OpCode.SHL;
2418                 }
2419                 if (ch2 == '>') {
2420                     if (ch3 == '=') {
2421                         _pos += 2;
2422                         return OpCode.NE_EQ;
2423                     }
2424                     _pos++;
2425                     return OpCode.LT_GT;
2426                 }
2427                 if (ch2 == '=') {
2428                     _pos++;
2429                     return OpCode.LT_EQ;
2430                 }
2431                 return OpCode.LT;
2432             //    GT,         //    >
2433             //    GT_EQ,         //    >=
2434             //    SHR_EQ        //    >>=
2435             //    ASR_EQ,     //    >>>=
2436             //    SHR,         //    >>
2437             //    ASR,         //    >>>
2438             case '>':
2439                 if (ch2 == '>') {
2440                     if (ch3 == '>') {
2441                         dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2442                         if (ch4 == '=') { // >>>=
2443                             _pos += 3;
2444                             return OpCode.ASR_EQ;
2445                         }
2446                         _pos += 2;
2447                         return OpCode.ASR; // >>>
2448                     }
2449                     if (ch3 == '=') { // >>=
2450                         _pos += 2;
2451                         return OpCode.SHR_EQ;
2452                     }
2453                     _pos++;
2454                     return OpCode.SHR;
2455                 }
2456                 if (ch2 == '=') { // >=
2457                     _pos++;
2458                     return OpCode.GT_EQ;
2459                 }
2460                 // >
2461                 return OpCode.GT;
2462             //    NOT,         //    !
2463             //    NOT_EQ        //    !=
2464             //    NOT_LT_GT,     //    !<>
2465             //    NOT_LT_GT_EQ, //    !<>=
2466             //    NOT_LT,     //    !<
2467             //    NOT_LT_EQ,     //    !<=
2468             //    NOT_GT,     //    !>
2469             //    NOT_GT_EQ,     //    !>=
2470             case '!':
2471                 if (ch2 == '<') { // !<
2472                     if (ch3 == '>') { // !<>
2473                         dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2474                         if (ch4 == '=') { // !<>=
2475                             _pos += 3;
2476                             return OpCode.NOT_LT_GT_EQ;
2477                         }
2478                         _pos += 2;
2479                         return OpCode.NOT_LT_GT; // !<>
2480                     }
2481                     if (ch3 == '=') { // !<=
2482                         _pos += 2;
2483                         return OpCode.NOT_LT_EQ;
2484                     }
2485                     _pos++;
2486                     return OpCode.NOT_LT; // !<
2487                 }
2488                 if (ch2 == '=') { // !=
2489                     _pos++;
2490                     return OpCode.NOT_EQ;
2491                 }
2492                 return OpCode.NOT;
2493             //    PAR_OPEN,     //    (
2494             case '(':
2495                 return OpCode.PAR_OPEN;
2496             //    PAR_CLOSE,     //    )
2497             case ')':
2498                 return OpCode.PAR_CLOSE;
2499             //    SQ_OPEN,     //    [
2500             case '[':
2501                 return OpCode.SQ_OPEN;
2502             //    SQ_CLOSE,     //    ]
2503             case ']':
2504                 return OpCode.SQ_CLOSE;
2505             //    CURL_OPEN,     //    {
2506             case '{':
2507                 return OpCode.CURL_OPEN;
2508             //    CURL_CLOSE, //    }
2509             case '}':
2510                 return OpCode.CURL_CLOSE;
2511             //    QUEST,         //    ?
2512             case '?':
2513                 return OpCode.QUEST;
2514             //    COMMA,         //    ,
2515             case ',':
2516                 return OpCode.COMMA;
2517             //    SEMICOLON,     //    ;
2518             case ';':
2519                 return OpCode.SEMICOLON;
2520             //    COLON,         //    :
2521             case ':':
2522                 return OpCode.COLON;
2523             //    DOLLAR,     //    $
2524             case '$':
2525                 return OpCode.DOLLAR;
2526             //    EQ,         //    =
2527             //    QE_EQ,         //    ==
2528             //    EQ_GT,         //    =>
2529             case '=':
2530                 if (ch2 == '=') { // ==
2531                     _pos++;
2532                     return OpCode.QE_EQ;
2533                 }
2534                 if (ch2 == '>') { // =>
2535                     _pos++;
2536                     return OpCode.EQ_GT;
2537                 }
2538                 return OpCode.EQ;
2539             //    MUL,         //    *
2540             //    MUL_EQ,     //    *=
2541             case '*':
2542                 if (ch2 == '=') {
2543                     _pos++;
2544                     return OpCode.MUL_EQ;
2545                 }
2546                 return OpCode.MUL;
2547             //    MOD,     //    %
2548             //    MOD_EQ, //    %=
2549             case '%':
2550                 if (ch2 == '=') {
2551                     _pos++;
2552                     return OpCode.MOD_EQ;
2553                 }
2554                 return OpCode.MOD;
2555             //    XOR,         //    ^
2556             //    XOR_EQ,     //    ^=
2557             //    LOG_XOR,     //    ^^
2558             //    LOG_XOR_EQ, //    ^^=
2559             case '^':
2560                 if (ch2 == '^') {
2561                     if (ch3 == '=') {
2562                         _pos += 2;
2563                         return OpCode.LOG_XOR_EQ;
2564                     }
2565                     _pos++;
2566                     return OpCode.LOG_XOR;
2567                 }
2568                 if (ch2 == '=') {
2569                     _pos++;
2570                     return OpCode.XOR_EQ;
2571                 }
2572                 return OpCode.XOR;
2573             //    INV,         //    ~
2574             //    INV_EQ,     //    ~=
2575             case '~':
2576                 if (ch2 == '=') {
2577                     _pos++;
2578                     return OpCode.INV_EQ;
2579                 }
2580                 return OpCode.INV;
2581             //    AT,         //    @
2582             case '@':
2583                 return OpCode.AT;
2584             //    SHARP         //    #
2585             case '#':
2586                 return OpCode.SHARP;
2587             default:
2588                 return OpCode.NONE;
2589         }
2590     }
2591     
2592     protected Token processCharacterLiteral() {
2593         _sharedCharacterLiteralToken.setPos(_startLine, _startPos);
2594         if (_pos + 2 > _len)
2595             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2596         dchar ch = _lineText[_pos++];
2597         dchar ch2 = _lineText[_pos++];
2598         dchar type = 0;
2599         if (ch == '\\') {
2600             // process escaped character - store it in ch
2601             // TODO: support all escape sequences
2602             switch(ch2) {
2603                 case 'r':
2604                     ch = '\r';
2605                     break;
2606                 case 'n':
2607                     ch = '\n';
2608                     break;
2609                 case 't':
2610                     ch = '\t';
2611                     break;
2612                 case '\\':
2613                     ch = '\\';
2614                     break;
2615                 default:
2616                     ch = ch2;
2617                     break;
2618             }
2619             // here must be closing '
2620             if (_pos + 1 > _len)
2621                 return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2622             ch2 = _lineText[_pos++];
2623         }
2624         if (ch2 != '\'')
2625             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2626         if (_pos < _len) {
2627             dchar t = _lineText[_pos];
2628             if (t == 'd' || t == 'w' || t == 'c') {
2629                 type = t;
2630                 _pos++;
2631             } else if (isIdentMiddleChar(ch)) {
2632                 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken);
2633             }
2634         }
2635         _sharedCharacterLiteralToken.setCharacter(ch, type);
2636         return _sharedCharacterLiteralToken;
2637     }
2638 
2639     protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
2640         bool wysiwyg = (delimiter == 'r' || delimiter == '`');
2641         //writeln("processDoubleQuotedString()");
2642         _sharedStringLiteralToken.setPos(_startLine, _startPos);
2643         _stringLiteralAppender.reset();
2644         if (delimiter == 'r') {
2645             _pos++;
2646             delimiter = '\"';
2647         }
2648         dchar type = 0;
2649         for (;;) {
2650             int i = _pos;
2651             int endPos = int.max;
2652             bool lastBackSlash = false;
2653             for(; i < _len; i++) {
2654                 dchar ch = _lineText[i];
2655                 if (ch == '\\') {
2656                     if (lastBackSlash)
2657                         lastBackSlash = false;
2658                     else
2659                         lastBackSlash = true;
2660                 }
2661                 else if (ch == delimiter && !lastBackSlash) {
2662                     endPos = i;
2663                     break;
2664                 }
2665                 else if(lastBackSlash)
2666                     lastBackSlash = false;
2667             }
2668             if (endPos != int.max) {
2669                 // found end quote
2670                 _stringLiteralAppender.append(_lineText[_pos .. endPos]);
2671                 _pos = endPos + 1;
2672                 break;
2673             }
2674             // no quote by end of line
2675             _stringLiteralAppender.append(_lineText[_pos .. $]);
2676             _stringLiteralAppender.appendEol();
2677             if (!nextLine()) {
2678                 // do we need to throw exception if eof comes before end of string?
2679                 break;
2680             }
2681         }
2682         dchar t = 0;
2683         if (_pos < _len) {
2684             dchar ch = _lineText[_pos];
2685             if (ch == 'c' || ch == 'w' || ch == 'd') {
2686                 t = ch;
2687                 _pos++;
2688                 if (_pos < _len) {
2689                     ch = _lineText[_pos];
2690                     if (isIdentMiddleChar(ch))
2691                         return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2692                 }
2693             } else if (isIdentMiddleChar(ch))
2694                 return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2695         }
2696         if (t != 0) {
2697             if (type != 0 && t != type)
2698                 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
2699             type = t;
2700         }
2701         if (wysiwyg) {
2702             // no escape processing
2703             _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2704             return _sharedStringLiteralToken;
2705         }
2706         _stringLiteralAppender.processEscapeSequences();
2707         _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2708         return _sharedStringLiteralToken;
2709     }
2710 
2711     protected SysTime buildTime;
2712     
2713     //    string literal of the date of compilation "mmm dd yyyy"
2714     protected dstring formatBuildDate() {
2715         // TODO: provide proper format
2716         return to!dstring(buildTime);
2717     }
2718     
2719     //    string literal of the time of compilation "hh:mm:ss"
2720     protected dstring formatBuildTime() {
2721         // TODO: provide proper format
2722         return to!dstring(buildTime);
2723     }
2724     
2725     //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2726     protected dstring formatBuildTimestamp() {
2727         // TODO: provide proper format
2728         return to!dstring(buildTime);
2729     }
2730     
2731     static immutable dstring VERSION = "0.1";
2732     static immutable dstring VENDOR = "coolreader.org";
2733     
2734     protected Token makeSpecialTokenString(dstring str, int pos) {
2735         _sharedStringLiteralToken.setPos(_startLine, _startPos);
2736         _sharedStringLiteralToken.setText(cast(dchar[])str, 0);
2737         return _sharedStringLiteralToken;
2738     }
2739     
2740     protected Token processSpecialToken(Keyword keyword, int pos) {
2741         switch (keyword) {
2742             //Special Token    Replaced with
2743             case Keyword.DATE: //    string literal of the date of compilation "mmm dd yyyy"
2744                 return makeSpecialTokenString(formatBuildDate(), pos);
2745             case Keyword.TIME: //    string literal of the time of compilation "hh:mm:ss"
2746                 return makeSpecialTokenString(formatBuildTime(), pos);
2747             case Keyword.TIMESTAMP: //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2748                 return makeSpecialTokenString(formatBuildTimestamp(), pos);
2749             case Keyword.VENDOR: //    Compiler vendor string, such as "Digital Mars D"
2750                 return makeSpecialTokenString(VENDOR, pos);
2751             case Keyword.VERSION_: //    Compiler version as an integer, such as 2001
2752                 return makeSpecialTokenString(VERSION, pos);
2753             default:
2754                 parserError("Unknown special token", _line, pos);
2755         }
2756         return null;
2757     }
2758     
2759     protected int _startLine;
2760     protected int _startPos;
2761 
2762     // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls).
2763     Token nextToken() {
2764         _startLine = _line;
2765         _startPos = _pos;
2766         dchar ch = nextChar();
2767         if (ch == EOF_CHAR) {
2768             return emitEof();
2769         }
2770         if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
2771             // white space (treat EOL as whitespace, too)
2772             return processWhiteSpace(ch);
2773         }
2774         dchar next = _pos < _len ? _lineText[_pos] : 0;
2775         if (ch == '/') {
2776             if (next == '/')
2777                 return processOneLineComment();
2778             else if (next == '*')
2779                 return processMultilineComment();
2780             else if (next == '+')
2781                 return processNestedComment();
2782         }
2783         if (ch == '#' && _line == 1)
2784             return processOneLineSharpComment();
2785         if (ch == '\"')
2786             return processDoubleQuotedOrWysiwygString(ch);
2787         if (ch == '\'')
2788             return processCharacterLiteral();
2789         if (ch == 'x' && next == '\"')
2790             return processHexString();
2791         if (ch == 'q' && next == '\"')
2792             return processDelimitedString();
2793         if ((ch == 'r' && next == '\"') || (ch == '`'))
2794             return processDoubleQuotedOrWysiwygString(ch);
2795         int oldPos = _pos - 1;
2796         
2797         if (ch == '0') {
2798             if (next == 'b' || next == 'B')
2799                 return processBinaryNumber();
2800             if (next == 'x' || next == 'X')
2801                 return processHexNumber();
2802             if (next >= '0' && next <= '9')
2803                 return processOctNumber();
2804             if (next >= '0' && next <= '9')
2805                 return processDecNumber(ch);
2806         }
2807         if (ch >= '0' && ch <= '9')
2808             return processDecNumber(ch);
2809         if (ch == '.' && next >= '0' && next <= '9') // .123
2810             return processDecFloatSecondPart(0);
2811                 
2812         if (ch == '_' || ch == '@' || isUniversalAlpha(ch)) {
2813             // start of identifier or keyword?
2814             Keyword keyword = detectKeyword(ch);
2815             if (keyword != Keyword.NONE) {
2816                 switch (keyword) {
2817                     //Special Token    Replaced with
2818                     case Keyword.EOF: return emitEof(); //    sets the scanner to the end of the file
2819                     case Keyword.DATE: //    string literal of the date of compilation "mmm dd yyyy"
2820                     case Keyword.TIME: //    string literal of the time of compilation "hh:mm:ss"
2821                     case Keyword.TIMESTAMP: //    string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2822                     case Keyword.VENDOR: //    Compiler vendor string, such as "Digital Mars D"
2823                     case Keyword.VERSION_: //    Compiler version as an integer, such as 2001
2824                         return processSpecialToken(keyword, oldPos);
2825                     default:
2826                         _sharedKeywordToken.setPos(_startLine, _startPos);
2827                         _sharedKeywordToken.keyword = keyword;
2828                         return _sharedKeywordToken;
2829                 }
2830             }
2831             return processIdent(ch);
2832         }
2833         OpCode op = detectOp(ch);
2834         if (op != OpCode.NONE) {
2835             _sharedOpToken.setPos(_startLine, _startPos);
2836             _sharedOpToken.opCode = op;
2837             return _sharedOpToken;
2838         }
2839         return parserError("Invalid token", _line, _pos);
2840     }
2841 
2842     /// tokenize all
2843     Token[] allTokens() {
2844         Token[] res;
2845         res.assumeSafeAppend;
2846         for(;;) {
2847             Token tok = nextToken();
2848             if (!tok || tok.type == TokenType.EOF)
2849                 break;
2850             res ~= tok.clone();
2851         }
2852         return res;
2853     }
2854 }
2855 
// Manual smoke test: dumps every token of a fixed local file to stdout.
// The body is compiled only when version identifier DisableLexerTest is set.
unittest {
    version(DisableLexerTest) {
    import std.stdio;
    import std.conv;
    import std.utf;
    import dlangui.core.linestream;
    string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
    writeln("opening file");
    try {
        std.stream.File input = new std.stream.File(fname);
        scope(exit) input.close();
        try {
            LineStream lineSource = LineStream.create(input, fname);
            Tokenizer lexer = new Tokenizer(lineSource);
            while (true) {
                Token current = lexer.nextToken();
                if (current is null) {
                    writeln("Null token returned");
                    break;
                }
                if (current.type == TokenType.EOF) {
                    writeln("EOF token");
                    break;
                }
                // line:pos <TAB> token text
                writeln("", current.line, ":", current.pos, "\t", current.toString);
            }
        } catch (Exception tokenizeError) {
            // failure while reading or tokenizing
            writeln("Exception " ~ tokenizeError.toString);
        }
    } catch (Exception openError) {
        // failure while opening or closing the file
        writeln("Exception " ~ openError.toString);
    }
    }
}
2890 
/// Looks up the character for a named entity; returns 0 when the name is unknown.
dchar entityToChar(string name) {
    auto found = name in entityToCharMap;
    return found is null ? cast(dchar)0 : *found;
}
2898 
/// Finds the entity name for a character; returns null when there is none.
string charToEntity(dchar ch) {
    auto found = ch in charToEntityMap;
    return found is null ? null : *found;
}
2906 
/// Entity name -> character lookup table.
private __gshared dchar[string] entityToCharMap;
/// Character -> entity name lookup table.
private __gshared string[dchar] charToEntityMap;

/// Registers an entity in both lookup tables; an existing entry for the same
/// name or the same character is overwritten.
private void addEntity(string name, dchar ch) {
    charToEntityMap[ch] = name;
    entityToCharMap[name] = ch;
}
// Fills the entity lookup tables: every (name, code point) row of the table
// below is passed to addEntity, in table order.
// NOTE(review): a `__gshared` attribute on a static constructor is unusual;
// confirm whether `shared static this()` was intended. The declarations of the
// two maps are outside this view, so the attribute is left untouched here.
__gshared static this() {
    // One table row: entity name and the code point it stands for.
    static struct EntityDef {
        string name;
        dchar code;
    }

    static immutable EntityDef[] ENTITY_TABLE = [
        // markup-significant characters
        EntityDef("quot", 34),
        EntityDef("amp", 38),
        EntityDef("lt", 60),
        EntityDef("gt", 62),
        // extended Latin letters, spacing modifiers, general punctuation
        EntityDef("OElig", 338),
        EntityDef("oelig", 339),
        EntityDef("Scaron", 352),
        EntityDef("scaron", 353),
        EntityDef("Yuml", 376),
        EntityDef("circ", 710),
        EntityDef("tilde", 732),
        EntityDef("ensp", 8194),
        EntityDef("emsp", 8195),
        EntityDef("thinsp", 8201),
        EntityDef("zwnj", 8204),
        EntityDef("zwj", 8205),
        EntityDef("lrm", 8206),
        EntityDef("rlm", 8207),
        EntityDef("ndash", 8211),
        EntityDef("mdash", 8212),
        EntityDef("lsquo", 8216),
        EntityDef("rsquo", 8217),
        EntityDef("sbquo", 8218),
        EntityDef("ldquo", 8220),
        EntityDef("rdquo", 8221),
        EntityDef("bdquo", 8222),
        EntityDef("dagger", 8224),
        EntityDef("Dagger", 8225),
        EntityDef("permil", 8240),
        EntityDef("lsaquo", 8249),
        EntityDef("rsaquo", 8250),
        EntityDef("euro", 8364),
        // code points 160..255
        EntityDef("nbsp", 160),
        EntityDef("iexcl", 161),
        EntityDef("cent", 162),
        EntityDef("pound", 163),
        EntityDef("curren", 164),
        EntityDef("yen", 165),
        EntityDef("brvbar", 166),
        EntityDef("sect", 167),
        EntityDef("uml", 168),
        EntityDef("copy", 169),
        EntityDef("ordf", 170),
        EntityDef("laquo", 171),
        EntityDef("not", 172),
        EntityDef("shy", 173),
        EntityDef("reg", 174),
        EntityDef("macr", 175),
        EntityDef("deg", 176),
        EntityDef("plusmn", 177),
        EntityDef("sup2", 178),
        EntityDef("sup3", 179),
        EntityDef("acute", 180),
        EntityDef("micro", 181),
        EntityDef("para", 182),
        EntityDef("middot", 183),
        EntityDef("cedil", 184),
        EntityDef("sup1", 185),
        EntityDef("ordm", 186),
        EntityDef("raquo", 187),
        EntityDef("frac14", 188),
        EntityDef("frac12", 189),
        EntityDef("frac34", 190),
        EntityDef("iquest", 191),
        EntityDef("Agrave", 192),
        EntityDef("Aacute", 193),
        EntityDef("Acirc", 194),
        EntityDef("Atilde", 195),
        EntityDef("Auml", 196),
        EntityDef("Aring", 197),
        EntityDef("AElig", 198),
        EntityDef("Ccedil", 199),
        EntityDef("Egrave", 200),
        EntityDef("Eacute", 201),
        EntityDef("Ecirc", 202),
        EntityDef("Euml", 203),
        EntityDef("Igrave", 204),
        EntityDef("Iacute", 205),
        EntityDef("Icirc", 206),
        EntityDef("Iuml", 207),
        EntityDef("ETH", 208),
        EntityDef("Ntilde", 209),
        EntityDef("Ograve", 210),
        EntityDef("Oacute", 211),
        EntityDef("Ocirc", 212),
        EntityDef("Otilde", 213),
        EntityDef("Ouml", 214),
        EntityDef("times", 215),
        EntityDef("Oslash", 216),
        EntityDef("Ugrave", 217),
        EntityDef("Uacute", 218),
        EntityDef("Ucirc", 219),
        EntityDef("Uuml", 220),
        EntityDef("Yacute", 221),
        EntityDef("THORN", 222),
        EntityDef("szlig", 223),
        EntityDef("agrave", 224),
        EntityDef("aacute", 225),
        EntityDef("acirc", 226),
        EntityDef("atilde", 227),
        EntityDef("auml", 228),
        EntityDef("aring", 229),
        EntityDef("aelig", 230),
        EntityDef("ccedil", 231),
        EntityDef("egrave", 232),
        EntityDef("eacute", 233),
        EntityDef("ecirc", 234),
        EntityDef("euml", 235),
        EntityDef("igrave", 236),
        EntityDef("iacute", 237),
        EntityDef("icirc", 238),
        EntityDef("iuml", 239),
        EntityDef("eth", 240),
        EntityDef("ntilde", 241),
        EntityDef("ograve", 242),
        EntityDef("oacute", 243),
        EntityDef("ocirc", 244),
        EntityDef("otilde", 245),
        EntityDef("ouml", 246),
        EntityDef("divide", 247),
        EntityDef("oslash", 248),
        EntityDef("ugrave", 249),
        EntityDef("uacute", 250),
        EntityDef("ucirc", 251),
        EntityDef("uuml", 252),
        EntityDef("yacute", 253),
        EntityDef("thorn", 254),
        EntityDef("yuml", 255),
        // Greek letters and related symbols
        EntityDef("fnof", 402),
        EntityDef("Alpha", 913),
        EntityDef("Beta", 914),
        EntityDef("Gamma", 915),
        EntityDef("Delta", 916),
        EntityDef("Epsilon", 917),
        EntityDef("Zeta", 918),
        EntityDef("Eta", 919),
        EntityDef("Theta", 920),
        EntityDef("Iota", 921),
        EntityDef("Kappa", 922),
        EntityDef("Lambda", 923),
        EntityDef("Mu", 924),
        EntityDef("Nu", 925),
        EntityDef("Xi", 926),
        EntityDef("Omicron", 927),
        EntityDef("Pi", 928),
        EntityDef("Rho", 929),
        EntityDef("Sigma", 931),
        EntityDef("Tau", 932),
        EntityDef("Upsilon", 933),
        EntityDef("Phi", 934),
        EntityDef("Chi", 935),
        EntityDef("Psi", 936),
        EntityDef("Omega", 937),
        EntityDef("alpha", 945),
        EntityDef("beta", 946),
        EntityDef("gamma", 947),
        EntityDef("delta", 948),
        EntityDef("epsilon", 949),
        EntityDef("zeta", 950),
        EntityDef("eta", 951),
        EntityDef("theta", 952),
        EntityDef("iota", 953),
        EntityDef("kappa", 954),
        EntityDef("lambda", 955),
        EntityDef("mu", 956),
        EntityDef("nu", 957),
        EntityDef("xi", 958),
        EntityDef("omicron", 959),
        EntityDef("pi", 960),
        EntityDef("rho", 961),
        EntityDef("sigmaf", 962),
        EntityDef("sigma", 963),
        EntityDef("tau", 964),
        EntityDef("upsilon", 965),
        EntityDef("phi", 966),
        EntityDef("chi", 967),
        EntityDef("psi", 968),
        EntityDef("omega", 969),
        EntityDef("thetasym", 977),
        EntityDef("upsih", 978),
        EntityDef("piv", 982),
        // punctuation, letter-like symbols, arrows, math operators, shapes
        EntityDef("bull", 8226),
        EntityDef("hellip", 8230),
        EntityDef("prime", 8242),
        EntityDef("Prime", 8243),
        EntityDef("oline", 8254),
        EntityDef("frasl", 8260),
        EntityDef("weierp", 8472),
        EntityDef("image", 8465),
        EntityDef("real", 8476),
        EntityDef("trade", 8482),
        EntityDef("alefsym", 8501),
        EntityDef("larr", 8592),
        EntityDef("uarr", 8593),
        EntityDef("rarr", 8594),
        EntityDef("darr", 8595),
        EntityDef("harr", 8596),
        EntityDef("crarr", 8629),
        EntityDef("lArr", 8656),
        EntityDef("uArr", 8657),
        EntityDef("rArr", 8658),
        EntityDef("dArr", 8659),
        EntityDef("hArr", 8660),
        EntityDef("forall", 8704),
        EntityDef("part", 8706),
        EntityDef("exist", 8707),
        EntityDef("empty", 8709),
        EntityDef("nabla", 8711),
        EntityDef("isin", 8712),
        EntityDef("notin", 8713),
        EntityDef("ni", 8715),
        EntityDef("prod", 8719),
        EntityDef("sum", 8721),
        EntityDef("minus", 8722),
        EntityDef("lowast", 8727),
        EntityDef("radic", 8730),
        EntityDef("prop", 8733),
        EntityDef("infin", 8734),
        EntityDef("ang", 8736),
        EntityDef("and", 8743),
        EntityDef("or", 8744),
        EntityDef("cap", 8745),
        EntityDef("cup", 8746),
        EntityDef("int", 8747),
        EntityDef("there4", 8756),
        EntityDef("sim", 8764),
        EntityDef("cong", 8773),
        EntityDef("asymp", 8776),
        EntityDef("ne", 8800),
        EntityDef("equiv", 8801),
        EntityDef("le", 8804),
        EntityDef("ge", 8805),
        EntityDef("sub", 8834),
        EntityDef("sup", 8835),
        EntityDef("nsub", 8836),
        EntityDef("sube", 8838),
        EntityDef("supe", 8839),
        EntityDef("oplus", 8853),
        EntityDef("otimes", 8855),
        EntityDef("perp", 8869),
        EntityDef("sdot", 8901),
        EntityDef("lceil", 8968),
        EntityDef("rceil", 8969),
        EntityDef("lfloor", 8970),
        EntityDef("rfloor", 8971),
        EntityDef("loz", 9674),
        EntityDef("spades", 9824),
        EntityDef("clubs", 9827),
        EntityDef("hearts", 9829),
        EntityDef("diams", 9830),
        EntityDef("lang", 10216),
        EntityDef("rang", 10217),
    ];

    // Rows are registered first to last, so the original call order is preserved.
    foreach (entry; ENTITY_TABLE)
        addEntity(entry.name, entry.code);
}
3167 
3168 
3169 
3170 //void runTokenizerTest()
unittest
{
    // Tokenizer self-test. Each case hands a source snippet to testTokenizer together
    // with an ordered list of expectations; every expectation consumes exactly one
    // token from the tokenizer and fails the unittest when that token does not match.
    import std.algorithm;

    // Base expectation for a single token. Remembers the file/line where the
    // expectation was created so that a failure can point back to the test case.
    class TokenTest {
        uint _line;
        string _file;
        this(string file, uint line) {
            _file = file;
            _line = line;
        }
        // Returns true when `token` satisfies this expectation; the base class accepts any token.
        bool doTest(Token token) {
            return true;
        }
        // Reads the next token from `tokenizer` and asserts that doTest accepts it.
        void execute(Tokenizer tokenizer) {
            Token token = tokenizer.nextToken();
            if (!doTest(token)) {
                assert(false, "    token does not match at " ~ _file ~ ":" ~ to!string(_line) ~ "  foundToken: " ~ token.toString ~ " expected: " ~ toString);
            }
        }
        // Human readable description of the expectation, used in the failure message.
        public override @property string toString() {
            return "TokenTest";
        }
    }

    // Tokenizes `code` and runs the expectations in `tokens` against it, in order.
    // `file`/`line` default to the call site and become part of the source name
    // given to the Tokenizer.
    void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
        Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
        foreach (expected; tokens)
            expected.execute(tokenizer);
    }

    // Expects a KEYWORD token with the given keyword code.
    class KeywordTest : TokenTest {
        Keyword _code;
        this(Keyword code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.KEYWORD && token.keyword == _code;
        }
        public override @property string toString() {
            return "Keyword:" ~ to!string(_code);
        }
    }

    // Expects an OP token with the given operator code.
    class OpTest : TokenTest {
        OpCode _code;
        this(OpCode code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.OP && token.opCode == _code;
        }
        public override @property string toString() {
            return "Op:" ~ to!string(_code);
        }
    }

    // Expects a STRING token with the given text and literal type
    // (the cases below use 'c', 'w', 'd' for a suffixed literal and 0 for none).
    class StringTest : TokenTest {
        dstring _value;
        dchar _literalType;
        this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _literalType = literalType;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.STRING
                && token.text.equal(_value)
                && token.literalType == _literalType;
        }
        public override @property string toString() {
            return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
        }
    }

    // Expects an INTEGER token with the given value and unsigned/long flags.
    class IntegerTest : TokenTest {
        ulong _value;
        bool _unsigned;
        bool _long;
        this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _unsigned = unsignedFlag;
            _long = longFlag;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.INTEGER
                && token.intValue == _value
                && token.isUnsigned == _unsigned
                && token.isLong == _long;
        }
        public override @property string toString() {
            return "Integer:" ~ to!string(_value);
        }
    }

    // Expects a FLOAT token whose value is within a relative error of 1e-6 of
    // `value`, with matching precision code and imaginary flag. toString renders
    // precision 0 as suffix "f" and 2 as suffix "L"; any other value has no suffix.
    class RealTest : TokenTest {
        real _value;
        ubyte _precision;
        bool _imaginary;
        this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _precision = precision;
            _imaginary = imaginary;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.FLOAT)
                return false;
            real diff = fabs(token.realValue - _value);
            real maxerr = fabs(_value / 1000000);
            // Written as !(a <= b) rather than (a > b): every ordered comparison with
            // NaN is false, so a NaN token value must count as a mismatch, not a pass.
            if (!(diff <= maxerr))
                return false;
            return token.precision == _precision && token.isImaginary == _imaginary;
        }
        public override @property string toString() {
            return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
        }
    }

    // Expects an IDENTIFIER token with exactly the given text.
    class IdentTest : TokenTest {
        string _value;
        this(string value, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.IDENTIFIER && to!string(token.text).equal(_value);
        }
        public override @property string toString() {
            return "Ident:" ~ _value;
        }
    }

    // Expects any COMMENT token.
    class CommentTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            return token.type == TokenType.COMMENT;
        }
        public override @property string toString() {
            return "Comment";
        }
    }

    // Expects the EOF token.
    class EOFTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            return token.type == TokenType.EOF;
        }
        public override @property string toString() {
            return "EOF";
        }
    }

    // Expects any WHITESPACE token.
    class WhiteSpaceTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            return token.type == TokenType.WHITESPACE;
        }
        public override @property string toString() {
            return "whiteSpace";
        }
    }

    // Short factory helpers; the defaulted file/line capture the call site so a
    // failure message names the line of the expectation inside the test list.
    TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
        return new StringTest(value, literalType, file, line);
    }
    TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
        return new IntegerTest(value, unsignedFlag, longFlag, file, line);
    }
    TokenTest checkReal(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
        return new RealTest(value, precision, imaginary, file, line);
    }
    TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
        return new IdentTest(value, file, line);
    }
    TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
        return new KeywordTest(value, file, line);
    }
    TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
        return new OpTest(value, file, line);
    }
    TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
        return new WhiteSpaceTest(file, line);
    }
    TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
        return new CommentTest(file, line);
    }
    TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
        return new EOFTest(file, line);
    }

    // raw string literal: the backslash sequence must be kept verbatim
    testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);

    // string literals: escapes, backquoted form, c/w/d suffixes, named entity escape
    testTokenizer(q"TEST
"simple string"
"simple\nstring"
`simple string`
"simple string"d
"simple string"c
"simple string"w
"simple\&quot;string"
"\r\n\f\t\\\"\'&"
TEST"
                  , [
                      checkString("simple string"),
                      checkSpace(),
                      checkString("simple\nstring"),
                      checkSpace(),
                      checkString("simple string"),
                      checkSpace(),
                      checkString("simple string", 'd'),
                      checkSpace(),
                      checkString("simple string", 'c'),
                      checkSpace(),
                      checkString("simple string", 'w'),
                      checkSpace(),
                      checkString("simple\&quot;string"),
                      checkSpace(),
                      checkString("\r\n\f\t\\\"\'&"),
    ]);
    // basic test
    testTokenizer(q"TEST
int i;
TEST"
                  , [
                      checkKeyword(Keyword.INT),
                      checkSpace(),
                      checkIdent("i"),
                      checkOp(OpCode.SEMICOLON),
                      checkEOF()
                  ]);
    // numbers: binary, hex with U/L suffixes, octal, decimal, digit separators, floats
    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
                  , [
                      checkInteger(13),
                      checkSpace(),
                      checkInteger(0x123abcd, true, false),
                      checkSpace(),
                      checkInteger(0xabc, false, true),
                      checkSpace(),
                      checkInteger(std.conv.octal!743),
                      checkSpace(),
                      checkInteger(192_837_465),
                      checkSpace(),
                      checkInteger(0),
                      checkSpace(),
                      checkInteger(192837465),
                      checkSpace(),
                      checkReal(5.25),
                      checkSpace(),
                      checkReal(12.3f, 0),
                      checkSpace(),
                      checkReal(54.1L, 2),
                      checkSpace(),
                      checkReal(67.1, 1, true),
                      checkSpace(),
                      checkReal(3e3),
                      checkSpace(),
                      checkReal(25.67e-5f, 0),
                      checkEOF()
                  ]);
    // regression: an identifier that merely starts with a keyword (`fork;`, `ind;`)
    // must come out as one identifier, not as a keyword followed by the remainder
    testTokenizer("fork;", [checkIdent("fork"),checkOp(OpCode.SEMICOLON),checkEOF()]);

}
3469