1 module ddc.lexer.tokenizer;
2 
3 import ddc.lexer.textsource;
4 import ddc.lexer.exceptions;
5 
6 import std.stdio;
7 import std.datetime;
8 import std.conv;
9 import std.utf;
10 import std.math;
11 
/**
 * Kind of a lexical token.
 *
 * Stored as a `ubyte`. Members are numbered implicitly in declaration order,
 * so EOF is 0 and INVALID is 10; do not reorder them if the numeric values
 * are relied upon anywhere.
 */
enum TokenType : ubyte
{
	EOF,        // 0
	//EOL,      // disabled: not a member, takes no numeric value
	WHITESPACE, // 1
	COMMENT,    // 2
	IDENTIFIER, // 3
	STRING,     // 4
	CHARACTER,  // 5
	INTEGER,    // 6
	FLOAT,      // 7
	KEYWORD,    // 8
	OP,         // 9
	INVALID,    // 10
}
26 
27 // Bit table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex D) OR a..z OR A..Z OR _.
28 // Bit (c & 31) of word [c >> 5] is set when code point c is accepted (this is how isUniversalAlpha reads it); max code is 0xd7ff.
29 // 1728 words = (0xd7ff + 1) / 32; the trailing comment on each row gives the code point range covered by that row.
30 const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
31     0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
32     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
33     0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
34     0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
35     0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
36     0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
37     0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
38     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
39     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
40     0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
41     0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
42     0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
43     0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
44     0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff
45     0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff
46     0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff
47     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff
48     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff
49     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff
50     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff
51     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff
52     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff
53     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff
54     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff
55     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff
56     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff
57     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff
58     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff
59     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff
60     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff
61     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff
62     0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff
63     0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff
64     0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff
65     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff
66     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff
67     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff
68     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff
69     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff
70     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff
71     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff
72     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff
73     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff
74     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff
75     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff
76     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff
77     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff
78     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff
79     0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff
80     0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff
81     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff
82     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff
83     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff
84     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff
85     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff
86     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff
87     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff
88     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff
89     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff
90     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff
91     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff
92     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff
93     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff
94     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff
95     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff
96     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff
97     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff
98     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff
99     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff
100     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff
101     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff
102     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff
103     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff
104     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff
105     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff
106     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff
107     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff
108     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff
109     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff
110     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff
111     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff
112     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff
113     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff
114     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff
115     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff
116     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff
117     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff
118     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff
119     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff
120     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff
121     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff
122     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff
123     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff
124     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff
125     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff
126     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff
127     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff
128     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff
129     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff
130     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff
131     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff
132     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff
133     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff
134     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff
135     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff
136     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff
137     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff
138     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff
139     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff
140     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff
141     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff
142     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff
143     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff
144     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff
145     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff
146     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff
147     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff
148     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff
149     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff
150     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff
151     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff
152     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff
153     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff
154     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff
155     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff
156     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff
157     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff
158     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff
159     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff
160     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff
161     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff
162     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff
163     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff
164     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff
165     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff
166     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff
167     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff
168     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff
169     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff
170     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff
171     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff
172     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff
173     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff
174     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff
175     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff
176     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff
177     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff
178     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff
179     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff
180     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff
181     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff
182     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff
183     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff
184     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff
185     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff
186     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff
187     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff
188     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff
189     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff
190     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff
191     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff
192     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff
193     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff
194     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff
195     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff
196     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff
197     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff
198     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff
199     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff
200     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff
201     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff
202     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff
203     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff
204     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff
205     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff
206     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff
207     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff
208     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff
209     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff
210     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff
211     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff
212     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff
213     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff
214     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff
215     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff
216     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff
217     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff
218     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff
219     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff
220     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff
221     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff
222     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff
223     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff
224     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff
225     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff
226     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff
227     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff
228     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff
229     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff
230     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff
231     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff
232     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff
233     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff
234     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
235     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
236     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
237     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
238     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
239     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
240     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
241     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
242     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
243     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
244     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
245     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
246     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
247 ];
248 
/**
 * Returns true if `ch` is A..Z, a..z, _ or a universal alpha.
 *
 * The answer is read from UNIVERSAL_ALPHA_FLAGS: word `ch / 32`, bit `ch % 32`.
 * Code points above 0xd7ff are outside the table and are always rejected.
 */
bool isUniversalAlpha(dchar ch) pure nothrow {
	// guard first so the table is never indexed out of range
	if (ch > 0xd7ff)
		return false;
	immutable uint word = UNIVERSAL_ALPHA_FLAGS[ch / 32];
	immutable uint mask = 1u << (ch % 32);
	return (word & mask) != 0;
}
253 
/**
 * Returns true if `ch` may appear as the first character of an identifier.
 *
 * This is exactly the set accepted by isUniversalAlpha.
 */
bool isIdentStartChar(dchar ch) pure nothrow
{
	immutable bool canStart = isUniversalAlpha(ch);
	return canStart;
}
258 
/**
 * Returns true if `ch` may appear after the first character of an identifier:
 * an ASCII digit '0'..'9' or anything accepted by isUniversalAlpha.
 */
bool isIdentMiddleChar(dchar ch) pure nothrow
{
	// ASCII digits are allowed here even though they are not in the alpha table
	if ('0' <= ch && ch <= '9')
		return true;
	return isUniversalAlpha(ch);
}
263 	
/// Compile-time switch for the `static if` section that follows (the `r` helpers and
/// isUniversalAlphaSlow). It is false, so that section is not compiled.
/// NOTE(review): presumably only needed when regenerating UNIVERSAL_ALPHA_FLAGS - confirm.
immutable ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
265 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
266 		bool r(dchar ch, wchar v) pure nothrow {
267 			return ch == v;
268 		}
269 		
270 		bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
271 			return ch >= v1 && ch <= v2;
272 		}
273 
274 		bool isUniversalAlphaSlow(dchar c)  pure nothrow {
275 			return 
276 				// Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
277 				// 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
278 				r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
279 				|| r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
280 				//Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
281 				//03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
282 				//1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
283 				//1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
284 				//1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
285 				|| r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
286 				|| r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
287 				|| r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
288 				|| r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
289 				|| r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
290 				//Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
291 				//04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
292 				|| r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
293 				|| r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
294 				//Armenian: 0531−0556, 0561−0587
295 				|| r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
296 				//Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
297 				//05F0−05F2
298 				|| r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
299 				|| r(c, 0x05F0,0x05F2)
300 				//Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
301 				//06D0−06DC, 06E5−06E8, 06EA−06ED
302 				|| r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
303 				|| r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
304 				//Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
305 				|| r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
306 				//Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
307 				//09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
308 				//09DC−09DD, 09DF−09E3, 09F0−09F1
309 				|| r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
310 				|| r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
311 				|| r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
312 				//Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
313 				//0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
314 				//0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
315 				|| r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
316 				|| r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
317 				|| r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
318 				//Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
319 				//0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
320 				//0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
321 				|| r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
322 				|| r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
323 				|| r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
324 				// Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
325 				//0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
326 				//0B5C−0B5D, 0B5F−0B61
327 				|| r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
328 				|| r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
329 				|| r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
330 				//Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
331 				//0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
332 				//0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
333 				|| r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
334 				|| r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
335 				|| r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
336 				//Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
337 				//0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
338 				|| r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
339 				|| r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
340 				//Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
341 				//0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
342 				//0CE0−0CE1
343 				|| r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
344 				|| r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
345 				|| r(c, 0x0CE0,0x0CE1)
346 				//Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
347 				//0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
348 				|| r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
349 				|| r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
350 				//Thai: 0E01−0E3A, 0E40−0E5B
351 				|| r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
352 				//Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
353 				//0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
354 				//0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
355 				//0EC8−0ECD, 0EDC−0EDD
356 				|| r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
357 				|| r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
358 				|| r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
359 				|| r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
360 				//Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
361 				//0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
362 				//0FB1−0FB7, 0FB9
363 				|| r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
364 				|| r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
365 				|| r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
366 				//Georgian: 10A0−10C5, 10D0−10F6
367 				|| r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
368 				//Hiragana: 3041−3093, 309B−309C
369 				|| r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
370 				//Katakana: 30A1−30F6, 30FB−30FC
371 				|| r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
372 				//Bopomofo: 3105−312C
373 				|| r(c, 0x3105,0x312C)
374 				//CJK Unified Ideographs: 4E00−9FA5
375 				|| r(c, 0x4E00,0x9FA5)
376 				//Hangul: AC00−D7A3
377 				|| r(c, 0xAC00,0xD7A3)
378 				//Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
379 				//0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
380 				//0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
381 				|| r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
382 				|| r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
383 				|| r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
384 				//Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
385 				//02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
386 				//2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
387 				//2133−2138, 2160−2182, 3005−3007, 3021−3029
388 				|| r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
389 				|| r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
390 				|| r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
391 				|| r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
392 				;
393 		}
394 
395 }
396 
// Optional generator and self-check for the UNIVERSAL_ALPHA_FLAGS bit table.
//
// Compiled only when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true. It
//  1. finds the highest code below 0x10000 accepted by the slow reference predicate,
//  2. prints a D array literal holding one bit per code (32 codes per uint, 8 uints per row),
//  3. asserts that isUniversalAlpha() agrees with the reference predicate for all codes below 0x100000.
unittest {

	static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
		// Reference predicate the table is generated from: UniversalAlpha OR a..z OR A..Z OR _
		static bool expectedAlpha(uint ch) {
			return isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		}

		immutable uint itemsInRow = 8;
		immutable uint codesInRow = itemsInRow * 32;

		// highest code accepted by the reference predicate
		uint maxAlpha = 0;
		for (uint i = 0; i < 0x10000; i++) {
			if (expectedAlpha(i))
				maxAlpha = i;
		}
		// Extend to the last code of the printed row that contains maxAlpha.
		// (Rounding "maxAlpha + codesInRow - 1" instead would drop maxAlpha itself
		// whenever it is an exact multiple of codesInRow.)
		maxAlpha = (maxAlpha / codesInRow + 1) * codesInRow - 1;

		writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
		writefln("// max code is 0x%04x", maxAlpha);
		writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");

		immutable uint lastWordStart = maxAlpha / 32 * 32;
		for (uint i = 0; i <= maxAlpha; i += 32) {
			immutable uint column = (i / 32) % itemsInRow;
			if (column == 0)
				write("    ");
			// bit j of the word is set when code (i + j) is accepted
			uint flags = 0;
			for (uint j = 0; j < 32; j++) {
				if (expectedAlpha(i + j))
					flags |= (1u << j);
			}
			writef("0x%08x", flags);
			if (i != lastWordStart)
				write(",");
			if (column == itemsInRow - 1) {
				// trailing comment: first and last code covered by this row
				writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
			}
		}
		writeln("];");

		// cross-check the fast lookup against the reference predicate
		for (uint ch = 0; ch < 0x100000; ch++) {
			immutable bool expected = expectedAlpha(ch);
			immutable bool actual = isUniversalAlpha(ch);
			if (actual != expected) {
				// second call kept from the original; presumably a convenient breakpoint target
				isUniversalAlpha(ch);
				writefln("universalAlpha test failed for char %06x expected %d actual %d", ch, expected ? 1 : 0, actual ? 1 : 0);
			}
			assert(actual == expected);
		}
	}
}
442 
/**
 * Operator / punctuation codes used by operator tokens.
 *
 * The numeric value of a member is used as an index into OP_CODE_STRINGS
 * (see getOpNameD), so the members must stay in the same order as that table.
 * The trailing comment on each member shows its source spelling.
 */
enum OpCode : ubyte {
	NONE,       //    no op
	DIV, 		//    /
	DIV_EQ, 	//    /=
	DOT, 		//    .
	DOT_DOT, 	//    ..
	DOT_DOT_DOT,//    ...
	AND, 		//    &
	AND_EQ, 	//    &=
	LOG_AND, 	//    &&
	OR, 		//    |
	OR_EQ, 		//    |=
	LOG_OR, 	//    ||
	MINUS, 		//    -
	MINUS_EQ, 	//    -=
	MINUS_MINUS,//    --
	PLUS, 		//    +
	PLUS_EQ, 	//    +=
	PLUS_PLUS, 	//    ++
	LT, 		//    <
	LT_EQ, 		//    <=
	SHL, 		//    <<
	SHL_EQ, 	//    <<=
	LT_GT, 		//    <>
	NE_EQ, 		//    <>=
	GT, 		//    >
	GT_EQ, 		//    >=
	// note: the two shift-assign forms are listed before the plain shifts
	SHR_EQ,		//    >>=
	ASR_EQ, 	//    >>>=
	SHR, 		//    >>
	ASR, 		//    >>>
	NOT, 		//    !
	NOT_EQ,		//    !=
	NOT_LT_GT, 	//    !<>
	NOT_LT_GT_EQ, //    !<>=
	NOT_LT, 	//    !<
	NOT_LT_EQ, 	//    !<=
	NOT_GT, 	//    !>
	NOT_GT_EQ, 	//    !>=
	PAR_OPEN, 	//    (
	PAR_CLOSE, 	//    )
	SQ_OPEN, 	//    [
	SQ_CLOSE, 	//    ]
	CURL_OPEN, 	//    {
	CURL_CLOSE, //    }
	QUEST, 		//    ?
	COMMA, 		//    ,
	SEMICOLON,  //    ;
	COLON, 	    //    :
	DOLLAR, 	//    $
	EQ, 		//    =
	QE_EQ, 		//    ==   (member name is spelled QE_EQ)
	MUL, 		//    *
	MUL_EQ, 	//    *=
	MOD, 	//    %
	MOD_EQ, //    %=
	XOR, 		//    ^
	XOR_EQ, 	//    ^=
	LOG_XOR, 	//    ^^
	LOG_XOR_EQ, //    ^^=
	INV, 		//    ~
	INV_EQ, 	//    ~=
	AT, 		//    @
	EQ_GT, 		//    =>
	SHARP 		//    #
};
509 
/**
 * Source spelling of every operator, indexed by OpCode value (see getOpNameD).
 * Entry 0 is the empty string for OpCode.NONE; the remaining entries follow
 * the declaration order of OpCode one-to-one.
 */
immutable dstring[] OP_CODE_STRINGS = [
	"",
	"/",
	"/=",
	".",
	"..",
	"...",
	"&",
	"&=",
	"&&",
	"|",
	"|=",
	"||",
	"-",
	"-=",
	"--",
	"+",
	"+=",
	"++",
	"<",
	"<=",
	"<<",
	"<<=",
	"<>",
	"<>=",
	">",
	">=",
	">>=",
	">>>=",
	">>",
	">>>",
	"!",
	"!=",
	"!<>",
	"!<>=",
	"!<",
	"!<=",
	"!>",
	"!>=",
	"(",
	")",
	"[",
	"]",
	"{",
	"}",
	"?",
	",",
	";",
	":",
	"$",
	"=",
	"==",
	"*",
	"*=",
	"%",
	"%=",
	"^",
	"^=",
	"^^",
	"^^=",
	"~",
	"~=",
	"@",
	"=>",
	"#"
];
576 
/// Returns the source spelling of operator `op`, looked up in OP_CODE_STRINGS
/// (the empty string for OpCode.NONE).
dstring getOpNameD(OpCode op) pure nothrow {
	immutable size_t index = op;
	return OP_CODE_STRINGS[index];
}
580 
/**
 * Keyword identifiers used by keyword tokens.
 *
 * The numeric value of a member is used as an index into KEYWORD_STRINGS
 * (see getKeywordNameD and findKeyword), so the members must stay in the
 * same order as that table. Members are grouped by the first letter of their
 * spelling; the double-underscore special tokens come last.
 */
enum Keyword : ubyte {
	NONE,
	ABSTRACT,
	ALIAS,
	ALIGN,
	ASM,
	ASSERT,
	AUTO,

	BODY,
	BOOL,
	BREAK,
	BYTE,

	CASE,
	CAST,
	CATCH,
	CDOUBLE,
	CENT,
	CFLOAT,
	CHAR,
	CLASS,
	CONST,
	CONTINUE,
	CREAL,

	DCHAR,
	DEBUG,
	DEFAULT,
	DELEGATE,
	DELETE,
	DEPRECATED,
	DO,
	DOUBLE,

	ELSE,
	ENUM,
	EXPORT,
	EXTERN,

	FALSE,
	FINAL,
	FINALLY,
	FLOAT,
	FOR,
	FOREACH,
	FOREACH_REVERSE,
	FUNCTION,

	GOTO,

	IDOUBLE,
	IF,
	IFLOAT,
	IMMUTABLE,
	IMPORT,
	IN,
	INOUT,
	INT,
	INTERFACE,
	INVARIANT,
	IREAL,
	IS,

	LAZY,
	LONG,

	MACRO,
	MIXIN,
	MODULE,

	NEW,
	NOTHROW,
	NULL,

	OUT,
	OVERRIDE,

	PACKAGE,
	PRAGMA,
	PRIVATE,
	PROTECTED,
	PUBLIC,
	PURE,

	REAL,
	REF,
	RETURN,

	SCOPE,
	SHARED,
	SHORT,
	STATIC,
	STRUCT,
	SUPER,
	SWITCH,
	SYNCHRONIZED,

	TEMPLATE,
	THIS,
	THROW,
	TRUE,
	TRY,
	TYPEDEF,
	TYPEID,
	TYPEOF,

	UBYTE,
	UCENT,
	UINT,
	ULONG,
	UNION,
	UNITTEST,
	USHORT,

	VERSION,
	VOID,
	VOLATILE,

	WCHAR,
	WHILE,
	WITH,

	// __FILE__, __MODULE__, __LINE__, __FUNCTION__, __PRETTY_FUNCTION__
	FILE,
	MODULE__,
	LINE,
	FUNCTION__,
	PRETTY_FUNCTION,

	//Special Token	Replaced with
	DATE, //	string literal of the date of compilation "mmm dd yyyy"
	EOF, //	sets the scanner to the end of the file
	TIME, //	string literal of the time of compilation "hh:mm:ss"
	TIMESTAMP, //	string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
	VENDOR, //	Compiler vendor string, such as "Digital Mars D"
	VERSION_, //	Compiler version as an integer, such as 2001
	
	// __gshared, __traits, __vector, __parameters
	GSHARED,
	TRAITS,
	VECTOR,
	PARAMETERS,

}
724 
/**
 * Source spelling of every keyword, indexed by Keyword value
 * (see getKeywordNameD and findKeyword). Entry 0 is the empty string for
 * Keyword.NONE; the remaining entries follow the declaration order of
 * Keyword one-to-one.
 */
immutable dstring[] KEYWORD_STRINGS = [
	"",
	"abstract",
	"alias",
	"align",
	"asm",
	"assert",
	"auto",

	"body",
	"bool",
	"break",
	"byte",

	"case",
	"cast",
	"catch",
	"cdouble",
	"cent",
	"cfloat",
	"char",
	"class",
	"const",
	"continue",
	"creal",

	"dchar",
	"debug",
	"default",
	"delegate",
	"delete",
	"deprecated",
	"do",
	"double",

	"else",
	"enum",
	"export",
	"extern",

	"false",
	"final",
	"finally",
	"float",
	"for",
	"foreach",
	"foreach_reverse",
	"function",

	"goto",

	"idouble",
	"if",
	"ifloat",
	"immutable",
	"import",
	"in",
	"inout", 
	"int",
	"interface",
	"invariant",
	"ireal",
	"is",

	"lazy",
	"long",

	"macro",
	"mixin",
	"module",

	"new",
	"nothrow",
	"null",

	"out",
	"override",

	"package",
	"pragma",
	"private",
	"protected",
	"public",
	"pure",

	"real",
	"ref",
	"return",

	"scope",
	"shared",
	"short",
	"static",
	"struct",
	"super",
	"switch",
	"synchronized",

	"template",
	"this",
	"throw",
	"true",
	"try",
	"typedef",
	"typeid",
	"typeof",

	"ubyte",
	"ucent",
	"uint",
	"ulong",
	"union",
	"unittest",
	"ushort",

	"version",
	"void",
	"volatile",

	"wchar",
	"while",
	"with",

	"__FILE__",
	"__MODULE__",
	"__LINE__",
	"__FUNCTION__",
	"__PRETTY_FUNCTION__",

	//Special Token	Replaced with
	"__DATE__", //	string literal of the date of compilation "mmm dd yyyy"
	"__EOF__", //	sets the scanner to the end of the file
	"__TIME__", //	string literal of the time of compilation "hh:mm:ss"
	"__TIMESTAMP__", //	string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
	"__VENDOR__", //	Compiler vendor string, such as "Digital Mars D"
	"__VERSION__", //	Compiler version as an integer, such as 2001

		
	"__gshared",
	"__traits",
	"__vector",
	"__parameters"
];
868 
/// Returns the source spelling of `keyword`, looked up in KEYWORD_STRINGS
/// (the empty string for Keyword.NONE).
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
	immutable size_t index = keyword;
	return KEYWORD_STRINGS[index];
}
872 
/**
 * Tries to match the tail of a keyword against the text at `name`.
 *
 * For every candidate in [start, end] only characters 1..$ of its spelling are
 * compared with name[0 ..]; the first character of the keyword is never looked at
 * (presumably the caller has already dispatched on it - confirm at the call site).
 * A candidate is accepted when its tail matches and is followed either by the
 * end of the available text or by a character that cannot continue an identifier.
 *
 * Params:
 *   start = first candidate keyword (inclusive), in KEYWORD_STRINGS order
 *   end   = last candidate keyword (inclusive)
 *   name  = pointer to the text to match the keyword tail against
 *   len   = number of characters available at `name`
 *   pos   = on success advanced by the number of matched characters
 *           (keyword length - 1); left untouched otherwise
 * Returns: the first matching keyword, or Keyword.NONE when none matches.
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
	for (Keyword i = start; i <= end; i++) {
		dstring s = KEYWORD_STRINGS[i];
		if (s.length == 0)
			continue; // empty placeholder entry (Keyword.NONE) - nothing to match
		if (s.length > len + 1)
			continue; // too long
		immutable size_t tailLen = s.length - 1;
		bool found = true;
		for (size_t j = 0; j < tailLen; j++) {
			if (s[j + 1] != name[j]) {
				found = false;
				break;
			}
		}
		if (!found)
			continue;
		// The keyword must end at a token boundary. When the tail consumes all
		// available characters there is nothing after it to inspect (and
		// name[tailLen] would be out of range), so accept immediately.
		if (tailLen == len || !isIdentMiddleChar(name[tailLen])) {
			pos += tailLen;
			return i;
		}
	}
	return Keyword.NONE;
}
894 
/**
 * Base class of all lexer tokens.
 *
 * Stores the token type and its source location. Every other property is
 * virtual and returns a neutral default here; concrete token classes override
 * the ones that apply to them.
 */
class Token {
	protected SourceFile _file;
	protected int _line;
	protected int _pos;
	protected TokenType _type;

	/// creates token of specified type, without source location
	this(TokenType type) {
		_type = type;
	}

	/// creates token of specified type; file, line and pos are stored exactly as passed
	this(TokenType type, SourceFile file, int line, int pos) {
		_file = file;
		_line = line;
		_pos = pos;
		_type = type;
	}

	// ---- location and kind ----

	/// returns token type
	@property TokenType type() {
		return _type;
	}
	/// returns file info for source
	@property SourceFile filename() {
		return _file;
	}
	/// returns 1-based source line number of token start
	@property int line() {
		return _line;
	}
	/// returns 1-based source line position of token start
	@property int pos() {
		return _pos;
	}
	/// returns token text (null in base class)
	@property dchar[] text() {
		return null;
	}

	// ---- number token properties (defaults) ----

	@property dchar literalType() {
		return 0;
	}
	@property ulong intValue() {
		return 0;
	}
	@property bool isUnsigned() {
		return false;
	}
	// declared as ulong (not bool); overriding classes use the same return type
	@property ulong isLong() {
		return false;
	}
	@property real realValue() {
		return 0;
	}
	@property double doubleValue() {
		return 0;
	}
	@property float floatValue() {
		return 0;
	}
	@property byte precision() {
		return 0;
	}
	@property bool isImaginary() {
		return false;
	}

	// ---- operator / keyword / comment properties (defaults) ----

	/// returns opcode ID - for opcode tokens
	@property OpCode opCode() {
		return OpCode.NONE;
	}
	/// returns keyword ID - for keyword tokens
	@property Keyword keyword() {
		return Keyword.NONE;
	}
	/// returns true if this is documentation comment token
	@property bool isDocumentationComment() {
		return false;
	}
	/// returns true if this is multiline
	@property bool isMultilineComment() {
		return false;
	}

	// ---- error handling ----

	/// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
	@property bool isError() {
		return type == TokenType.INVALID;
	}
	/// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
	@property string errorMessage() {
		return null;
	}
	/// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
	@property int errorCode() {
		return 0;
	}
	/// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
	@property TokenType invalidTokenType() {
		return TokenType.INVALID;
	}

	// ---- mutators ----

	/// set source file information for token
	void setFile(SourceFile file) {
		_file = file;
	}
	/// set start position for token (line is 1-based, pos is 0-based and stored as pos + 1)
	void setPos(int line, int pos) {
		_line = line;
		_pos = pos + 1;
	}
	/// set source file and start position for token (line is 1-based, pos is 0-based and stored as pos + 1)
	void setPos(SourceFile file, int line, int pos) {
		_file = file;
		_line = line;
		_pos = pos + 1;
	}

	/// creates a copy of this token; implemented by every concrete token class
	public abstract Token clone();

	/// debug dump: line:pos type opCode keyword "text"
	public override @property string toString() {
		string location = to!string(_line) ~ ":" ~ to!string(_pos);
		string kind = to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword);
		return location ~ " " ~ kind ~ " \"" ~ toUTF8(text()) ~ "\"";
	}
}
978 
/// end of file token
class EofToken : Token {
	/// creates EOF token without source location
	this() {
		super(TokenType.EOF);
	}
	/// creates EOF token at specified source location
	this(SourceFile file, uint line, uint pos) {
		super(TokenType.EOF, file, line, pos);
	}
	/// returns new EofToken with the same file, line and position
	public override Token clone() {
		auto copy = new EofToken(_file, _line, _pos);
		return copy;
	}
	/// always "EOF"
	public override @property string toString() {
		return "EOF";
	}
}
993 
994 // treat as white space
995 //class EolToken : Token {
996 //	this(string file, uint line, uint pos) {
997 //		super(TokenType.EOL, file, line, pos);
998 //	}
999 //}
1000 
/// white space token
class WhiteSpaceToken : Token {
	/// creates whitespace token without source location
	this() {
		super(TokenType.WHITESPACE);
	}
	/// creates whitespace token at specified source location
	this(SourceFile file, uint line, uint pos) {
		super(TokenType.WHITESPACE, file, line, pos);
	}
	/// returns new WhiteSpaceToken with the same file, line and position
	public override Token clone() {
		auto copy = new WhiteSpaceToken(_file, _line, _pos);
		return copy;
	}
	/// always "WhiteSpace"
	public override @property string toString() {
		return "WhiteSpace";
	}
}
1016 
/// operator token; carries an OpCode
class OpToken : Token {
	OpCode _op;

	/// creates operator token without source location (opcode stays OpCode.NONE until set)
	this() {
		super(TokenType.OP);
	}
	/// creates operator token at specified source location
	this(SourceFile file, uint line, uint pos) {
		super(TokenType.OP, file, line, pos);
	}

	/// returns opcode ID
	public override @property OpCode opCode() {
		return _op;
	}
	/// sets opcode ID
	public @property void opCode(OpCode op) {
		_op = op;
	}
	/// returns operator spelling; this is the immutable table entry cast to dchar[], so it must not be modified
	public override @property dchar[] text() {
		dstring spelling = getOpNameD(_op);
		return cast(dchar[])spelling;
	}

	/// returns new OpToken with the same location and opcode
	public override Token clone() {
		auto copy = new OpToken(_file, _line, _pos);
		copy._op = _op;
		return copy;
	}
	/// "Op:" followed by the OpCode member name
	public override @property string toString() {
		string opName = to!string(_op);
		return "Op:" ~ opName;
	}
}
1037 
/// keyword token; carries a Keyword id
class KeywordToken : Token {
	Keyword _keyword;

	/// creates keyword token without source location (keyword stays Keyword.NONE until set)
	this() {
		super(TokenType.KEYWORD);
	}
	/// creates keyword token at specified source location
	this(SourceFile file, uint line, uint pos) {
		super(TokenType.KEYWORD, file, line, pos);
	}

	/// returns keyword ID
	public override @property Keyword keyword() {
		return _keyword;
	}
	/// sets keyword ID
	public @property void keyword(Keyword keyword) {
		_keyword = keyword;
	}
	/// returns keyword spelling; this is the immutable table entry cast to dchar[], so it must not be modified
	public override @property dchar[] text() {
		dstring spelling = getKeywordNameD(_keyword);
		return cast(dchar[])spelling;
	}

	/// returns new KeywordToken with the same location and keyword
	public override Token clone() {
		auto copy = new KeywordToken(_file, _line, _pos);
		copy._keyword = _keyword;
		return copy;
	}
	/// "Keyword:" followed by the Keyword member name
	public override @property string toString() {
		string keywordName = to!string(_keyword);
		return "Keyword:" ~ keywordName;
	}
}
1058 
/// comment token
class CommentToken : Token {
	protected dchar[] _text;
	protected bool _isDocumentationComment;
	protected bool _isMultilineComment;

	/// creates comment token without source location and text
	this() {
		super(TokenType.COMMENT);
	}
	/// creates comment token at specified source location; text is stored by reference (not copied)
	this(SourceFile file, uint line, uint pos, dchar[] text) {
		super(TokenType.COMMENT, file, line, pos);
		_text = text;
	}

	/// returns true if this is documentation comment token
	override @property bool isDocumentationComment() {
		return _isDocumentationComment;
	}
	/// sets documentation comment flag
	@property void isDocumentationComment(bool f) {
		_isDocumentationComment = f;
	}

	/// returns true if this is multiline
	override @property bool isMultilineComment() {
		return _isMultilineComment;
	}
	/// sets multiline flag
	@property void isMultilineComment(bool f) {
		_isMultilineComment = f;
	}

	/// returns comment text
	override @property dchar[] text() {
		return _text;
	}
	/// sets comment text (stored by reference)
	@property void text(dchar[] text) {
		_text = text;
	}

	/// returns new CommentToken with the same location and flags, and its own copy of the text
	public override Token clone() {
		auto copy = new CommentToken(_file, _line, _pos, _text.dup);
		copy._isMultilineComment = _isMultilineComment;
		copy._isDocumentationComment = _isDocumentationComment;
		return copy;
	}
	/// "Comment:" followed by the comment text
	public override @property string toString() {
		string body_ = to!string(_text);
		return "Comment:" ~ body_;
	}
}
1102 
/// Invalid token holder - for error tolerant parsing
class InvalidToken : Token {
	protected dchar[] _text;
	protected TokenType _invalidTokenType;
	protected int _errorCode;
	protected string _errorMessage;

	/// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
	override @property string errorMessage() { return _errorMessage; }
	/// sets error message
	@property void errorMessage(string s) { _errorMessage = s; }
	/// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
	override @property int errorCode() { return _errorCode; }
	/// sets error code
	@property void errorCode(int c) { _errorCode = c; }
	/// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
	override @property TokenType invalidTokenType() { return _invalidTokenType; }
	/// sets type of token parsing of which has been failed
	@property void invalidTokenType(TokenType t) { _invalidTokenType = t; }

	/// text of invalid token
	@property override dchar[] text() { return _text; }
	/// text of invalid token (stored by reference)
	@property void text(dchar[] text) { _text = text; }

	/// creates invalid token without source location and text
	this() {
		super(TokenType.INVALID);
	}
	/// creates invalid token at specified source location; text is stored by reference (not copied)
	this(SourceFile file, uint line, uint pos, dchar[] text) {
		super(TokenType.INVALID, file, line, pos);
		_text = text;
	}
	/// returns new InvalidToken with the same location and error details, and its own copy of the text
	override Token clone() {
		InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup);
		// string is immutable, so the message can be shared safely - no copy needed
		res._errorMessage = _errorMessage;
		res._errorCode = _errorCode;
		res._invalidTokenType = _invalidTokenType;
		return res;
	}
	/// "Invalid:" followed by the token text
	override @property string toString() {
		return "Invalid:" ~ to!string(_text);
	}
}
1146 
/// numeric id assigned to an identifier spelling
alias tokenizer_ident_t = uint;
/// identifier spelling
alias tokenizer_ident_name_t = dchar[];

enum : tokenizer_ident_t {
    /// reserved id meaning "no identifier"; never assigned to a name
    NO_IDENT = 0
}

/**
 * Storage for identifier strings: assigns a numeric id to every distinct
 * identifier spelling and maps ids back to spellings.
 *
 * Ids are handed out sequentially starting at NO_IDENT + 1.
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
    * Search for id by name, return NO_IDENT if not found.
    */
    tokenizer_ident_t findByName(tokenizer_ident_name_t name) {
        if (auto found = name in _nameToId)
            return *found;
        return NO_IDENT;
    }

    /**
    * Search for name by id, return null if not found.
    * The returned array is the holder's own copy of the spelling.
    */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        if (auto found = id in _idToName)
            return *found;
        return null;
    }

    /**
     * Search for ident id by name, create new entry if not found.
     *
     * The spelling is copied on insertion, so the caller may freely reuse or
     * overwrite the buffer behind `name` afterwards.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        if (auto found = name in _nameToId)
            return *found;
        tokenizer_ident_t newid = _nextId++;
        // An associative array key must never change after insertion; storing
        // the caller's mutable slice would let later writes to that buffer
        // corrupt the table. Keep a private immutable copy as the key and a
        // separate copy as the name handed out by nameById().
        _nameToId[name.idup] = newid;
        _idToName[newid] = name.dup;
        return newid;
    }
}
1199 
/**
* Thread local storage for IDs.
*
* Identifier table used by IdentToken to translate between identifier
* spellings and numeric ids. It is a plain module-level variable (neither
* `shared` nor `__gshared`), so every thread has its own instance.
*/
IdentHolder identMap;

/// Module constructor: creates the identifier table before it is first used.
static this() {
    // init ID storage
    identMap = new IdentHolder();
}
1209 
/// string literal token: literal text plus a literal type character (0 when none)
class StringLiteralToken : Token {
	dchar[] _text;
	dchar _literalType;

	/// creates string token without source location and text
	this() {
		super(TokenType.STRING);
	}
	/// creates string token at specified source location; text is stored by reference (not copied)
	this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
		super(TokenType.STRING, file, line, pos);
		_literalType = type;
		_text = text;
	}

	/// returns literal type character
	public override @property dchar literalType() {
		return _literalType;
	}
	/// returns string text
	public override @property dchar[] text() {
		return _text;
	}
	/// sets text (stored by reference) and literal type character
	public void setText(dchar[] text, dchar type) {
		_literalType = type;
		_text = text;
	}

	/// returns new StringLiteralToken with the same location and type, and its own copy of the text
	public override Token clone() {
		auto copy = new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
		return copy;
	}
	/// String:"text" followed by the literal type character, or by a space when the type is 0
	public override @property string toString() {
		immutable dchar suffix = _literalType ? _literalType : ' ';
		return toUTF8("String:\"" ~ _text ~ "\"" ~ suffix);
	}
}
1231 
/// character literal token: one character plus a literal type character (0 when none)
class CharacterLiteralToken : Token {
	dchar _character;
	dchar _literalType;

	/// creates character token without source location
	this() {
		super(TokenType.CHARACTER);
	}
	/// creates character token at specified source location
	this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
		super(TokenType.CHARACTER, file, line, pos);
		_literalType = type;
		_character = character;
	}

	/// returns literal type character
	override @property dchar literalType() {
		return _literalType;
	}
	/// returns the character value
	@property dchar character() {
		return _character;
	}
	/// returns the character as a newly created one-element array
	override @property dchar[] text() {
		dchar[] single = [_character];
		return single;
	}
	/// sets character value and literal type character
	void setCharacter(dchar ch, dchar type) {
		_literalType = type;
		_character = ch;
	}

	/// returns new CharacterLiteralToken with the same location, character and type
	public override Token clone() {
		auto copy = new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
		return copy;
	}
	/// "Char:" followed by the character encoded as UTF-8
	public override @property string toString() {
		string encoded = toUTF8([_character]);
		return "Char:" ~ encoded;
	}
}
1254 
/// integer literal token: value plus unsigned / long flags
class IntegerLiteralToken : Token {
	ulong _value;
	bool _unsigned;
	bool _long;

	/// creates integer token without source location
	this() {
		super(TokenType.INTEGER);
	}
	/// creates integer token at specified source location
	this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
		super(TokenType.INTEGER, file, line, pos);
		_long = longFlag;
		_unsigned = unsignedFlag;
		_value = value;
	}

	/// returns literal value
	public override @property ulong intValue() {
		return _value;
	}
	/// returns unsigned flag
	public override @property bool isUnsigned() {
		return _unsigned;
	}
	/// returns long flag (return type is ulong, as in the base class declaration)
	public override @property ulong isLong() {
		return _long;
	}
	/// returns the value formatted as decimal text (flags are not included)
	public override @property dchar[] text() {
		dstring digits = to!dstring(_value);
		return cast(dchar[])digits;
	}

	/// sets value and both flags
	public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
		_long = longFlag;
		_unsigned = unsignedFlag;
		_value = value;
	}
	/// sets both flags, leaving the value untouched
	public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
		_long = longFlag;
		_unsigned = unsignedFlag;
	}

	/// returns new IntegerLiteralToken with the same location, value and flags
	public override Token clone() {
		auto copy = new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
		return copy;
	}
	/// "Integer:" + value, then "L" if long, then "U" if unsigned
	public override @property string toString() {
		string res = "Integer:" ~ to!string(_value);
		if (_long)
			res ~= "L";
		if (_unsigned)
			res ~= "U";
		return res;
	}
}
1288 
/// floating point literal token: value, precision code and imaginary flag
class RealLiteralToken : Token {
	real _value;
	byte _precision;
	bool _imaginary;

	/// creates floating point token without source location
	this() {
		super(TokenType.FLOAT);
	}
	/// creates floating point token at specified source location
	this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
		super(TokenType.FLOAT, file, line, pos);
		_imaginary = imaginary;
		_precision = precision;
		_value = value;
	}

	/// returns value converted to integer using to!long
	public override @property ulong intValue() {
		return to!long(_value);
	}
	/// returns value as real
	public override @property real realValue() {
		return _value;
	}
	/// returns value cast to double
	public override @property double doubleValue() {
		return cast(double)_value;
	}
	/// returns value cast to float
	public override @property float floatValue() {
		return cast(float)_value;
	}
	/// returns precision code (toString prints "f" for 0 and "L" for 2)
	public override @property byte precision() {
		return _precision;
	}
	/// returns imaginary flag
	public override @property bool isImaginary() {
		return _imaginary;
	}
	/// returns the value formatted as text (suffixes are not included)
	public override @property dchar[] text() {
		dstring formatted = to!dstring(_value);
		return cast(dchar[])formatted;
	}

	/// sets value, precision code and imaginary flag
	public void setValue(real value, byte precision = 1, bool imaginary = false) {
		_imaginary = imaginary;
		_precision = precision;
		_value = value;
	}
	/// sets precision code and imaginary flag, leaving the value untouched
	public void setFlags(byte precision = 1, bool imaginary = false) {
		_imaginary = imaginary;
		_precision = precision;
	}

	/// returns new RealLiteralToken with the same location, value and flags
	public override Token clone() {
		auto copy = new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
		return copy;
	}
	/// "Real:" + value, then "f" (precision 0) or "L" (precision 2), then "i" if imaginary
	public override @property string toString() {
		string res = "Real:" ~ to!string(_value);
		if (_precision == 0)
			res ~= "f";
		else if (_precision == 2)
			res ~= "L";
		if (_imaginary)
			res ~= "i";
		return res;
	}
}
1325 
/// identifier token; stores only the id of its spelling, the spelling itself lives in identMap
class IdentToken : Token {
	tokenizer_ident_t _id;

	/// creates identifier token without source location and id
	this() {
		super(TokenType.IDENTIFIER);
	}
	/// creates identifier token at specified source location; text is registered in identMap
	this(SourceFile file, uint line, uint pos, dchar[] text) {
		super(TokenType.IDENTIFIER, file, line, pos);
		_id = identMap.idByName(text);
	}
	/// creates identifier token at specified source location from an existing id
	this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
		super(TokenType.IDENTIFIER, file, line, pos);
		_id = id;
	}

	/// returns identifier spelling looked up in identMap by id
	public override @property dchar[] text() {
		return identMap.nameById(_id);
	}
	/// registers text in identMap (if not there yet) and stores its id
	public void setText(dchar[] text) {
		_id = identMap.idByName(text);
	}

	/// returns new IdentToken with the same location and id
	public override Token clone() {
		auto copy = new IdentToken(_file, _line, _pos, _id);
		return copy;
	}
	/// "Ident:" followed by the identifier spelling
	public override @property string toString() {
		string spelling = to!string(text);
		return "Ident:" ~ spelling;
	}
}
1348 
// shared appender buffer, to avoid extra heap allocations
//
// Holds a growable dchar buffer (`buf`) of which the first `len` items are
// valid. reset() only drops the length, so the allocated storage is kept for
// the next use. processEscapeSequences() rewrites the collected text in
// place, replacing backslash escape sequences with the characters they denote.
struct StringAppender {
	/// storage; only buf[0 .. len] is meaningful
	dchar[] buf;
	/// number of valid characters in buf
	uint len;
	/// set when processEscapeSequences() (or one of its helpers) meets a malformed escape
	bool errorFlag = false;

	/// scratch buffer for the name of a named character entity
	char[] entityNameBuf;
	/// number of valid characters in entityNameBuf
	int entityNameLen;

	/// returns the collected text as a slice of the internal buffer (no copy)
	dchar[] get() {
		return buf[0 .. len];
	}

	/// resizes buf to `requested` items, but never to less than 128
	private void growTo(size_t requested) {
		uint newsize = cast(uint)requested;
		if (newsize < 128)
			newsize = 128;
		buf.length = newsize;
	}

	/// appends a single '\n'
	void appendEol() {
		if (len + 1 > buf.length)
			growTo((len + 1 + buf.length) * 2);
		buf[len] = '\n';
		len++;
	}

	/// appends all characters of s (no-op for empty s)
	void append(dchar[] s) {
		if (s.length == 0)
			return;
		if (len + s.length > buf.length)
			growTo((len + s.length + buf.length) * 2);
		buf[len .. len + s.length] = s;
		len += s.length;
	}

	/// appends a single character
	void append(dchar ch) {
		if (len + 1 > buf.length)
			growTo(buf.length * 2);
		buf[len++] = ch;
	}

	/// forgets collected text, keeping the allocated buffer
	void reset() {
		len = 0;
	}

	/// returns value 0..15 of a hexadecimal digit, or -1 if ch is not a hex digit
	static int parseHexDigit(dchar ch) {
		if (ch >= '0' && ch <='9')
			return ch - '0';
		if (ch >= 'a' && ch <='f')
			return ch - 'a' + 10;
		if (ch >= 'A' && ch <='F')
			return ch - 'A' + 10;
		return -1;
	}

	/**
	 * Decodes `count` hexadecimal digits that follow buf[pos].
	 *
	 * pos is advanced to the last digit consumed. A non-hex character is
	 * treated as digit 0 and sets errorFlag; running out of input sets
	 * errorFlag and returns what has been decoded so far.
	 */
	dchar decodeHex(ref int pos, int count) {
		dchar res = 0;
		for (int i = 0; i < count; i++) {
			if (pos >= len - 1) {
				errorFlag = true;
				return res;
			}
			dchar ch = buf[++pos];
			int digit = parseHexDigit(ch);
			if (digit < 0) {
				errorFlag = true;
				digit = 0;
			}
			res = (res << 4) | digit;
		}
		return res;
	}

	/**
	 * Decodes an octal escape of one to three digits.
	 *
	 * firstChar is the already consumed first digit (buf[pos]); up to two
	 * further octal digits are consumed and pos is advanced to the last one.
	 */
	dchar decodeOct(dchar firstChar, ref int pos) {
		dchar res = 0;
		res = firstChar - '0';
		if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
			res = (res << 3) | (buf[++pos] - '0');
		}
		if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
			res = (res << 3) | (buf[++pos] - '0');
		}
		return res;
	}

	/**
	 * Decodes a named character entity; buf[pos] is the '&' on entry.
	 *
	 * The name is collected up to the next ';' and translated with
	 * entityToChar(). On success pos is left on the ';'. A missing ';', a
	 * non-ASCII character in the name, or a name entityToChar() returns 0 for
	 * sets errorFlag; in the first and last case '?' is returned.
	 */
	dchar decodeCharacterEntity(ref int pos) {
		entityNameLen = 0;
		pos++;
		for(; pos < len && buf[pos] != ';'; pos++) {
			dchar ch = buf[pos];
			if (ch >= 0x80)
				errorFlag = true;
			if (entityNameBuf.length < entityNameLen + 4)
				entityNameBuf.length += 32;
			entityNameBuf[entityNameLen++] = cast(char)ch;
		}
		if (pos < len && buf[pos] == ';') {
			dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
			if (ch)
				return ch;
		}
		errorFlag = true;
		return '?';
	}

	/**
	 * Replaces backslash escape sequences in the collected text, in place.
	 *
	 * The result is never longer than the input, so the write index never
	 * overtakes the read index. `len` is updated to the new length.
	 *
	 * Returns: errorFlag, i.e. true when at least one malformed escape
	 * sequence was found (the text is still converted on a best-effort basis).
	 */
	bool processEscapeSequences() {
		errorFlag = false;
		int dst = 0;
		for (int src = 0; src < len; src++) {
			dchar ch = buf[src];
			if (ch != '\\') {
				buf[dst++] = ch;
				continue;
			}
			if (src == len - 1) {
				// lone backslash at the very end: there is nothing to escape.
				// The backslash is dropped and the text is reported as invalid.
				errorFlag = true;
				break;
			}
			ch = buf[++src];
			switch (ch) {
				case '\'':
				case '\"':
				case '?':
				case '\\':
					buf[dst++] = ch;
					break;
				case 'a':
					buf[dst++] = '\a';
					break;
				case 'b':
					buf[dst++] = '\b';
					break;
				case 'f':
					buf[dst++] = '\f';
					break;
				case 'n':
					buf[dst++] = '\n';
					break;
				case 'r':
					buf[dst++] = '\r';
					break;
				case 't':
					buf[dst++] = '\t';
					break;
				case 'v':
					buf[dst++] = '\v';
					break;
				case 'x':
					buf[dst++] = decodeHex(src, 2);
					break;
				case 'u':
					buf[dst++] = decodeHex(src, 4);
					break;
				case 'U':
					buf[dst++] = decodeHex(src, 8);
					break;
				case '0': .. case '7':
					// octal X XX or XXX. '0' is handled here as well, so that
					// "\0" alone still yields NUL while "\012" is decoded as one
					// octal escape instead of NUL followed by the text "12".
					buf[dst++] = decodeOct(ch, src);
					break;
				case '&':
					// named character entity
					buf[dst++] = decodeCharacterEntity(src);
					break;
				default:
					// unknown escape: keep the character as is and report the error
					buf[dst++] = ch;
					errorFlag = true;
					break;
			}
		}
		len = dst;
		return errorFlag;
	}
}
1523 
1524 class Tokenizer
1525 {
1526 	protected SourceLines _lineStream;
1527 	protected dchar[] _lineText;
1528 	protected int _line; // current line number
1529 	protected int _len; // current line length
1530 	protected int _pos; // current line read position
1531     protected int _prevLineLength; // previous line length
1532 	protected uint _state; // tokenizer state
1533 	
1534 	enum : int {
1535 		EOF_CHAR = 0x001A,
1536 		EOL_CHAR = 0x000A
1537 	};
1538 	
1539 	protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
1540 	protected CommentToken _sharedCommentToken = new CommentToken();
1541 	protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
1542 	protected IdentToken _sharedIdentToken = new IdentToken();
1543 	protected OpToken _sharedOpToken = new OpToken();
1544 	protected KeywordToken _sharedKeywordToken = new KeywordToken();
1545 	protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
1546 	protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
1547     protected InvalidToken _sharedInvalidToken = new InvalidToken();
1548     protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
1549 	protected StringAppender _stringLiteralAppender;
1550 	protected StringAppender _commentAppender;
1551 	protected StringAppender _identAppender;
1552 	
1553 	protected bool _enableCommentText = true;
1554     /// when false, does not put comment text into comment token - for less allocations
1555 	@property void enableCommentText(bool enabled) {
1556 		_enableCommentText = enabled;
1557 	}
1558     /// when false, does not put comment text into comment token - for less allocations
1559 	@property bool enableCommentText() {
1560 		return _enableCommentText;
1561 	}
1562 
1563 	protected bool _errorTolerant = false;
1564     /// when true, returns BadToken instead of throwing exception
1565 	@property void errorTolerant(bool enabled) {
1566 		_errorTolerant = enabled;
1567 	}
1568     /// when true, returns BadToken instead of throwing exception
1569 	@property bool errorTolerant() {
1570 		return _errorTolerant;
1571 	}
1572 
1573 	this(SourceLines lineStream) {
1574         init(lineStream);
1575 	}
1576 
1577     void init(SourceLines lineStream, int pos = 0) {
1578 		_lineStream = lineStream;
1579         SourceFile file = _lineStream.file;
1580 		_sharedWhiteSpaceToken.setFile(file);
1581 		_sharedCommentToken.setFile(file);
1582 		_sharedStringLiteralToken.setFile(file);
1583 		_sharedIdentToken.setFile(file);
1584 		_sharedOpToken.setFile(file);
1585 		_sharedKeywordToken.setFile(file);
1586 		_sharedIntegerToken.setFile(file);
1587 		_sharedRealToken.setFile(file);
1588         _sharedInvalidToken.setFile(file);
1589         _sharedCharacterLiteralToken.setFile(file);
1590 		buildTime = Clock.currTime();
1591         _line = lineStream.line;
1592         _pos = 0;
1593         _prevLineLength = 0;
1594         _lineText = null;
1595         nextLine();
1596         _pos = pos;
1597     }
1598 	
1599 	this(string code, string filename = "") {
1600 		this(new ArraySourceLines(code, filename));
1601 	}
1602 	
1603 	// fetch next line from source stream
1604 	protected bool nextLine() {
1605         _prevLineLength = cast(int)_lineText.length;
1606 		_lineText = _lineStream.readLine();
1607 		if (!_lineText) {
1608 			if (_lineStream.errorCode != 0)
1609 				throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
1610             if (_lineStream.eof) {
1611                 // end of file
1612                 _pos = 0;
1613 			    _len = 0;
1614 			    return false;
1615             }
1616             // just an empty line
1617 		}
1618 		_line = _lineStream.line;
1619 		_pos = 0;
1620 		_len = cast(int)_lineText.length; // do not support lines longer that 4Gb
1621 		return true;
1622 	}
1623 	
1624 	protected dchar nextChar() {
1625 	    if (_pos >= _len) {
1626 			if (!nextLine()) {
1627                 _pos = _prevLineLength + 1;
1628 				return EOF_CHAR;
1629 			}
1630 			return EOL_CHAR;
1631 		}
1632 		dchar res = _lineText[_pos++];
1633         if (_pos >= _len)
1634             nextLine();
1635         return res;
1636 	}
1637 	
1638 	protected dchar peekChar() {
1639 		if (_lineText is null) {
1640 			if (!nextLine()) {
1641 				return EOF_CHAR;
1642 			}
1643 		}
1644 		if (_pos >= _len)
1645 			return EOL_CHAR;
1646 		return _lineText[_pos++];
1647 	}
1648 	
1649 	protected Token emitEof() {
1650 		// TODO: check for current state
1651 		return new EofToken(_lineStream.file, _startLine, _startPos + 2);
1652 	}
1653 	
1654 	protected Token processWhiteSpace(dchar firstChar) {
1655 		// reuse the same token instance, to avoid extra heap spamming
1656         _sharedWhiteSpaceToken.setPos(_startLine, _startPos);
1657 		for (;;) {
1658 			int i = _pos;
1659 			for (; i < _len; i++) {
1660 				dchar ch = _lineText[i];
1661 				if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
1662 					break;
1663 			}
1664 			_pos = i;
1665 			if (_pos < _len)
1666 				break;
1667 			// go to next line
1668 			if (!nextLine())
1669 				break;
1670 		}
1671 		return _sharedWhiteSpaceToken;
1672 	}
1673 	
1674 	protected Token processOneLineComment() {
1675 		_sharedCommentToken.setPos(_startLine, _startPos);
1676         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
1677         _sharedCommentToken.isMultilineComment = false;
1678 		if (_enableCommentText) {
1679 			_sharedCommentToken.text = _lineText[_pos + 1 .. $];
1680 		}
1681 		_pos = _len;
1682         nextChar();
1683 		return _sharedCommentToken;
1684 	}
1685 
1686 	protected Token processOneLineSharpComment() {
1687 		_sharedCommentToken.setPos(_startLine, _startPos);
1688 		if (_enableCommentText) {
1689 			_sharedCommentToken.text = _lineText[_pos .. $];
1690 		}
1691 		_pos = _len;
1692 		return _sharedCommentToken;
1693 	}
1694 
1695 	// Comment /*   */	
1696 	protected Token processMultilineComment() {
1697 		_sharedCommentToken.setPos(_startLine, _startPos);
1698         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
1699         _sharedCommentToken.isMultilineComment = true;
1700 		_commentAppender.reset();
1701 		int textStart = _pos + 1;
1702 		for (;;) {
1703 			int textEnd = int.max;
1704 			int i = textStart;
1705 			for (; i < _len - 1; i++) {
1706 				if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
1707 					textEnd = i;
1708 					break;
1709 				}
1710 			}
1711 			if (textEnd != int.max) {
1712 				if (_enableCommentText)
1713 					_commentAppender.append(_lineText[textStart .. textEnd]);
1714 				_pos = textEnd + 2;
1715 				break;
1716 			}
1717 			if (!nextLine()) {
1718 				// TODO: do we need throw exception if comment not closed by end of file?
1719 				_pos = _len;
1720 				break;
1721 			}
1722 			textStart = 0;
1723 		}
1724 		if (_enableCommentText) {
1725 			_sharedCommentToken.text = _commentAppender.get();
1726 		}
1727 		return _sharedCommentToken;
1728 	}
1729 	
1730 	// Comment /+   +/	
1731 	protected Token processNestedComment() {
1732 		_sharedCommentToken.setPos(_startLine, _startPos);
1733         _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
1734         _sharedCommentToken.isMultilineComment = true;
1735 		_commentAppender.reset();
1736 		dchar[] text;
1737 		int textStart = _pos + 1;
1738 		int level = 1;
1739 		for (;;) {
1740 			int textEnd = int.max;
1741 			int i = textStart;
1742 			for (; i < _len - 1; i++) {
1743 				if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
1744 					level++;
1745 					i++;
1746 				} else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
1747 					if (--level == 0) {
1748 						textEnd = i;
1749 						break;
1750 					}
1751 				}
1752 			}
1753 			if (textEnd != int.max) {
1754 				if (_enableCommentText)
1755 					_commentAppender.append(_lineText[textStart .. textEnd]);
1756 				_pos = textEnd + 2;
1757 				break;
1758 			}
1759 			if (!nextLine()) {
1760 				// TODO: do we need throw exception if comment not closed by end of file?
1761 				_pos = _len;
1762 				break;
1763 			}
1764 			if (_enableCommentText)
1765 				_commentAppender.appendEol();
1766 			textStart = 0;
1767 		}
1768 		if (_enableCommentText) {
1769 			_sharedCommentToken.text = _commentAppender.get();
1770 		}
1771 		return _sharedCommentToken;
1772 	}
1773 	
1774 	protected Token processHexString() {
1775 		_pos++;
1776 		// TODO:
1777 		return null;
1778 	}
1779 	
1780 	protected Token processDelimitedString() {
1781 		_pos++;
1782 		// TODO:
1783 		return null;
1784 	}
1785 	
1786 	// r"string"   or    `string`
1787 	protected Token processWysiwygString(dchar ch) {
1788 		_pos++;
1789 		// TODO:
1790 		return null;
1791 	}
1792 	
1793 	protected Token processIdent(dchar firstChar) {
1794 		_sharedIdentToken.setPos(_startLine, _startPos);
1795 		_identAppender.reset();
1796 		_identAppender.append(firstChar);
1797 		for (; _pos < _len; ) {
1798 			dchar ch = _lineText[_pos];
1799 			if (!isIdentMiddleChar(ch)) {
1800 				break;
1801 			}
1802 			_identAppender.append(ch);
1803 			_pos++;
1804 		}
1805 		_sharedIdentToken.setText(_identAppender.get);
1806 		return _sharedIdentToken;
1807 	}
1808 
1809 	protected Token processIntegerSuffix() {
1810 		if (_pos >= _len)
1811 			return _sharedIntegerToken;
1812 		bool longFlag = false;
1813 		bool unsignedFlag = false;
1814 		dchar ch = _lineText[_pos];
1815 		dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
1816 		if (ch == 'l' || ch == 'L') {
1817 			longFlag = true;
1818 			_pos++;
1819 			if (ch2 == 'u' || ch2 == 'U') {
1820 				unsignedFlag = true;
1821 				_pos++;
1822 			} 
1823 		} else if (ch == 'u' || ch == 'U') {
1824 			unsignedFlag = true;
1825 			_pos++;
1826 			if (ch2 == 'l' || ch2 == 'L') {
1827 				longFlag = true;
1828 				_pos++;
1829 			} 
1830 		}
1831 		_sharedIntegerToken.setFlags(unsignedFlag, longFlag);
1832 		ch = _pos < _len ? _lineText[_pos] : 0;
1833 		if (isIdentMiddleChar(ch))
1834 			return parserError("Unexpected character after number", _sharedIntegerToken);
1835 		return _sharedIntegerToken;
1836 	}
1837 	
1838 	protected Token processBinaryNumber() {
1839 		_sharedIntegerToken.setPos(_startLine, _startPos);
1840 		_pos++;
1841 		if (_pos >= _len)
1842 			return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
1843 		int digits = 0;
1844 		ulong number = 0;
1845 		int i = _pos;
1846 		for (;i < _len; i++) {
1847 			dchar ch = _lineText[i];
1848 			if (ch != '0' && ch != '1')
1849 				break;
1850 			number = (number << 1) | (ch == '1' ? 1 : 0);
1851 			digits++;
1852 		}
1853 		_pos = i;
1854 		if (digits > 64)
1855 			return parserError("number is too big", _sharedIntegerToken);
1856 		_sharedIntegerToken.setValue(number);
1857 		return processIntegerSuffix();
1858 	}
1859 
1860 	protected Token processHexNumber() {
1861 		_sharedIntegerToken.setPos(_startLine, _startPos);
1862 		_sharedRealToken.setPos(_startLine, _startPos);
1863 		_pos++;
1864 		if (_pos >= _len)
1865 			return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
1866 		int digits = 0;
1867 		ulong number = 0;
1868 		int i = _pos;
1869 		for (;i < _len; i++) {
1870 			dchar ch = _lineText[i];
1871 			uint digit = 0;
1872 			if (ch >= '0' && ch <= '9')
1873 				digit = ch - '0';
1874 			else if (ch >= 'a' && ch <= 'f')
1875 				digit = ch - 'a' + 10;
1876 			else if (ch >= 'A' && ch <= 'F')
1877 				digit = ch - 'A' + 10;
1878 			else if (ch == '_')
1879 				continue;
1880 			else
1881 				break;
1882 			number = (number << 4) | digit;
1883 			digits++;
1884 		}
1885 		_pos = i;
1886 		if (digits > 16)
1887 			return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1888 		_sharedIntegerToken.setValue(number);
1889 		return processIntegerSuffix();
1890 	}
1891 	
1892 	protected Token processOctNumber() {
1893 		_sharedIntegerToken.setPos(_startLine, _startPos);
1894 		if (_pos >= _len)
1895 			return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
1896 		int digits = 0;
1897 		ulong number = 0;
1898 		int i = _pos;
1899 		bool overflow = false;
1900 		for (;i < _len; i++) {
1901 			dchar ch = _lineText[i];
1902 			int digit = 0;
1903 			if (ch >= '0' && ch <= '7')
1904 				digit = ch - '0';
1905 			else if (ch == '_')
1906 				continue;
1907 			else
1908 				break;
1909 			number <<= 3;
1910 			if (digits >= 20) {
1911 				if ((number >> 3) << 3 != number) {
1912 					overflow = true;
1913 					break;
1914 				}
1915 			}
1916 			number |= digit;
1917 			digits++;
1918 		}
1919 		_pos = i;
1920 		if (overflow)
1921 			return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
1922 		_sharedIntegerToken.setValue(number);
1923 		return processIntegerSuffix();
1924 	}
1925 	
1926 	// 
1927 	protected Token processDecFloatSuffix(real value) {
1928         ubyte precision = 1;
1929         bool imaginary = false;
1930 		dchar next = _pos < _len ? _lineText[_pos] : 0;
1931         if (next == 'f') {
1932             _pos++;
1933             precision = 0;
1934         } else if (next == 'L') {
1935             _pos++;
1936             precision = 2;
1937         }
1938 		next = _pos < _len ? _lineText[_pos] : 0;
1939         if (next == 'i') {
1940             _pos++;
1941             imaginary = true;
1942         }
1943 		next = _pos < _len ? _lineText[_pos] : 0;
1944         if (isIdentMiddleChar(next))
1945 			return parserError("invalid suffix for floating point literal", _sharedRealToken);
1946 		_sharedRealToken.setValue(value, precision, imaginary);
1947 		return _sharedRealToken;
1948 	}
1949 	
1950 	// after E char
1951 	protected Token processDecFloatExponent(real value) {
1952 		dchar next = _pos < _len ? _lineText[_pos] : 0;
1953 		int sign = 1;
1954 		if (next == '+') {
1955 			_pos++;
1956 		} else if (next == '-') {
1957 			_pos++;
1958 			sign = -1;
1959 		}
1960 		if (_pos >= _len)
1961 			return parserError("Invalid exponent", _sharedRealToken);
1962 		ulong digits = 0;
1963 		ulong number = 0;
1964 		int i = _pos;
1965 		bool overflow = false;
1966 		for (;i < _len; i++) {
1967 			dchar ch = _lineText[i];
1968 			uint digit = 0;
1969 			if (ch >= '0' && ch <= '9')
1970 				digit = ch - '0';
1971 			else if (ch == '_')
1972 				continue;
1973 			else
1974 				break;
1975 			number *= 10;
1976 			if (digits >= 18) {
1977 				if ((number * 10) / 10 != number) {
1978 					overflow = true;
1979 					break;
1980 				}
1981 			}
1982 			number += digit;
1983 			digits++;
1984 		}
1985 		if (digits == 0)
1986 			return parserError("Invalid exponent", _sharedRealToken);
1987 		_pos = i;
1988 		value *= pow(10., cast(long)number * sign);
1989 		return processDecFloatSuffix(value);
1990 	}
1991 		
1992 	protected Token processDecFloatSecondPart(ulong firstPart) {
1993 		if (_pos >= _len) {
1994 			_sharedRealToken.setValue(cast(real)firstPart);
1995 			return _sharedRealToken;
1996 		}
1997 		ulong divider = 1;
1998 		ulong number = 0;
1999 		int i = _pos;
2000 		bool overflow = false;
2001 		for (;i < _len; i++) {
2002 			dchar ch = _lineText[i];
2003 			uint digit = 0;
2004 			if (ch >= '0' && ch <= '9')
2005 				digit = ch - '0';
2006 			else if (ch == '_')
2007 				continue;
2008 			else
2009 				break;
2010 			if (divider * 10 < divider)
2011 				continue; // ignore extra digits
2012 			number *= 10;
2013 			number += digit;
2014 			divider *= 10;
2015 		}
2016 		_pos = i;
2017 		real value = cast(real)firstPart + (cast(real)number / divider);
2018 		dchar next = _pos < _len ? _lineText[_pos] : 0;
2019 		if (next == 0) {
2020 			// neither exponent nor suffix
2021 			_sharedRealToken.setValue(value);
2022 			return _sharedRealToken;
2023 		}
2024    		if (next == 'e' || next == 'E') {
2025 			_pos++;
2026 			return processDecFloatExponent(value);
2027 		}
2028 		return processDecFloatSuffix(value);
2029 	}
2030 		
2031 	protected Token processDecNumber(dchar c) {
2032 		_sharedIntegerToken.setPos(_startLine, _startPos);
2033 		_sharedRealToken.setPos(_startLine, _startPos);
2034 		//if (_pos >= _len)
2035 		//	return parserError("Unexpected end of line in number", _sharedIntegerToken);
2036 		int digits = 1;
2037 		ulong number = c - '0';
2038 		int i = _pos;
2039 		bool overflow = false;
2040 		if (_line == _startLine) {
2041 			for (;i < _len; i++) {
2042 				dchar ch = _lineText[i];
2043 				uint digit = 0;
2044 				if (ch >= '0' && ch <= '9')
2045 					digit = ch - '0';
2046 				else if (ch == '_')
2047 					continue;
2048 				else
2049 					break;
2050 				number *= 10;
2051 				if (digits >= 18) {
2052 					if ((number * 10) / 10 != number) {
2053 						overflow = true;
2054 						break;
2055 					}
2056 				}
2057 				number += digit;
2058 				digits++;
2059 			}
2060 			_pos = i;
2061 		}
2062 		if (overflow)
2063 			return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
2064 		_sharedIntegerToken.setValue(number);
2065 		dchar next = _line == _startLine && _pos < _len ? _lineText[_pos] : 0;
2066 		if (next == 0)
2067 			return _sharedIntegerToken;
2068         if (next == 'e' || next == 'E') {
2069 			_pos++;
2070             return processDecFloatExponent(number);
2071         } else if (next == '.') {
2072 			_pos++;
2073 			return processDecFloatSecondPart(number);
2074 		}
2075 		return processIntegerSuffix();
2076 	}
2077 		
2078     /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
2079 	protected Token parserError(string msg, Token incompleteToken) {
2080         return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
2081     }
2082     /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
2083     protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
2084         if (_errorTolerant) {
2085             startPos--;
2086             _sharedInvalidToken.setPos(startLine, startPos);
2087             _sharedInvalidToken.errorMessage = msg;
2088             _sharedInvalidToken.errorCode = 1; // for future extension
2089             _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
2090             // make invalid source text
2091             dchar[] invalidText;
2092             int p = startLine == _line ? startPos : 0;
2093             for (int i = p; i < _pos && i < _lineText.length; i++)
2094                 invalidText ~= _lineText[i];
2095 
2096             // recover after error
2097             for (; _pos < _lineText.length; _pos++) {
2098                 dchar ch = _lineText[_pos];
2099                 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
2100                     break;
2101                 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
2102                     if (ch == '*' || ch == '/')
2103                         break;
2104                 }
2105                 invalidText ~= ch;
2106             }
2107             _sharedInvalidToken.text = invalidText;
2108             return _sharedInvalidToken;
2109         }
2110 		throw new ParserException(msg, _lineStream.file, _line, _pos);
2111 	}
2112 
2113 	protected Keyword detectKeyword(dchar ch) {
2114 		if (ch > 'z')
2115 			return Keyword.NONE;
2116 		int len = _len - _pos;
2117 		switch (cast(ubyte)ch) {
2118 			//	ABSTRACT,
2119 			//	ALIAS,
2120 			//	ALIGN,
2121 			//	ASM,
2122 			//	ASSERT,
2123 			//	AUTO,
2124 			case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos);
2125 
2126 			//	BODY,
2127 			//	BOOL,
2128 			//	BREAK,
2129 			//	BYTE,
2130 			case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos);
2131 				
2132 			//	CASE,
2133 			//	CAST,
2134 			//	CATCH,
2135 			//	CDOUBLE,
2136 			//	CENT,
2137 			//	CFLOAT,
2138 			//	CHAR,
2139 			//	CLASS,
2140 			//	CONST,
2141 			//	CONTINUE,
2142 			//	CREAL,
2143 			case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos);
2144 				
2145 			//	DCHAR,
2146 			//	DEBUG,
2147 			//	DEFAULT,
2148 			//	DELEGATE,
2149 			//	DELETE,
2150 			//	DEPRECATED,
2151 			//	DO,
2152 			//	DOUBLE,
2153 			case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos);
2154 				
2155 			//	ELSE,
2156 			//	ENUM,
2157 			//	EXPORT,
2158 			//	EXTERN,
2159 			case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos);
2160 				
2161 			//	FALSE,
2162 			//	FINAL,
2163 			//	FINALLY,
2164 			//	FLOAT,
2165 			//	FOR,
2166 			//	FOREACH,
2167 			//	FOREACH_REVERSE,
2168 			//	FUNCTION,
2169 			case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos);
2170 				
2171 			//	GOTO,
2172 			case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos);
2173 				
2174 			//	IDOUBLE,
2175 			//	IF,
2176 			//	IFLOAT,
2177 			//	IMMUTABLE,
2178 			//	IMPORT,
2179 			//	IN,
2180 			//	INOUT,
2181 			//	INT,
2182 			//	INTERFACE,
2183 			//	INVARIANT,
2184 			//	IREAL,
2185 			//	IS,
2186 			case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos);
2187 				
2188 			//	LAZY,
2189 			//	LONG,
2190 			case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos);
2191 				
2192 			//	MACRO,
2193 			//	MIXIN,
2194 			//	MODULE,
2195 			case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos);
2196 				
2197 			//	NEW,
2198 			//	NOTHROW,
2199 			//	NULL,
2200 			case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos);
2201 				
2202 			//	OUT,
2203 			//	OVERRIDE,
2204 			case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos);
2205 				
2206 			//	PACKAGE,
2207 			//	PRAGMA,
2208 			//	PRIVATE,
2209 			//	PROTECTED,
2210 			//	PUBLIC,
2211 			//	PURE,
2212 			case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos);
2213 				
2214 			//	REAL,
2215 			//	REF,
2216 			//	RETURN,
2217 			case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos);
2218 				
2219 			//	SCOPE,
2220 			//	SHARED,
2221 			//	SHORT,
2222 			//	STATIC,
2223 			//	STRUCT,
2224 			//	SUPER,
2225 			//	SWITCH,
2226 			//	SYNCHRONIZED,
2227 			case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos);
2228 				
2229 			//	TEMPLATE,
2230 			//	THIS,
2231 			//	THROW,
2232 			//	TRUE,
2233 			//	TRY,
2234 			//	TYPEDEF,
2235 			//	TYPEID,
2236 			//	TYPEOF,
2237 			case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos);
2238 				
2239 			//	UBYTE,
2240 			//	UCENT,
2241 			//	UINT,
2242 			//	ULONG,
2243 			//	UNION,
2244 			//	UNITTEST,
2245 			//	USHORT,
2246 			case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos);
2247 				
2248 			//	VERSION,
2249 			//	VOID,
2250 			//	VOLATILE,
2251 			case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos);
2252 				
2253 			//	WCHAR,
2254 			//	WHILE,
2255 			//	WITH,
2256 			case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos);
2257 				
2258 			//	FILE,
2259 			//	MODULE,
2260 			//	LINE,
2261 			//	FUNCTION,
2262 			//	PRETTY_FUNCTION,
2263 			//
2264 			//	GSHARED,
2265 			//	TRAITS,
2266 			//	VECTOR,
2267 			//	PARAMETERS,
2268 			case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos);
2269 			default: return Keyword.NONE;				
2270 		}
2271 	}	
2272 	protected OpCode detectOp(dchar ch) nothrow {
2273 		if (ch >= 128)
2274 			return OpCode.NONE;
2275 		dchar ch2 = _pos < _len ? _lineText[_pos] : 0;
2276 		dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
2277 		switch(cast(ubyte)ch) {
2278 			//	DIV, 		//    /
2279 			//	DIV_EQ, 	//    /=
2280 			case '/':
2281 				if (ch2 == '=') {
2282 					_pos++;
2283 					return OpCode.DIV_EQ;
2284 				}
2285 				return OpCode.DIV;
2286 			//	DOT, 		//    .
2287 			//	DOT_DOT, 	//    ..
2288 			//	DOT_DOT_DOT,//    ...
2289 			case '.':
2290 				if (ch2 == '.') {
2291 					if (ch3 == '.') {
2292 						_pos += 2;
2293 						return OpCode.DOT_DOT_DOT;
2294 					}
2295 					_pos++;
2296 					return OpCode.DOT_DOT;
2297 				}
2298 				return OpCode.DOT;
2299 			//	AND, 		//    &
2300 			//	AND_EQ, 	//    &=
2301 			//	LOG_AND, 	//    &&
2302 			case '&':
2303 				if (ch2 == '=') {
2304 					_pos++;
2305 					return OpCode.AND_EQ;
2306 				}
2307 				if (ch2 == '&') {
2308 					_pos++;
2309 					return OpCode.LOG_AND;
2310 				}
2311 				return OpCode.AND;
2312 			//	OR, 		//    |
2313 			//	OR_EQ, 		//    |=
2314 			//	LOG_OR, 	//    ||
2315 			case '|':
2316 				if (ch2 == '=') {
2317 					_pos++;
2318 					return OpCode.OR_EQ;
2319 				}
2320 				if (ch2 == '|') {
2321 					_pos++;
2322 					return OpCode.LOG_OR;
2323 				}
2324 				return OpCode.OR;
2325 			//	MINUS, 		//    -
2326 			//	MINUS_EQ, 	//    -=
2327 			//	MINUS_MINUS,//    --
2328 			case '-':
2329 				if (ch2 == '=') {
2330 					_pos++;
2331 					return OpCode.MINUS_EQ;
2332 				}
2333 				if (ch2 == '-') {
2334 					_pos++;
2335 					return OpCode.MINUS_MINUS;
2336 				}
2337 				return OpCode.MINUS;
2338 			//	PLUS, 		//    +
2339 			//	PLUS_EQ, 	//    +=
2340 			//	PLUS_PLUS, 	//    ++
2341 			case '+':
2342 				if (ch2 == '=') {
2343 					_pos++;
2344 					return OpCode.PLUS_EQ;
2345 				}
2346 				if (ch2 == '+') {
2347 					_pos++;
2348 					return OpCode.PLUS_PLUS;
2349 				}
2350 				return OpCode.PLUS;
2351 			//	LT, 		//    <
2352 			//	LT_EQ, 		//    <=
2353 			//	SHL, 		//    <<
2354 			//	SHL_EQ, 	//    <<=
2355 			//	LT_GT, 		//    <>
2356 			//	NE_EQ, 		//    <>=
2357 			case '<':
2358 				if (ch2 == '<') {
2359 					if (ch3 == '=') {
2360 						_pos += 2;
2361 						return OpCode.SHL_EQ;
2362 					}
2363 					_pos++;
2364 					return OpCode.SHL;
2365 				}
2366 				if (ch2 == '>') {
2367 					if (ch3 == '=') {
2368 						_pos += 2;
2369 						return OpCode.NE_EQ;
2370 					}
2371 					_pos++;
2372 					return OpCode.LT_GT;
2373 				}
2374 				if (ch2 == '=') {
2375 					_pos++;
2376 					return OpCode.LT_EQ;
2377 				}
2378 				return OpCode.LT;
2379 			//	GT, 		//    >
2380 			//	GT_EQ, 		//    >=
2381 			//	SHR_EQ		//    >>=
2382 			//	ASR_EQ, 	//    >>>=
2383 			//	SHR, 		//    >>
2384 			//	ASR, 		//    >>>
2385 			case '>':
2386 				if (ch2 == '>') {
2387 					if (ch3 == '>') {
2388 						dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2389 						if (ch4 == '=') { // >>>=
2390 							_pos += 3;
2391 							return OpCode.ASR_EQ;
2392 						}
2393 						_pos += 2;
2394 						return OpCode.ASR; // >>>
2395 					}
2396 					if (ch3 == '=') { // >>=
2397 						_pos += 2;
2398 						return OpCode.SHR_EQ;
2399 					}
2400 					_pos++;
2401 					return OpCode.SHR;
2402 				}
2403 				if (ch2 == '=') { // >=
2404 					_pos++;
2405 					return OpCode.GT_EQ;
2406 				}
2407 				// >
2408 				return OpCode.GT;
2409 			//	NOT, 		//    !
2410 			//	NOT_EQ		//    !=
2411 			//	NOT_LT_GT, 	//    !<>
2412 			//	NOT_LT_GT_EQ, //    !<>=
2413 			//	NOT_LT, 	//    !<
2414 			//	NOT_LT_EQ, 	//    !<=
2415 			//	NOT_GT, 	//    !>
2416 			//	NOT_GT_EQ, 	//    !>=
2417 			case '!':
2418 				if (ch2 == '<') { // !<
2419 					if (ch3 == '>') { // !<>
2420 						dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
2421 						if (ch4 == '=') { // !<>=
2422 							_pos += 3;
2423 							return OpCode.NOT_LT_GT_EQ;
2424 						}
2425 						_pos += 2;
2426 						return OpCode.NOT_LT_GT; // !<>
2427 					}
2428 					if (ch3 == '=') { // !<=
2429 						_pos += 2;
2430 						return OpCode.NOT_LT_EQ;
2431 					}
2432 					_pos++;
2433 					return OpCode.NOT_LT; // !<
2434 				}
2435 				if (ch2 == '=') { // !=
2436 					_pos++;
2437 					return OpCode.NOT_EQ;
2438 				}
2439 				return OpCode.NOT;
2440 			//	PAR_OPEN, 	//    (
2441 			case '(':
2442 				return OpCode.PAR_OPEN;
2443 			//	PAR_CLOSE, 	//    )
2444 			case ')':
2445 				return OpCode.PAR_CLOSE;
2446 			//	SQ_OPEN, 	//    [
2447 			case '[':
2448 				return OpCode.SQ_OPEN;
2449 			//	SQ_CLOSE, 	//    ]
2450 			case ']':
2451 				return OpCode.SQ_CLOSE;
2452 			//	CURL_OPEN, 	//    {
2453 			case '{':
2454 				return OpCode.CURL_OPEN;
2455 			//	CURL_CLOSE, //    }
2456 			case '}':
2457 				return OpCode.CURL_CLOSE;
2458 			//	QUEST, 		//    ?
2459 			case '?':
2460 				return OpCode.QUEST;
2461 			//	COMMA, 		//    ,
2462 			case ',':
2463 				return OpCode.COMMA;
2464 			//	SEMICOLON, 	//    ;
2465 			case ';':
2466 				return OpCode.SEMICOLON;
2467 			//	COLON, 	    //    :
2468 			case ':':
2469 				return OpCode.COLON;
2470 			//	DOLLAR, 	//    $
2471 			case '$':
2472 				return OpCode.DOLLAR;
2473 			//	EQ, 		//    =
2474 			//	QE_EQ, 		//    ==
2475 			//	EQ_GT, 		//    =>
2476 			case '=':
2477 				if (ch2 == '=') { // ==
2478 					_pos++;
2479 					return OpCode.QE_EQ;
2480 				}
2481 				if (ch2 == '>') { // =>
2482 					_pos++;
2483 					return OpCode.EQ_GT;
2484 				}
2485 				return OpCode.EQ;
2486 			//	MUL, 		//    *
2487 			//	MUL_EQ, 	//    *=
2488 			case '*':
2489 				if (ch2 == '=') {
2490 					_pos++;
2491 					return OpCode.MUL_EQ;
2492 				}
2493 				return OpCode.MUL;
2494 			//	MOD, 	//    %
2495 			//	MOD_EQ, //    %=
2496 			case '%':
2497 				if (ch2 == '=') {
2498 					_pos++;
2499 					return OpCode.MOD_EQ;
2500 				}
2501 				return OpCode.MOD;
2502 			//	XOR, 		//    ^
2503 			//	XOR_EQ, 	//    ^=
2504 			//	LOG_XOR, 	//    ^^
2505 			//	LOG_XOR_EQ, //    ^^=
2506 			case '^':
2507 				if (ch2 == '^') {
2508 					if (ch3 == '=') {
2509 						_pos += 2;
2510 						return OpCode.LOG_XOR_EQ;
2511 					}
2512 					_pos++;
2513 					return OpCode.LOG_XOR;
2514 				}
2515 				if (ch2 == '=') {
2516 					_pos++;
2517 					return OpCode.XOR_EQ;
2518 				}
2519 				return OpCode.XOR;
2520 			//	INV, 		//    ~
2521 			//	INV_EQ, 	//    ~=
2522 			case '~':
2523 				if (ch2 == '=') {
2524 					_pos++;
2525 					return OpCode.INV_EQ;
2526 				}
2527 				return OpCode.INV;
2528 			//	AT, 		//    @
2529 			case '@':
2530 				return OpCode.AT;
2531 			//	SHARP 		//    #
2532 			case '#':
2533 				return OpCode.SHARP;
2534 			default:
2535 				return OpCode.NONE;
2536 		}
2537 	}
2538 	
2539     protected Token processCharacterLiteral() {
2540 		_sharedCharacterLiteralToken.setPos(_startLine, _startPos);
2541         if (_pos + 2 > _len)
2542             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2543         dchar ch = _lineText[_pos++];
2544         dchar ch2 = _lineText[_pos++];
2545         dchar type = 0;
2546         if (ch == '\\') {
2547             // process escaped character - store it in ch
2548             // TODO: support all escape sequences
2549             switch(ch2) {
2550                 case 'r':
2551                     ch = '\r';
2552                     break;
2553                 case 'n':
2554                     ch = '\n';
2555                     break;
2556                 case 't':
2557                     ch = '\t';
2558                     break;
2559                 case '\\':
2560                     ch = '\\';
2561                     break;
2562                 default:
2563                     ch = ch2;
2564                     break;
2565             }
2566             // here must be closing '
2567             if (_pos + 1 > _len)
2568                 return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2569             ch2 = _lineText[_pos++];
2570         }
2571         if (ch2 != '\'')
2572             return parserError("Invalid character literal", _sharedCharacterLiteralToken);
2573         if (_pos < _len) {
2574             dchar t = _lineText[_pos];
2575             if (t == 'd' || t == 'w' || t == 'c') {
2576                 type = t;
2577                 _pos++;
2578             } else if (isIdentMiddleChar(ch)) {
2579                 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken);
2580             }
2581         }
2582         _sharedCharacterLiteralToken.setCharacter(ch, type);
2583         return _sharedCharacterLiteralToken;
2584     }
2585 
2586 	protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
2587 		bool wysiwyg = (delimiter == 'r' || delimiter == '`');
2588 		//writeln("processDoubleQuotedString()");
2589 		_sharedStringLiteralToken.setPos(_startLine, _startPos);
2590 		_stringLiteralAppender.reset();
2591 		if (delimiter == 'r') {
2592 			_pos++;
2593 			delimiter = '\"';
2594 		}
2595 		dchar type = 0;
2596 		for (;;) {
2597 			int i = _pos;
2598 			int endPos = int.max;
2599             bool lastBackSlash = false;
2600 			for(; i < _len; i++) {
2601                 dchar ch = _lineText[i];
2602                 if (ch == '\\') {
2603                     if (lastBackSlash)
2604                         lastBackSlash = false;
2605                     else
2606                         lastBackSlash = true;
2607                 }
2608                 else if (ch == delimiter && !lastBackSlash) {
2609 					endPos = i;
2610 					break;
2611 				}
2612                 else if(lastBackSlash)
2613                     lastBackSlash = false;
2614 			}
2615 			if (endPos != int.max) {
2616 				// found end quote
2617 				_stringLiteralAppender.append(_lineText[_pos .. endPos]);
2618 				_pos = endPos + 1;
2619 				break;
2620 			}
2621 			// no quote by end of line
2622 			_stringLiteralAppender.append(_lineText[_pos .. $]);
2623 			_stringLiteralAppender.appendEol();
2624 			if (!nextLine()) {
2625 				// do we need to throw exception if eof comes before end of string?
2626 				break;
2627 			}
2628 		}
2629 		dchar t = 0;
2630 		if (_pos < _len) {
2631 			dchar ch = _lineText[_pos];
2632 			if (ch == 'c' || ch == 'w' || ch == 'd') {
2633 				t = ch;
2634                 _pos++;
2635                 if (_pos < _len) {
2636                     ch = _lineText[_pos];
2637                     if (isIdentMiddleChar(ch))
2638                         return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2639                 }
2640             } else if (isIdentMiddleChar(ch))
2641 				return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
2642 		}
2643 		if (t != 0) {
2644 			if (type != 0 && t != type)
2645 				return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
2646 			type = t;
2647 		}
2648 		if (wysiwyg) {
2649 			// no escape processing
2650 			_sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2651 			return _sharedStringLiteralToken;
2652 		}
2653         _stringLiteralAppender.processEscapeSequences();
2654 		_sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
2655 		return _sharedStringLiteralToken;
2656 	}
2657 
2658 	protected SysTime buildTime;
2659 	
2660 	//	string literal of the date of compilation "mmm dd yyyy"
2661 	protected dstring formatBuildDate() {
2662 		// TODO: provide proper format
2663 		return to!dstring(buildTime);
2664 	}
2665 	
2666 	//	string literal of the time of compilation "hh:mm:ss"
2667 	protected dstring formatBuildTime() {
2668 		// TODO: provide proper format
2669 		return to!dstring(buildTime);
2670 	}
2671 	
2672 	//	string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2673 	protected dstring formatBuildTimestamp() {
2674 		// TODO: provide proper format
2675 		return to!dstring(buildTime);
2676 	}
2677 	
2678 	static immutable dstring VERSION = "0.1";
2679 	static immutable dstring VENDOR = "coolreader.org";
2680 	
2681 	protected Token makeSpecialTokenString(dstring str, int pos) {
2682 		_sharedStringLiteralToken.setPos(_startLine, _startPos);
2683 		_sharedStringLiteralToken.setText(cast(dchar[])str, 0);
2684 		return _sharedStringLiteralToken;
2685 	}
2686 	
2687 	protected Token processSpecialToken(Keyword keyword, int pos) {
2688 		switch (keyword) {
2689 			//Special Token	Replaced with
2690 			case Keyword.DATE: //	string literal of the date of compilation "mmm dd yyyy"
2691 				return makeSpecialTokenString(formatBuildDate(), pos);
2692 			case Keyword.TIME: //	string literal of the time of compilation "hh:mm:ss"
2693 				return makeSpecialTokenString(formatBuildTime(), pos);
2694 			case Keyword.TIMESTAMP: //	string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2695 				return makeSpecialTokenString(formatBuildTimestamp(), pos);
2696 			case Keyword.VENDOR: //	Compiler vendor string, such as "Digital Mars D"
2697 				return makeSpecialTokenString(VENDOR, pos);
2698 			case Keyword.VERSION_: //	Compiler version as an integer, such as 2001
2699 				return makeSpecialTokenString(VERSION, pos);
2700 			default:
2701 				parserError("Unknown special token", _line, pos);
2702 		}
2703 		return null;
2704 	}
2705 	
2706     protected int _startLine;
2707     protected int _startPos;
2708 
2709 	// returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls).
2710 	Token nextToken() {
2711         _startLine = _line;
2712         _startPos = _pos;
2713 		dchar ch = nextChar();
2714 		if (ch == EOF_CHAR) {
2715 			return emitEof();
2716 		}
2717 		if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
2718 			// white space (treat EOL as whitespace, too)
2719 			return processWhiteSpace(ch);
2720 		}
2721 		dchar next = _pos < _len ? _lineText[_pos] : 0;
2722 		if (ch == '/') {
2723 			if (next == '/')
2724 				return processOneLineComment();
2725 			else if (next == '*')
2726 				return processMultilineComment();
2727 			else if (next == '+')
2728 				return processNestedComment();
2729 		}
2730         if (ch == '#' && _line == 1)
2731             return processOneLineSharpComment();
2732 		if (ch == '\"')
2733 			return processDoubleQuotedOrWysiwygString(ch);
2734 		if (ch == '\'')
2735 			return processCharacterLiteral();
2736 		if (ch == 'x' && next == '\"')
2737 			return processHexString();
2738 		if (ch == 'q' && next == '\"')
2739 			return processDelimitedString();
2740 		if ((ch == 'r' && next == '\"') || (ch == '`'))
2741 			return processDoubleQuotedOrWysiwygString(ch);
2742 		int oldPos = _pos - 1;
2743 		
2744 		if (ch == '0') {
2745 			if (next == 'b' || next == 'B')
2746 				return processBinaryNumber();
2747 			if (next == 'x' || next == 'X')
2748 				return processHexNumber();
2749 			if (next >= '0' && next <= '9')
2750 				return processOctNumber();
2751 			if (next >= '0' && next <= '9')
2752 				return processDecNumber(ch);
2753 		}
2754 		if (ch >= '0' && ch <= '9')
2755 			return processDecNumber(ch);
2756 		if (ch == '.' && next >= '0' && next <= '9') // .123
2757 			return processDecFloatSecondPart(0);
2758 				
2759 		if (ch == '_' || isUniversalAlpha(ch)) {
2760 			// start of identifier or keyword?
2761 			Keyword keyword = detectKeyword(ch);
2762 			if (keyword != Keyword.NONE) {
2763 				switch (keyword) {
2764 					//Special Token	Replaced with
2765 					case Keyword.EOF: return emitEof(); //	sets the scanner to the end of the file
2766 					case Keyword.DATE: //	string literal of the date of compilation "mmm dd yyyy"
2767 					case Keyword.TIME: //	string literal of the time of compilation "hh:mm:ss"
2768 					case Keyword.TIMESTAMP: //	string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
2769 					case Keyword.VENDOR: //	Compiler vendor string, such as "Digital Mars D"
2770 					case Keyword.VERSION_: //	Compiler version as an integer, such as 2001
2771 						return processSpecialToken(keyword, oldPos);
2772 					default:
2773 						_sharedKeywordToken.setPos(_startLine, _startPos);
2774 						_sharedKeywordToken.keyword = keyword;
2775 						return _sharedKeywordToken;
2776 				}
2777 			}
2778 			return processIdent(ch);
2779 		}
2780 		OpCode op = detectOp(ch);
2781 		if (op != OpCode.NONE) {
2782 			_sharedOpToken.setPos(_startLine, _startPos);
2783 			_sharedOpToken.opCode = op;
2784 			return _sharedOpToken;
2785 		}
2786         return parserError("Invalid token", _line, _pos);
2787 	}
2788 
2789 	
2790 }
2791 
// Manual smoke test: dumps every token of a local sample file to stdout.
// Only compiled in when version DisableLexerTest is set.
unittest {
    version(DisableLexerTest) {
    import std.stdio;
    import std.conv;
    import std.utf;
    import dlangui.core.linestream;
    string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
    writeln("opening file");
    try {
        std.stream.File f = new std.stream.File(fname);
        scope(exit) { f.close(); }
        try {
            LineStream lines = LineStream.create(f, fname);
            Tokenizer lexer = new Tokenizer(lines);
            Token tok;
            // read until the tokenizer gives up (null) or reports end of file
            while ((tok = lexer.nextToken()) !is null) {
                if (tok.type == TokenType.EOF) {
                    writeln("EOF token");
                    break;
                }
                writeln("", tok.line, ":", tok.pos, "\t", tok.toString);
            }
            if (tok is null)
                writeln("Null token returned");
        } catch (Exception e) {
            writeln("Exception " ~ e.toString);
        }
    } catch (Exception e) {
        writeln("Exception " ~ e.toString);
    }
    }
}
2826 
/// Looks up a named character entity (e.g. "amp") and returns its character;
/// returns 0 when the name is not registered.
dchar entityToChar(string name) {
    auto found = name in entityToCharMap;
    if (found is null)
        return 0;
    return *found;
}
2834 
/// Finds the entity name registered for a character; returns null when there is none.
string charToEntity(dchar ch) {
    auto found = ch in charToEntityMap;
    if (found is null)
        return null;
    return *found;
}
2842 
2843 private __gshared dchar[string]entityToCharMap;
2844 private __gshared string[dchar]charToEntityMap;
/// Registers an entity in both lookup tables: name -> character and character -> name.
private void addEntity(string name, dchar ch) {
    charToEntityMap[ch] = name;
    entityToCharMap[name] = ch;
}
/// Fills the named character entity tables.
/// Declared as a `shared static this()`: the tables are `__gshared` (one copy per process),
/// so they must be populated once per process rather than again by every new thread.
shared static this() {
    addEntity("quot", 34);
    addEntity("amp", 38);
    addEntity("lt", 60);
    addEntity("gt", 62);
    addEntity("OElig", 338);
    addEntity("oelig", 339);
    addEntity("Scaron", 352);
    addEntity("scaron", 353);
    addEntity("Yuml", 376);
    addEntity("circ", 710);
    addEntity("tilde", 732);
    addEntity("ensp", 8194);
    addEntity("emsp", 8195);
    addEntity("thinsp", 8201);
    addEntity("zwnj", 8204);
    addEntity("zwj", 8205);
    addEntity("lrm", 8206);
    addEntity("rlm", 8207);
    addEntity("ndash", 8211);
    addEntity("mdash", 8212);
    addEntity("lsquo", 8216);
    addEntity("rsquo", 8217);
    addEntity("sbquo", 8218);
    addEntity("ldquo", 8220);
    addEntity("rdquo", 8221);
    addEntity("bdquo", 8222);
    addEntity("dagger", 8224);
    addEntity("Dagger", 8225);
    addEntity("permil", 8240);
    addEntity("lsaquo", 8249);
    addEntity("rsaquo", 8250);
    addEntity("euro", 8364);
    addEntity("nbsp", 160);
    addEntity("iexcl", 161);
    addEntity("cent", 162);
    addEntity("pound", 163);
    addEntity("curren", 164);
    addEntity("yen", 165);
    addEntity("brvbar", 166);
    addEntity("sect", 167);
    addEntity("uml", 168);
    addEntity("copy", 169);
    addEntity("ordf", 170);
    addEntity("laquo", 171);
    addEntity("not", 172);
    addEntity("shy", 173);
    addEntity("reg", 174);
    addEntity("macr", 175);
    addEntity("deg", 176);
    addEntity("plusmn", 177);
    addEntity("sup2", 178);
    addEntity("sup3", 179);
    addEntity("acute", 180);
    addEntity("micro", 181);
    addEntity("para", 182);
    addEntity("middot", 183);
    addEntity("cedil", 184);
    addEntity("sup1", 185);
    addEntity("ordm", 186);
    addEntity("raquo", 187);
    addEntity("frac14", 188);
    addEntity("frac12", 189);
    addEntity("frac34", 190);
    addEntity("iquest", 191);
    addEntity("Agrave", 192);
    addEntity("Aacute", 193);
    addEntity("Acirc", 194);
    addEntity("Atilde", 195);
    addEntity("Auml", 196);
    addEntity("Aring", 197);
    addEntity("AElig", 198);
    addEntity("Ccedil", 199);
    addEntity("Egrave", 200);
    addEntity("Eacute", 201);
    addEntity("Ecirc", 202);
    addEntity("Euml", 203);
    addEntity("Igrave", 204);
    addEntity("Iacute", 205);
    addEntity("Icirc", 206);
    addEntity("Iuml", 207);
    addEntity("ETH", 208);
    addEntity("Ntilde", 209);
    addEntity("Ograve", 210);
    addEntity("Oacute", 211);
    addEntity("Ocirc", 212);
    addEntity("Otilde", 213);
    addEntity("Ouml", 214);
    addEntity("times", 215);
    addEntity("Oslash", 216);
    addEntity("Ugrave", 217);
    addEntity("Uacute", 218);
    addEntity("Ucirc", 219);
    addEntity("Uuml", 220);
    addEntity("Yacute", 221);
    addEntity("THORN", 222);
    addEntity("szlig", 223);
    addEntity("agrave", 224);
    addEntity("aacute", 225);
    addEntity("acirc", 226);
    addEntity("atilde", 227);
    addEntity("auml", 228);
    addEntity("aring", 229);
    addEntity("aelig", 230);
    addEntity("ccedil", 231);
    addEntity("egrave", 232);
    addEntity("eacute", 233);
    addEntity("ecirc", 234);
    addEntity("euml", 235);
    addEntity("igrave", 236);
    addEntity("iacute", 237);
    addEntity("icirc", 238);
    addEntity("iuml", 239);
    addEntity("eth", 240);
    addEntity("ntilde", 241);
    addEntity("ograve", 242);
    addEntity("oacute", 243);
    addEntity("ocirc", 244);
    addEntity("otilde", 245);
    addEntity("ouml", 246);
    addEntity("divide", 247);
    addEntity("oslash", 248);
    addEntity("ugrave", 249);
    addEntity("uacute", 250);
    addEntity("ucirc", 251);
    addEntity("uuml", 252);
    addEntity("yacute", 253);
    addEntity("thorn", 254);
    addEntity("yuml", 255);
    addEntity("fnof", 402);
    addEntity("Alpha", 913);
    addEntity("Beta", 914);
    addEntity("Gamma", 915);
    addEntity("Delta", 916);
    addEntity("Epsilon", 917);
    addEntity("Zeta", 918);
    addEntity("Eta", 919);
    addEntity("Theta", 920);
    addEntity("Iota", 921);
    addEntity("Kappa", 922);
    addEntity("Lambda", 923);
    addEntity("Mu", 924);
    addEntity("Nu", 925);
    addEntity("Xi", 926);
    addEntity("Omicron", 927);
    addEntity("Pi", 928);
    addEntity("Rho", 929);
    addEntity("Sigma", 931);
    addEntity("Tau", 932);
    addEntity("Upsilon", 933);
    addEntity("Phi", 934);
    addEntity("Chi", 935);
    addEntity("Psi", 936);
    addEntity("Omega", 937);
    addEntity("alpha", 945);
    addEntity("beta", 946);
    addEntity("gamma", 947);
    addEntity("delta", 948);
    addEntity("epsilon", 949);
    addEntity("zeta", 950);
    addEntity("eta", 951);
    addEntity("theta", 952);
    addEntity("iota", 953);
    addEntity("kappa", 954);
    addEntity("lambda", 955);
    addEntity("mu", 956);
    addEntity("nu", 957);
    addEntity("xi", 958);
    addEntity("omicron", 959);
    addEntity("pi", 960);
    addEntity("rho", 961);
    addEntity("sigmaf", 962);
    addEntity("sigma", 963);
    addEntity("tau", 964);
    addEntity("upsilon", 965);
    addEntity("phi", 966);
    addEntity("chi", 967);
    addEntity("psi", 968);
    addEntity("omega", 969);
    addEntity("thetasym", 977);
    addEntity("upsih", 978);
    addEntity("piv", 982);
    addEntity("bull", 8226);
    addEntity("hellip", 8230);
    addEntity("prime", 8242);
    addEntity("Prime", 8243);
    addEntity("oline", 8254);
    addEntity("frasl", 8260);
    addEntity("weierp", 8472);
    addEntity("image", 8465);
    addEntity("real", 8476);
    addEntity("trade", 8482);
    addEntity("alefsym", 8501);
    addEntity("larr", 8592);
    addEntity("uarr", 8593);
    addEntity("rarr", 8594);
    addEntity("darr", 8595);
    addEntity("harr", 8596);
    addEntity("crarr", 8629);
    addEntity("lArr", 8656);
    addEntity("uArr", 8657);
    addEntity("rArr", 8658);
    addEntity("dArr", 8659);
    addEntity("hArr", 8660);
    addEntity("forall", 8704);
    addEntity("part", 8706);
    addEntity("exist", 8707);
    addEntity("empty", 8709);
    addEntity("nabla", 8711);
    addEntity("isin", 8712);
    addEntity("notin", 8713);
    addEntity("ni", 8715);
    addEntity("prod", 8719);
    addEntity("sum", 8721);
    addEntity("minus", 8722);
    addEntity("lowast", 8727);
    addEntity("radic", 8730);
    addEntity("prop", 8733);
    addEntity("infin", 8734);
    addEntity("ang", 8736);
    addEntity("and", 8743);
    addEntity("or", 8744);
    addEntity("cap", 8745);
    addEntity("cup", 8746);
    addEntity("int", 8747);
    addEntity("there4", 8756);
    addEntity("sim", 8764);
    addEntity("cong", 8773);
    addEntity("asymp", 8776);
    addEntity("ne", 8800);
    addEntity("equiv", 8801);
    addEntity("le", 8804);
    addEntity("ge", 8805);
    addEntity("sub", 8834);
    addEntity("sup", 8835);
    addEntity("nsub", 8836);
    addEntity("sube", 8838);
    addEntity("supe", 8839);
    addEntity("oplus", 8853);
    addEntity("otimes", 8855);
    addEntity("perp", 8869);
    addEntity("sdot", 8901);
    addEntity("lceil", 8968);
    addEntity("rceil", 8969);
    addEntity("lfloor", 8970);
    addEntity("rfloor", 8971);
    addEntity("loz", 9674);
    addEntity("spades", 9824);
    addEntity("clubs", 9827);
    addEntity("hearts", 9829);
    addEntity("diams", 9830);
    addEntity("lang", 10216);
    addEntity("rang", 10217);
}
3103 
3104 
3105 
3106 //void runTokenizerTest()
3107 unittest 
3108 {
3109 	import std.algorithm;
3110 	class TokenTest {
3111 		int _line;
3112 		string _file;
3113 		this(string file, int line) {
3114 			_file = file;
3115 			_line = line;
3116 		}
3117 		bool doTest(Token token) {
3118 			return true;
3119 		}		
3120 		void execute(Tokenizer tokenizer) {
3121 			Token token = tokenizer.nextToken();
3122 			if (!doTest(token)) {
3123 				assert(false, "	token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ "  foundToken: " ~ token.toString ~ " expected: " ~ toString);
3124 			}
3125 		}
3126 		public override @property string toString() {
3127 			return "TokenTest";
3128 		}
3129 	}
3130 	void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
3131 		Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
3132 		for (int i = 0; i < tokens.length; i++) {
3133 			tokens[i].execute(tokenizer);
3134 		}
3135 	}
3136 	class KeywordTest : TokenTest {
3137 		Keyword _code;
3138 		this(Keyword code, string file = __FILE__, uint line = __LINE__) {
3139 			super(file, line);
3140 			_code = code;
3141 		}
3142 		override bool doTest(Token token) {
3143 			if (token.type != TokenType.KEYWORD)
3144 				return false;
3145 			if (token.keyword != _code)
3146 				return false;
3147 			return true;
3148 		}		
3149 		public override @property string toString() {
3150 			return "Keyword:" ~ to!string(_code);
3151 		}
3152 	}
3153 	class OpTest : TokenTest {
3154 		OpCode _code;
3155 		this(OpCode code, string file = __FILE__, uint line = __LINE__) {
3156 			super(file, line);
3157 			_code = code;
3158 		}
3159 		override bool doTest(Token token) {
3160 			if (token.type != TokenType.OP)
3161 				return false;
3162 			if (token.opCode != _code)
3163 				return false;
3164 			return true;
3165 		}		
3166 		public override @property string toString() {
3167 			return "Op:" ~ to!string(_code);
3168 		}
3169 	}
3170 	class StringTest : TokenTest {
3171 		dstring _value;
3172         dchar _literalType;
3173 		this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
3174 			super(file, line);
3175 			_value = value;
3176             _literalType = literalType;
3177 		}
3178 		override bool doTest(Token token) {
3179 			if (token.type != TokenType.STRING)
3180 				return false;
3181 			if (!token.text.equal(_value))
3182 				return false;
3183 			if (token.literalType != _literalType)
3184 				return false;
3185 			return true;
3186 		}		
3187 		public override @property string toString() {
3188 			return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
3189 		}
3190 	}
3191 	class IntegerTest : TokenTest {
3192 		ulong _value;
3193 		bool _unsigned;
3194 		bool _long;
3195 		this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
3196 			super(file, line);
3197 			_value = value;
3198 			_unsigned = unsignedFlag;
3199 			_long = longFlag;
3200 		}
3201 		override bool doTest(Token token) {
3202 			if (token.type != TokenType.INTEGER)
3203 				return false;
3204 			if (token.intValue != _value)
3205 				return false;
3206 			if (token.isUnsigned != _unsigned)
3207 				return false;
3208 			if (token.isLong != _long)
3209 				return false;
3210 			return true;
3211 		}		
3212 		public override @property string toString() {
3213 			return "Integer:" ~ to!string(_value);
3214 		}
3215 	}
3216 	class RealTest : TokenTest {
3217 		real _value;
3218 		ubyte _precision;
3219 		bool _imaginary;
3220 		this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
3221 			super(file, line);
3222 			_value = value;
3223 			_precision = precision;
3224 			_imaginary = imaginary;
3225 		}
3226 		override bool doTest(Token token) {
3227 			if (token.type != TokenType.FLOAT)
3228 				return false;
3229             real diff = token.realValue - _value;
3230             real maxerr = _value / 1000000;
3231             if (diff < 0) diff = -diff;
3232             if (maxerr < 0) maxerr = -maxerr;
3233 			if (diff > maxerr)
3234 				return false;
3235 			if (token.precision != _precision)
3236 				return false;
3237 			if (token.isImaginary != _imaginary)
3238 				return false;
3239 			return true;
3240 		}		
3241 		public override @property string toString() {
3242 			return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
3243 		}
3244 	}
3245 	class IdentTest : TokenTest {
3246 		string _value;
3247 		this(string value, string file = __FILE__, uint line = __LINE__) {
3248 			super(file, line);
3249 			_value = value;
3250 		}
3251 		override bool doTest(Token token) {
3252 			if (token.type != TokenType.IDENTIFIER)
3253 				return false;
3254 			if (! to!string(token.text).equal(_value))
3255 				return false;
3256 			return true;
3257 		}		
3258 		public override @property string toString() {
3259 			return "Ident:" ~ _value;
3260 		}
3261 	}
3262 	class CommentTest : TokenTest {
3263 		this(string file = __FILE__, uint line = __LINE__) {
3264 			super(file, line);
3265 		}
3266 		override bool doTest(Token token) {
3267 			if (token.type != TokenType.COMMENT)
3268 				return false;
3269 			return true;
3270 		}		
3271 		public override @property string toString() {
3272 			return "Comment";
3273 		}
3274 	}
3275 	class EOFTest : TokenTest {
3276 		this(string file = __FILE__, uint line = __LINE__) {
3277 			super(file, line);
3278 		}
3279 		override bool doTest(Token token) {
3280 			if (token.type != TokenType.EOF)
3281 				return false;
3282 			return true;
3283 		}		
3284 		public override @property string toString() {
3285 			return "EOF";
3286 		}
3287 	}
3288 	class WhiteSpaceTest : TokenTest {
3289 		this(string file = __FILE__, uint line = __LINE__) {
3290 			super(file, line);
3291 		}
3292 		override bool doTest(Token token) {
3293 			if (token.type != TokenType.WHITESPACE)
3294 				return false;
3295 			return true;
3296 		}		
3297 		public override @property string toString() {
3298 			return "whiteSpace";
3299 		}
3300 	}
3301 	TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) { 
3302 		return new StringTest(value, literalType, file, line);
3303 	}
3304 	TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 
3305 		return new IntegerTest(value, unsignedFlag, longFlag, file, line);
3306 	}
3307 	TokenTest checkReal(real value, byte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 
3308 		return new RealTest(value, precision, imaginary, file, line);
3309 	}
3310 	TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) { 
3311 		return new IdentTest(value, file, line);
3312 	}
3313 	TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) { 
3314 		return new KeywordTest(value, file, line);
3315 	}
3316 	TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) { 
3317 		return new OpTest(value, file, line);
3318 	}
3319 	TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) { 
3320 		return new WhiteSpaceTest(file, line);
3321 	}
3322 	TokenTest checkComment(string file = __FILE__, uint line = __LINE__) { 
3323 		return new CommentTest(file, line);
3324 	}
3325 	TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) { 
3326 		return new EOFTest(file, line);
3327 	}
3328 
3329     // test strings
3330 	testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);
3331 
3332     // test strings
3333 	testTokenizer(q"TEST
3334 "simple string"
3335 "simple\nstring"
3336 `simple string`
3337 "simple string"d
3338 "simple string"c
3339 "simple string"w
3340 "simple\&quot;string"
3341 "\r\n\f\t\\\"\'&"
3342 TEST"
3343                   , [
3344                       checkString("simple string"),
3345                       checkSpace(),
3346                       checkString("simple\nstring"),
3347                       checkSpace(),
3348                       checkString("simple string"),
3349                       checkSpace(),
3350                       checkString("simple string", 'd'),
3351                       checkSpace(),
3352                       checkString("simple string", 'c'),
3353                       checkSpace(),
3354                       checkString("simple string", 'w'),
3355                       checkSpace(),
3356                       checkString("simple\&quot;string"),
3357                       checkSpace(),
3358                       checkString("\r\n\f\t\\\"\'&"),
3359     ]);
3360     // basic test
3361 	testTokenizer(q"TEST
3362 int i;
3363 TEST"
3364                   , [
3365                       checkKeyword(Keyword.INT),
3366                       checkSpace(),
3367                       checkIdent("i"),
3368                       checkOp(OpCode.SEMICOLON),
3369                       checkEOF()
3370                   ]);
3371     // test numbers
3372 	testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
3373                   , [
3374                       checkInteger(13),
3375                       checkSpace(),
3376                       checkInteger(0x123abcd, true, false),
3377                       checkSpace(),
3378                       checkInteger(0xabc, false, true),
3379                       checkSpace(),
3380                       checkInteger(std.conv.octal!743),
3381                       checkSpace(),
3382                       checkInteger(192_837_465),
3383                       checkSpace(),
3384                       checkInteger(0),
3385                       checkSpace(),
3386                       checkInteger(192837465),
3387                       checkSpace(),
3388                       checkReal(5.25),
3389                       checkSpace(),
3390                       checkReal(12.3f, 0),
3391                       checkSpace(),
3392                       checkReal(54.1L, 2),
3393                       checkSpace(),
3394                       checkReal(67.1, 1, true),
3395                       checkSpace(),
3396                       checkReal(3e3),
3397                       checkSpace(),
3398                       checkReal(25.67e-5f, 0),
3399                       checkEOF()
3400                   ]);
3401 }
3402