1 module ddc.lexer.tokenizer; 2 3 import ddc.lexer.textsource; 4 import ddc.lexer.exceptions; 5 6 import std.stdio; 7 import std.datetime; 8 import std.conv; 9 import std.utf; 10 import std.math; 11 12 enum TokenType : ubyte { 13 EOF, 14 //EOL, 15 WHITESPACE, 16 COMMENT, 17 IDENTIFIER, 18 STRING, 19 CHARACTER, 20 INTEGER, 21 FLOAT, 22 KEYWORD, 23 OP, 24 INVALID 25 } 26 27 // table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _ 28 // max code is 0xd7ff 29 //1728 30 const uint[1728] UNIVERSAL_ALPHA_FLAGS = [ 31 0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff 32 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff 33 0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff 34 0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff 35 0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff 36 0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff 37 0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff 38 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff 39 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff 40 0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff 41 0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff 42 0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff 43 0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff 44 0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff 45 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff 46 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff 47 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff 48 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff 49 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff 50 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff 51 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff 52 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff 53 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff 54 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff 55 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff 56 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff 57 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff 58 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff 59 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff 60 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff 61 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff 62 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff 63 0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff 64 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff 65 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff 66 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff 67 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff 68 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff 69 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff 70 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff 71 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff 72 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff 73 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff 74 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff 75 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff 76 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff 77 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff 78 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff 79 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff 80 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff 81 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff 82 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff 83 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff 84 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff 85 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff 86 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff 87 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff 88 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff 89 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff 90 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff 91 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff 92 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff 93 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff 94 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff 95 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff 96 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff 97 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff 98 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff 99 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff 100 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff 101 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff 102 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff 103 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff 104 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff 105 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff 106 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff 107 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff 108 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff 109 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff 110 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff 111 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff 112 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff 113 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff 114 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff 115 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff 116 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff 117 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff 118 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff 119 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff 120 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff 121 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff 122 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff 123 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff 124 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff 125 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff 126 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff 127 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff 128 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff 129 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff 130 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff 131 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff 132 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff 133 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff 134 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff 135 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff 136 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff 137 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff 138 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff 139 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff 140 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff 141 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff 142 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff 143 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff 144 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff 145 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff 146 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff 147 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff 148 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff 149 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff 150 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff 151 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff 152 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff 153 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff 154 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff 155 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff 156 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff 157 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff 158 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff 159 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff 160 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff 161 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff 162 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff 163 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff 164 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff 165 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff 166 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff 167 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff 168 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff 169 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff 170 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff 171 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff 172 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff 173 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff 174 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff 175 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff 176 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff 177 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff 178 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff 179 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff 180 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff 181 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff 182 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff 183 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff 184 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff 185 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff 186 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff 187 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff 188 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff 189 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff 190 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff 191 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff 192 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff 193 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff 194 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff 195 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff 196 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff 197 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff 198 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff 199 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff 200 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff 201 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff 202 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff 203 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff 204 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff 205 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff 206 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff 207 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff 208 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff 209 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff 210 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff 211 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff 212 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff 213 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff 214 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff 215 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff 216 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff 217 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff 218 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff 219 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff 220 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff 221 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff 222 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff 223 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff 224 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff 225 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff 226 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff 227 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff 228 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff 229 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff 230 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff 231 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff 232 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff 233 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff 234 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff 235 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff 236 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff 237 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff 238 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff 239 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff 240 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff 241 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff 242 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff 243 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff 244 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff 245 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff 246 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff 247 ]; 248 249 /// returns true if character is A..Z, a..z, _ or universal alpha 250 bool isUniversalAlpha(dchar ch) pure nothrow { 251 return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31)))); 252 } 253 254 /// character can present at the beginning of identifier 255 bool isIdentStartChar(dchar ch) pure nothrow { 256 return isUniversalAlpha(ch); 257 } 258 259 /// character can present in middle of identifier 260 bool isIdentMiddleChar(dchar ch) pure nothrow { 261 return (ch >= '0' && ch <='9') || isUniversalAlpha(ch); 262 } 263 264 immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false; 265 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) { 266 bool r(dchar ch, wchar v) pure nothrow { 267 return ch == v; 268 } 269 270 bool r(dchar ch, wchar v1, wchar v2) pure nothrow { 271 return ch >= v1 && ch <= v2; 272 } 273 274 bool isUniversalAlphaSlow(dchar c) pure nothrow { 275 return 276 // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217, 277 // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F 278 r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217) 279 || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F) 280 //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6, 281 //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D, 282 //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D, 283 //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC, 284 //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC 285 || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6) 286 || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D) 287 || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D) 288 || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC) 289 || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC) 290 //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4, 291 //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9 292 || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4) 293 || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9) 294 //Armenian: 0531−0556, 0561−0587 295 || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587) 296 //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA, 297 //05F0−05F2 298 || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA) 299 || r(c, 0x05F0,0x05F2) 300 //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE, 301 //06D0−06DC, 06E5−06E8, 06EA−06ED 302 || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE) 303 || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED) 304 //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963 305 || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963) 306 //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0, 307 //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD, 308 //09DC−09DD, 09DF−09E3, 09F0−09F1 309 || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0) 310 || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD) 311 || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1) 312 //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30, 313 //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48, 314 //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74 315 || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30) 316 || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48) 317 || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74) 318 //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8, 319 //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5, 320 //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0 321 || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8) 322 || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5) 323 || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0) 324 // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30, 325 //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D, 326 //0B5C−0B5D, 0B5F−0B61 327 || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30) 328 || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D) 329 || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61) 330 //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A, 331 //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5, 332 //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD 333 || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A) 334 || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5) 335 || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD) 336 //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33, 337 //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61 338 || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33) 339 || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61) 340 //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3, 341 //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE, 342 //0CE0−0CE1 343 || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3) 344 || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE) 345 || r(c, 0x0CE0,0x0CE1) 346 //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39, 347 //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61 348 || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39) 349 || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61) 350 //Thai: 0E01−0E3A, 0E40−0E5B 351 || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B) 352 //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97, 353 //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB, 354 //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6, 355 //0EC8−0ECD, 0EDC−0EDD 356 || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97) 357 || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB) 358 || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6) 359 || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD) 360 //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69, 361 //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD, 362 //0FB1−0FB7, 0FB9 363 || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69) 364 || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD) 365 || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9) 366 //Georgian: 10A0−10C5, 10D0−10F6 367 || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6) 368 //Hiragana: 3041−3093, 309B−309C 369 || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C) 370 //Katakana: 30A1−30F6, 30FB−30FC 371 || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC) 372 //Bopomofo: 3105−312C 373 || r(c, 0x3105,0x312C) 374 //CJK Unified Ideographs: 4E00−9FA5 375 || r(c, 0x4E00,0x9FA5) 376 //Hangul: AC00−D7A3 377 || r(c, 0xAC00,0xD7A3) 378 //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F, 379 //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF, 380 //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33 381 || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F) 382 || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF) 383 || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33) 384 //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1, 385 //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102, 386 //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131, 387 //2133−2138, 2160−2182, 3005−3007, 3021−3029 388 || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1) 389 || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102) 390 || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131) 391 || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029) 392 ; 393 } 394 395 } 396 397 unittest { 398 399 400 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) { 401 immutable uint itemsInRow = 8; 402 403 uint maxAlpha = 0; 404 for (uint i = 0; i < 0x10000; i++) { 405 uint ch = i; 406 if (isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) 407 maxAlpha = i; 408 } 409 maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1; 410 writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _"); 411 writefln("// max code is 0x%04x", maxAlpha); 412 writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = ["); 413 for (uint i = 0; i <= maxAlpha; i += 32) { 414 if ((i / 32) % itemsInRow == 0) 415 write(" "); 416 uint flags = 0; 417 for (uint j = 0; j < 32; j++) { 418 uint ch = i + j; 419 bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); 420 if (flag) 421 flags |= (1 << j); 422 } 423 writef("0x%08x", flags); 424 if (i != maxAlpha / 32 * 32) 425 write(","); 426 if ((i / 32) % itemsInRow == itemsInRow - 1) 427 writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31); 428 } 429 writeln("];"); 430 431 for (uint ch = 0; ch < 0x100000; ch++) { 432 bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); 433 bool flag2 = isUniversalAlpha(ch); 434 if (flag2 != flag) { 435 isUniversalAlpha(ch); 436 writefln("universalAlpha test failed for char %06x expeced %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0); 437 } 438 assert(flag2 == flag); 439 } 440 } 441 } 442 443 enum OpCode : ubyte { 444 NONE, // no op 445 DIV, // / 446 DIV_EQ, // /= 447 DOT, // . 448 DOT_DOT, // .. 449 DOT_DOT_DOT,// ... 450 AND, // & 451 AND_EQ, // &= 452 LOG_AND, // && 453 OR, // | 454 OR_EQ, // |= 455 LOG_OR, // || 456 MINUS, // - 457 MINUS_EQ, // -= 458 MINUS_MINUS,// -- 459 PLUS, // + 460 PLUS_EQ, // += 461 PLUS_PLUS, // ++ 462 LT, // < 463 LT_EQ, // <= 464 SHL, // << 465 SHL_EQ, // <<= 466 LT_GT, // <> 467 NE_EQ, // <>= 468 GT, // > 469 GT_EQ, // >= 470 SHR_EQ, // >>= 471 ASR_EQ, // >>>= 472 SHR, // >> 473 ASR, // >>> 474 NOT, // ! 475 NOT_EQ, // != 476 NOT_LT_GT, // !<> 477 NOT_LT_GT_EQ, // !<>= 478 NOT_LT, // !< 479 NOT_LT_EQ, // !<= 480 NOT_GT, // !> 481 NOT_GT_EQ, // !>= 482 PAR_OPEN, // ( 483 PAR_CLOSE, // ) 484 SQ_OPEN, // [ 485 SQ_CLOSE, // ] 486 CURL_OPEN, // { 487 CURL_CLOSE, // } 488 QUEST, // ? 489 COMMA, // , 490 SEMICOLON, // ; 491 COLON, // : 492 DOLLAR, // $ 493 EQ, // = 494 QE_EQ, // == 495 MUL, // * 496 MUL_EQ, // *= 497 MOD, // % 498 MOD_EQ, // %= 499 XOR, // ^ 500 XOR_EQ, // ^= 501 LOG_XOR, // ^^ 502 LOG_XOR_EQ, // ^^= 503 INV, // ~ 504 INV_EQ, // ~= 505 AT, // @ 506 EQ_GT, // => 507 SHARP // # 508 }; 509 510 immutable dstring[] OP_CODE_STRINGS = [ 511 "", 512 "/", 513 "/=", 514 ".", 515 "..", 516 "...", 517 "&", 518 "&=", 519 "&&", 520 "|", 521 "|=", 522 "||", 523 "-", 524 "-=", 525 "--", 526 "+", 527 "+=", 528 "++", 529 "<", 530 "<=", 531 "<<", 532 "<<=", 533 "<>", 534 "<>=", 535 ">", 536 ">=", 537 ">>=", 538 ">>>=", 539 ">>", 540 ">>>", 541 "!", 542 "!=", 543 "!<>", 544 "!<>=", 545 "!<", 546 "!<=", 547 "!>", 548 "!>=", 549 "(", 550 ")", 551 "[", 552 "]", 553 "{", 554 "}", 555 "?", 556 ",", 557 ";", 558 ":", 559 "$", 560 "=", 561 "==", 562 "*", 563 "*=", 564 "%", 565 "%=", 566 "^", 567 "^=", 568 "^^", 569 "^^=", 570 "~", 571 "~=", 572 "@", 573 "=>", 574 "#" 575 ]; 576 577 dstring getOpNameD(OpCode op) pure nothrow { 578 return OP_CODE_STRINGS[op]; 579 }; 580 581 enum Keyword : ubyte { 582 NONE, 583 ABSTRACT, 584 ALIAS, 585 ALIGN, 586 ASM, 587 ASSERT, 588 AUTO, 589 590 BODY, 591 BOOL, 592 BREAK, 593 BYTE, 594 595 CASE, 596 CAST, 597 CATCH, 598 CDOUBLE, 599 CENT, 600 CFLOAT, 601 CHAR, 602 CLASS, 603 CONST, 604 CONTINUE, 605 CREAL, 606 607 DCHAR, 608 DEBUG, 609 DEFAULT, 610 DELEGATE, 611 DELETE, 612 DEPRECATED, 613 DO, 614 DOUBLE, 615 616 ELSE, 617 ENUM, 618 EXPORT, 619 EXTERN, 620 621 FALSE, 622 FINAL, 623 FINALLY, 624 FLOAT, 625 FOR, 626 FOREACH, 627 FOREACH_REVERSE, 628 FUNCTION, 629 630 GOTO, 631 632 IDOUBLE, 633 IF, 634 IFLOAT, 635 IMMUTABLE, 636 IMPORT, 637 IN, 638 INOUT, 639 INT, 640 INTERFACE, 641 INVARIANT, 642 IREAL, 643 IS, 644 645 LAZY, 646 LONG, 647 648 MACRO, 649 MIXIN, 650 MODULE, 651 652 NEW, 653 NOTHROW, 654 NULL, 655 656 OUT, 657 OVERRIDE, 658 659 PACKAGE, 660 PRAGMA, 661 PRIVATE, 662 PROTECTED, 663 PUBLIC, 664 PURE, 665 666 REAL, 667 REF, 668 RETURN, 669 670 SCOPE, 671 SHARED, 672 SHORT, 673 STATIC, 674 STRUCT, 675 SUPER, 676 SWITCH, 677 SYNCHRONIZED, 678 679 TEMPLATE, 680 THIS, 681 THROW, 682 TRUE, 683 TRY, 684 TYPEDEF, 685 TYPEID, 686 TYPEOF, 687 688 UBYTE, 689 UCENT, 690 UINT, 691 ULONG, 692 UNION, 693 UNITTEST, 694 USHORT, 695 696 VERSION, 697 VOID, 698 VOLATILE, 699 700 WCHAR, 701 WHILE, 702 WITH, 703 704 FILE, 705 MODULE__, 706 LINE, 707 FUNCTION__, 708 PRETTY_FUNCTION, 709 710 //Special Token Replaced with 711 DATE, // string literal of the date of compilation "mmm dd yyyy" 712 EOF, // sets the scanner to the end of the file 713 TIME, // string literal of the time of compilation "hh:mm:ss" 714 TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 715 VENDOR, // Compiler vendor string, such as "Digital Mars D" 716 VERSION_, // Compiler version as an integer, such as 2001 717 718 GSHARED, 719 TRAITS, 720 VECTOR, 721 PARAMETERS, 722 723 } 724 725 immutable dstring[] KEYWORD_STRINGS = [ 726 "", 727 "abstract", 728 "alias", 729 "align", 730 "asm", 731 "assert", 732 "auto", 733 734 "body", 735 "bool", 736 "break", 737 "byte", 738 739 "case", 740 "cast", 741 "catch", 742 "cdouble", 743 "cent", 744 "cfloat", 745 "char", 746 "class", 747 "const", 748 "continue", 749 "creal", 750 751 "dchar", 752 "debug", 753 "default", 754 "delegate", 755 "delete", 756 "deprecated", 757 "do", 758 "double", 759 760 "else", 761 "enum", 762 "export", 763 "extern", 764 765 "false", 766 "final", 767 "finally", 768 "float", 769 "for", 770 "foreach", 771 "foreach_reverse", 772 "function", 773 774 "goto", 775 776 "idouble", 777 "if", 778 "ifloat", 779 "immutable", 780 "import", 781 "in", 782 "inout", 783 "int", 784 "interface", 785 "invariant", 786 "ireal", 787 "is", 788 789 "lazy", 790 "long", 791 792 "macro", 793 "mixin", 794 "module", 795 796 "new", 797 "nothrow", 798 "null", 799 800 "out", 801 "override", 802 803 "package", 804 "pragma", 805 "private", 806 "protected", 807 "public", 808 "pure", 809 810 "real", 811 "ref", 812 "return", 813 814 "scope", 815 "shared", 816 "short", 817 "static", 818 "struct", 819 "super", 820 "switch", 821 "synchronized", 822 823 "template", 824 "this", 825 "throw", 826 "true", 827 "try", 828 "typedef", 829 "typeid", 830 "typeof", 831 832 "ubyte", 833 "ucent", 834 "uint", 835 "ulong", 836 "union", 837 "unittest", 838 "ushort", 839 840 "version", 841 "void", 842 "volatile", 843 844 "wchar", 845 "while", 846 "with", 847 848 "__FILE__", 849 "__MODULE__", 850 "__LINE__", 851 "__FUNCTION__", 852 "__PRETTY_FUNCTION__", 853 854 //Special Token Replaced with 855 "__DATE__", // string literal of the date of compilation "mmm dd yyyy" 856 "__EOF__", // sets the scanner to the end of the file 857 "__TIME__", // string literal of the time of compilation "hh:mm:ss" 858 "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 859 "__VENDOR__", // Compiler vendor string, such as "Digital Mars D" 860 "__VERSION__", // Compiler version as an integer, such as 2001 861 862 863 "__gshared", 864 "__traits", 865 "__vector", 866 "__parameters" 867 ]; 868 869 public dstring getKeywordNameD(Keyword keyword) pure nothrow { 870 return KEYWORD_STRINGS[keyword]; 871 }; 872 873 public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow { 874 for (Keyword i = start; i <= end; i++) { 875 dstring s = KEYWORD_STRINGS[i]; 876 if (s.length > len + 1) 877 continue; // too long 878 bool found = true; 879 for (uint j = 1; j < s.length; j++) { 880 if (s[j] != name[j - 1]) { 881 found = false; 882 break; 883 } 884 } 885 if (found) { 886 if (s.length == len - 1 || !isIdentMiddleChar(name[s.length - 1])) { 887 pos += s.length - 1; 888 return i; 889 } 890 } 891 } 892 return Keyword.NONE; 893 } 894 895 /** 896 * Token. 897 */ 898 class Token { 899 protected SourceFile _file; 900 protected int _line; 901 protected int _pos; 902 protected TokenType _type; 903 /// returns token type 904 @property TokenType type() { return _type; } 905 /// returns file info for source 906 @property SourceFile filename() { return _file; } 907 /// returns 1-based source line number of token start 908 @property int line() { return _line; } 909 /// returns 1-based source line position of token start 910 @property int pos() { return _pos; } 911 /// returns token text 912 @property dchar[] text() { return null; } 913 914 // number token properties 915 @property dchar literalType() { return 0; } 916 @property ulong intValue() { return 0; } 917 @property bool isUnsigned() { return false; } 918 @property ulong isLong() { return false; } 919 @property real realValue() { return 0; } 920 @property double doubleValue() { return 0; } 921 @property float floatValue() { return 0; } 922 @property byte precision() { return 0; } 923 @property bool isImaginary() { return false; } 924 925 /// returns opcode ID - for opcode tokens 926 @property OpCode opCode() { return OpCode.NONE; } 927 /// returns keyword ID - for keyword tokens 928 @property Keyword keyword() { return Keyword.NONE; } 929 /// returns true if this is documentation comment token 930 @property bool isDocumentationComment() { return false; } 931 932 // error handling 933 934 /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer) 935 @property bool isError() { return type == TokenType.INVALID; } 936 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 937 @property string errorMessage() { return null; } 938 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 939 @property int errorCode() { return 0; } 940 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 941 @property TokenType invalidTokenType() { return TokenType.INVALID; } 942 943 944 this(TokenType type) { 945 _type = type; 946 } 947 948 this(TokenType type, SourceFile file, int line, int pos) { 949 _type = type; 950 _file = file; 951 _line = line; 952 _pos = pos; 953 } 954 /// set start position for token (line is 1-based, pos is 0-based) 955 void setPos(SourceFile file, int line, int pos) { 956 _file = file; 957 _line = line; 958 _pos = pos + 1; 959 } 960 /// set source file information for token 961 void setFile(SourceFile file) { 962 _file = file; 963 } 964 /// set start position for token (line is 1-based, pos is 0-based) 965 void setPos(int line, int pos) { 966 _line = line; 967 _pos = pos + 1; 968 } 969 970 public abstract Token clone(); 971 public override @property string toString() { 972 return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword) 973 ~" \"" ~ toUTF8(text()) ~ "\""; 974 } 975 } 976 977 class EofToken : Token { 978 this() { 979 super(TokenType.EOF); 980 } 981 this(SourceFile file, uint line, uint pos) { 982 super(TokenType.EOF, file, line, pos); 983 } 984 override public Token clone() { 985 return new EofToken(_file, _line, _pos); 986 } 987 public override @property string toString() { 988 return "EOF"; 989 } 990 } 991 992 // treat as white space 993 //class EolToken : Token { 994 // this(string file, uint line, uint pos) { 995 // super(TokenType.EOL, file, line, pos); 996 // } 997 //} 998 999 /// white space token 1000 class WhiteSpaceToken : Token { 1001 this() { 1002 super(TokenType.WHITESPACE); 1003 } 1004 this(SourceFile file, uint line, uint pos) { 1005 super(TokenType.WHITESPACE, file, line, pos); 1006 } 1007 override public Token clone() { 1008 return new WhiteSpaceToken(_file, _line, _pos); 1009 } 1010 public override @property string toString() { 1011 return "WhiteSpace"; 1012 } 1013 } 1014 1015 class OpToken : Token { 1016 OpCode _op; 1017 public @property override OpCode opCode() { return _op; } 1018 public @property void opCode(OpCode op) { _op = op; } 1019 public @property override dchar[] text() { return cast(dchar[])getOpNameD(_op); } 1020 this() { 1021 super(TokenType.OP); 1022 } 1023 this(SourceFile file, uint line, uint pos) { 1024 super(TokenType.OP, file, line, pos); 1025 } 1026 override public Token clone() { 1027 return new OpToken(_file, _line, _pos); 1028 } 1029 public override @property string toString() { 1030 return "Op:" ~ to!string(_op); 1031 } 1032 } 1033 1034 class KeywordToken : Token { 1035 Keyword _keyword; 1036 public @property override Keyword keyword() { return _keyword; } 1037 public @property void keyword(Keyword keyword) { _keyword = keyword; } 1038 public @property override dchar[] text() { return cast(dchar[])getKeywordNameD(_keyword); } 1039 this() { 1040 super(TokenType.KEYWORD); 1041 } 1042 this(SourceFile file, uint line, uint pos) { 1043 super(TokenType.KEYWORD, file, line, pos); 1044 } 1045 override public Token clone() { 1046 return new KeywordToken(_file, _line, _pos); 1047 } 1048 public override @property string toString() { 1049 return "Keyword:" ~ to!string(_keyword); 1050 } 1051 } 1052 1053 /// comment token 1054 class CommentToken : Token { 1055 protected dchar[] _text; 1056 protected bool _isDocumentationComment; 1057 1058 override @property bool isDocumentationComment() { 1059 return _isDocumentationComment; 1060 } 1061 1062 @property void isDocumentationComment(bool f) { 1063 _isDocumentationComment = f; 1064 } 1065 1066 @property override dchar[] text() { return _text; } 1067 @property void text(dchar[] text) { _text = text; } 1068 this() { 1069 super(TokenType.COMMENT); 1070 } 1071 this(SourceFile file, uint line, uint pos, dchar[] text) { 1072 super(TokenType.COMMENT, file, line, pos); 1073 _text = text; 1074 } 1075 override public Token clone() { 1076 return new CommentToken(_file, _line, _pos, _text.dup); 1077 } 1078 public override @property string toString() { 1079 return "Comment:" ~ to!string(_text); 1080 } 1081 } 1082 1083 /// Invalid token holder - for error tolerant parsing 1084 class InvalidToken : Token { 1085 protected dchar[] _text; 1086 protected TokenType _invalidTokenType; 1087 protected int _errorCode; 1088 protected string _errorMessage; 1089 1090 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 1091 override @property string errorMessage() { return _errorMessage; } 1092 /// sets error message 1093 @property void errorMessage(string s) { _errorMessage = s; } 1094 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 1095 override @property int errorCode() { return _errorCode; } 1096 /// sets error code 1097 @property void errorCode(int c) { _errorCode = c; } 1098 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 1099 override @property TokenType invalidTokenType() { return _invalidTokenType; } 1100 /// sets type of token parsing of which has been failed 1101 @property void invalidTokenType(TokenType t) { _invalidTokenType = t; } 1102 1103 /// text of invalid token 1104 @property override dchar[] text() { return _text; } 1105 /// text of invalid token 1106 @property void text(dchar[] text) { _text = text; } 1107 1108 this() { 1109 super(TokenType.INVALID); 1110 } 1111 this(SourceFile file, uint line, uint pos, dchar[] text) { 1112 super(TokenType.INVALID, file, line, pos); 1113 _text = text; 1114 } 1115 override Token clone() { 1116 InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup); 1117 res._errorMessage = _errorMessage.dup; 1118 res._errorCode = _errorCode; 1119 res._invalidTokenType = _invalidTokenType; 1120 return res; 1121 } 1122 override @property string toString() { 1123 return "Invalid:" ~ to!string(_text); 1124 } 1125 } 1126 1127 alias tokenizer_ident_t = uint; 1128 alias tokenizer_ident_name_t = dchar[]; 1129 1130 enum : tokenizer_ident_t { 1131 NO_IDENT = 0 1132 } 1133 1134 /** 1135 * Global storage for identifier strings. 1136 */ 1137 class IdentHolder { 1138 protected tokenizer_ident_t _nextId; 1139 protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName; 1140 protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId; 1141 1142 public this() { 1143 _nextId = NO_IDENT + 1; 1144 } 1145 1146 /** 1147 * Search for id by name, return NO_IDENT if not found. 1148 */ 1149 uint findByName(tokenizer_ident_name_t name) { 1150 tokenizer_ident_t * found = (name in _nameToId); 1151 if (found) 1152 return *found; 1153 return NO_IDENT; 1154 } 1155 1156 /** 1157 * Search for name by id, return null if not found. 1158 */ 1159 tokenizer_ident_name_t nameById(tokenizer_ident_t id) { 1160 auto found = (id in _idToName); 1161 if (found) 1162 return *found; 1163 return null; 1164 } 1165 1166 /** 1167 * Search for ident id by name, create new entry if not found. 1168 */ 1169 tokenizer_ident_t idByName(tokenizer_ident_name_t name) { 1170 uint * found = (name in _nameToId); 1171 if (found) 1172 return *found; 1173 uint newid = _nextId++; 1174 _nameToId[cast(dstring)name] = newid; 1175 _idToName[newid] = cast(tokenizer_ident_name_t)name; 1176 return newid; 1177 } 1178 } 1179 1180 /** 1181 * Thread local storage for IDs. 1182 */ 1183 IdentHolder identMap; 1184 1185 static this() { 1186 // init ID storage 1187 identMap = new IdentHolder(); 1188 } 1189 1190 class StringLiteralToken : Token { 1191 dchar[] _text; 1192 dchar _literalType; 1193 public @property override dchar literalType() { return _literalType; } 1194 public @property override dchar[] text() { return _text; } 1195 public void setText(dchar[] text, dchar type) { _text = text; _literalType = type; } 1196 this() { 1197 super(TokenType.STRING); 1198 } 1199 this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) { 1200 super(TokenType.STRING, file, line, pos); 1201 _text = text; 1202 _literalType = type; 1203 } 1204 override public Token clone() { 1205 return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType); 1206 } 1207 public override @property string toString() { 1208 return "String:" ~ to!string(_text); 1209 } 1210 } 1211 1212 class CharacterLiteralToken : Token { 1213 dchar _character; 1214 dchar _literalType; 1215 @property override dchar literalType() { return _literalType; } 1216 @property dchar character() { return _character; } 1217 @property override dchar[] text() { return [_character]; } 1218 void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; } 1219 this() { 1220 super(TokenType.CHARACTER); 1221 } 1222 this(SourceFile file, uint line, uint pos, dchar character, dchar type) { 1223 super(TokenType.CHARACTER, file, line, pos); 1224 _character = character; 1225 _literalType = type; 1226 } 1227 override public Token clone() { 1228 return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType); 1229 } 1230 public override @property string toString() { 1231 return "Char:" ~ toUTF8([_character]); 1232 } 1233 } 1234 1235 class IntegerLiteralToken : Token { 1236 ulong _value; 1237 bool _unsigned; 1238 bool _long; 1239 public @property override ulong intValue() { return _value; } 1240 public @property override bool isUnsigned() { return _unsigned; } 1241 public @property override ulong isLong() { return _long; } 1242 public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } 1243 public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) { 1244 _value = value; 1245 _unsigned = unsignedFlag; 1246 _long = longFlag; 1247 } 1248 public void setFlags(bool unsignedFlag = false, bool longFlag = false) { 1249 _unsigned = unsignedFlag; 1250 _long = longFlag; 1251 } 1252 this() { 1253 super(TokenType.INTEGER); 1254 } 1255 this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) { 1256 super(TokenType.INTEGER, file, line, pos); 1257 _value = value; 1258 _unsigned = unsignedFlag; 1259 _long = longFlag; 1260 } 1261 override public Token clone() { 1262 return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long); 1263 } 1264 public override @property string toString() { 1265 return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? "U" : ""); 1266 } 1267 } 1268 1269 class RealLiteralToken : Token { 1270 real _value; 1271 byte _precision; 1272 bool _imaginary; 1273 public @property override ulong intValue() { return to!long(_value); } 1274 public @property override real realValue() { return _value; } 1275 public @property override double doubleValue() { return cast(double)_value; } 1276 public @property override float floatValue() { return cast(float)_value; } 1277 public @property override byte precision() { return _precision; } 1278 public @property override bool isImaginary() { return _imaginary; } 1279 public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } 1280 public void setValue(real value, byte precision = 1, bool imaginary = false) { 1281 _value = value; 1282 _precision = precision; 1283 _imaginary = imaginary; 1284 } 1285 public void setFlags(byte precision = 1, bool imaginary = false) { 1286 _precision = precision; 1287 _imaginary = imaginary; 1288 } 1289 this() { 1290 super(TokenType.FLOAT); 1291 } 1292 this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) { 1293 super(TokenType.FLOAT, file, line, pos); 1294 _value = value; 1295 _precision = precision; 1296 _imaginary = imaginary; 1297 } 1298 override public Token clone() { 1299 return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary); 1300 } 1301 public override @property string toString() { 1302 return "Integer:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : ""); 1303 } 1304 } 1305 1306 class IdentToken : Token { 1307 tokenizer_ident_t _id; 1308 public @property override dchar[] text() { return identMap.nameById(_id); } 1309 public void setText(dchar[] text) { _id = identMap.idByName(text); } 1310 this() { 1311 super(TokenType.IDENTIFIER); 1312 } 1313 this(SourceFile file, uint line, uint pos, dchar[] text) { 1314 super(TokenType.IDENTIFIER, file, line, pos); 1315 _id = identMap.idByName(text); 1316 } 1317 this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) { 1318 super(TokenType.IDENTIFIER, file, line, pos); 1319 _id = id; 1320 } 1321 override public Token clone() { 1322 return new IdentToken(_file, _line, _pos, _id); 1323 } 1324 public override @property string toString() { 1325 return "Ident:" ~ to!string(text); 1326 } 1327 } 1328 1329 // shared appender buffer, to avoid extra heap allocations 1330 struct StringAppender { 1331 dchar[] buf; 1332 uint len; 1333 dchar[] get() { 1334 return buf[0 .. len]; 1335 } 1336 void appendEol() { 1337 if (len + 1 > buf.length) { 1338 uint newsize = cast(uint)((len + 1 + buf.length) * 2); 1339 if (newsize < 128) 1340 newsize = 128; 1341 buf.length = newsize; 1342 } 1343 buf[len] = '\n'; 1344 len++; 1345 } 1346 void append(dchar[] s) { 1347 if (s.length == 0) 1348 return; 1349 if (len + s.length > buf.length) { 1350 uint newsize = cast(uint)((len + s.length + buf.length) * 2); 1351 if (newsize < 128) 1352 newsize = 128; 1353 buf.length = newsize; 1354 } 1355 buf[len .. len + s.length] = s; 1356 len += s.length; 1357 } 1358 void reset() { 1359 len = 0; 1360 } 1361 } 1362 1363 class Tokenizer 1364 { 1365 protected SourceLines _lineStream; 1366 protected dchar[] _lineText; 1367 protected int _line; // current line number 1368 protected int _len; // current line length 1369 protected int _pos; // current line read position 1370 protected int _prevLineLength; // previous line length 1371 protected uint _state; // tokenizer state 1372 1373 enum : int { 1374 EOF_CHAR = 0x001A, 1375 EOL_CHAR = 0x000A 1376 }; 1377 1378 protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken(); 1379 protected CommentToken _sharedCommentToken = new CommentToken(); 1380 protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken(); 1381 protected IdentToken _sharedIdentToken = new IdentToken(); 1382 protected OpToken _sharedOpToken = new OpToken(); 1383 protected KeywordToken _sharedKeywordToken = new KeywordToken(); 1384 protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken(); 1385 protected RealLiteralToken _sharedRealToken = new RealLiteralToken(); 1386 protected InvalidToken _sharedInvalidToken = new InvalidToken(); 1387 protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken(); 1388 protected StringAppender _stringLiteralAppender; 1389 protected StringAppender _commentAppender; 1390 protected StringAppender _identAppender; 1391 1392 protected bool _enableCommentText = true; 1393 /// when false, does not put comment text into comment token - for less allocations 1394 @property void enableCommentText(bool enabled) { 1395 _enableCommentText = enabled; 1396 } 1397 /// when false, does not put comment text into comment token - for less allocations 1398 @property bool enableCommentText() { 1399 return _enableCommentText; 1400 } 1401 1402 protected bool _errorTolerant = false; 1403 /// when true, returns BadToken instead of throwing exception 1404 @property void errorTolerant(bool enabled) { 1405 _errorTolerant = enabled; 1406 } 1407 /// when true, returns BadToken instead of throwing exception 1408 @property bool errorTolerant() { 1409 return _errorTolerant; 1410 } 1411 1412 this(SourceLines lineStream) { 1413 init(lineStream); 1414 } 1415 1416 void init(SourceLines lineStream) { 1417 _lineStream = lineStream; 1418 SourceFile file = _lineStream.file; 1419 _sharedWhiteSpaceToken.setFile(file); 1420 _sharedCommentToken.setFile(file); 1421 _sharedStringLiteralToken.setFile(file); 1422 _sharedIdentToken.setFile(file); 1423 _sharedOpToken.setFile(file); 1424 _sharedKeywordToken.setFile(file); 1425 _sharedIntegerToken.setFile(file); 1426 _sharedRealToken.setFile(file); 1427 _sharedInvalidToken.setFile(file); 1428 _sharedCharacterLiteralToken.setFile(file); 1429 buildTime = Clock.currTime(); 1430 _line = lineStream.line; 1431 _pos = 0; 1432 _prevLineLength = 0; 1433 _lineText = null; 1434 nextLine(); 1435 } 1436 1437 this(string code, string filename = "") { 1438 this(new ArraySourceLines(code, filename)); 1439 } 1440 1441 // fetch next line from source stream 1442 protected bool nextLine() { 1443 _prevLineLength = cast(int)_lineText.length; 1444 _lineText = _lineStream.readLine(); 1445 if (!_lineText) { 1446 if (_lineStream.errorCode != 0) 1447 throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos); 1448 if (_lineStream.eof) { 1449 // end of file 1450 _pos = 0; 1451 _len = 0; 1452 return false; 1453 } 1454 // just an empty line 1455 } 1456 _line = _lineStream.line; 1457 _pos = 0; 1458 _len = cast(int)_lineText.length; // do not support lines longer that 4Gb 1459 return true; 1460 } 1461 1462 protected dchar nextChar() { 1463 if (_pos >= _len) { 1464 if (!nextLine()) { 1465 return EOF_CHAR; 1466 } 1467 return EOL_CHAR; 1468 } 1469 return _lineText[_pos++]; 1470 } 1471 1472 protected dchar peekChar() { 1473 if (_lineText is null) { 1474 if (!nextLine()) { 1475 return EOF_CHAR; 1476 } 1477 } 1478 if (_pos >= _len) 1479 return EOL_CHAR; 1480 return _lineText[_pos++]; 1481 } 1482 1483 protected Token emitEof() { 1484 // TODO: check for current state 1485 return new EofToken(_lineStream.file, _line, _pos); 1486 } 1487 1488 protected Token processWhiteSpace(dchar firstChar) { 1489 // reuse the same token instance, to avoid extra heap spamming 1490 if (_pos == 0) { 1491 _sharedWhiteSpaceToken.setPos(_line - 1, _prevLineLength); 1492 } else { 1493 _sharedWhiteSpaceToken.setPos(_line, _pos - 1); 1494 } 1495 for (;;) { 1496 int i = _pos; 1497 for (; i < _len; i++) { 1498 dchar ch = _lineText[i]; 1499 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR)) 1500 break; 1501 } 1502 _pos = i; 1503 if (_pos < _len) 1504 break; 1505 // go to next line 1506 if (!nextLine()) 1507 break; 1508 } 1509 return _sharedWhiteSpaceToken; 1510 } 1511 1512 protected Token processOneLineComment() { 1513 _sharedCommentToken.setPos(_line, _pos - 1); 1514 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/'; 1515 if (_enableCommentText) { 1516 _sharedCommentToken.text = _lineText[_pos + 1 .. $]; 1517 } 1518 _pos = _len; 1519 return _sharedCommentToken; 1520 } 1521 1522 protected Token processOneLineSharpComment() { 1523 _sharedCommentToken.setPos(_line, _pos - 1); 1524 if (_enableCommentText) { 1525 _sharedCommentToken.text = _lineText[_pos .. $]; 1526 } 1527 _pos = _len; 1528 return _sharedCommentToken; 1529 } 1530 1531 // Comment /* */ 1532 protected Token processMultilineComment() { 1533 _sharedCommentToken.setPos(_line, _pos - 1); 1534 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*'; 1535 _commentAppender.reset(); 1536 int textStart = _pos + 1; 1537 for (;;) { 1538 int textEnd = int.max; 1539 int i = textStart; 1540 for (; i < _len - 1; i++) { 1541 if (_lineText[i] == '*' && _lineText[i + 1] == '/') { 1542 textEnd = i; 1543 break; 1544 } 1545 } 1546 if (textEnd != int.max) { 1547 if (_enableCommentText) 1548 _commentAppender.append(_lineText[textStart .. textEnd]); 1549 _pos = textEnd + 2; 1550 break; 1551 } 1552 if (!nextLine()) { 1553 // TODO: do we need throw exception if comment not closed by end of file? 1554 _pos = _len; 1555 break; 1556 } 1557 textStart = 0; 1558 } 1559 if (_enableCommentText) { 1560 _sharedCommentToken.text = _commentAppender.get(); 1561 } 1562 return _sharedCommentToken; 1563 } 1564 1565 // Comment /+ +/ 1566 protected Token processNestedComment() { 1567 _sharedCommentToken.setPos(_line, _pos - 1); 1568 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+'; 1569 _commentAppender.reset(); 1570 dchar[] text; 1571 int textStart = _pos + 1; 1572 int level = 1; 1573 for (;;) { 1574 int textEnd = int.max; 1575 int i = textStart; 1576 for (; i < _len - 1; i++) { 1577 if (_lineText[i] == '/' && _lineText[i + 1] == '+') { 1578 level++; 1579 i++; 1580 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') { 1581 if (--level == 0) { 1582 textEnd = i; 1583 break; 1584 } 1585 } 1586 } 1587 if (textEnd != int.max) { 1588 if (_enableCommentText) 1589 _commentAppender.append(_lineText[textStart .. textEnd]); 1590 _pos = textEnd + 2; 1591 break; 1592 } 1593 if (!nextLine()) { 1594 // TODO: do we need throw exception if comment not closed by end of file? 1595 _pos = _len; 1596 break; 1597 } 1598 if (_enableCommentText) 1599 _commentAppender.appendEol(); 1600 textStart = 0; 1601 } 1602 if (_enableCommentText) { 1603 _sharedCommentToken.text = _commentAppender.get(); 1604 } 1605 return _sharedCommentToken; 1606 } 1607 1608 protected Token processHexString() { 1609 _pos++; 1610 // TODO: 1611 return null; 1612 } 1613 1614 protected Token processDelimitedString() { 1615 _pos++; 1616 // TODO: 1617 return null; 1618 } 1619 1620 // r"string" or `string` 1621 protected Token processWysiwygString(dchar ch) { 1622 _pos++; 1623 // TODO: 1624 return null; 1625 } 1626 1627 protected Token processIdent() { 1628 _sharedIdentToken.setPos(_line, _pos - 1); 1629 _identAppender.reset(); 1630 int startPos = _pos - 1; 1631 int endPos = _len; 1632 for (int i = _pos; i < _len; i++) { 1633 dchar ch = _lineText[i]; 1634 if (!isIdentMiddleChar(ch)) { 1635 endPos = i; 1636 break; 1637 } 1638 } 1639 _pos = endPos; 1640 _sharedIdentToken.setText(_lineText[startPos .. endPos]); 1641 return _sharedIdentToken; 1642 } 1643 1644 protected Token processIntegerSuffix() { 1645 if (_pos >= _len) 1646 return _sharedIntegerToken; 1647 bool longFlag = false; 1648 bool unsignedFlag = false; 1649 dchar ch = _lineText[_pos]; 1650 dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 1651 if (ch == 'l' || ch == 'L') { 1652 longFlag = true; 1653 _pos++; 1654 if (ch2 == 'u' || ch2 == 'U') { 1655 unsignedFlag = true; 1656 _pos++; 1657 } 1658 } else if (ch == 'u' || ch == 'U') { 1659 unsignedFlag = true; 1660 _pos++; 1661 if (ch2 == 'l' || ch2 == 'L') { 1662 longFlag = true; 1663 _pos++; 1664 } 1665 } 1666 _sharedIntegerToken.setFlags(unsignedFlag, longFlag); 1667 ch = _pos < _len ? _lineText[_pos] : 0; 1668 if (isIdentMiddleChar(ch)) 1669 return parserError("Unexpected character after number", _sharedIntegerToken); 1670 return _sharedIntegerToken; 1671 } 1672 1673 protected Token processBinaryNumber() { 1674 _sharedIntegerToken.setPos(_line, _pos - 1); 1675 _pos++; 1676 if (_pos >= _len) 1677 return parserError("Unexpected end of line in binary number", _sharedIntegerToken); 1678 int digits = 0; 1679 ulong number = 0; 1680 int i = _pos; 1681 for (;i < _len; i++) { 1682 dchar ch = _lineText[i]; 1683 if (ch != '0' && ch != '1') 1684 break; 1685 number = (number << 1) | (ch == '1' ? 1 : 0); 1686 digits++; 1687 } 1688 _pos = i; 1689 if (digits > 64) 1690 return parserError("number is too big", _sharedIntegerToken); 1691 _sharedIntegerToken.setValue(number); 1692 return processIntegerSuffix(); 1693 } 1694 1695 protected Token processHexNumber() { 1696 _sharedIntegerToken.setPos(_line, _pos - 1); 1697 _sharedRealToken.setPos(_line, _pos - 1); 1698 _pos++; 1699 if (_pos >= _len) 1700 return parserError("Unexpected end of line in hex number", _sharedIntegerToken); 1701 int digits = 0; 1702 ulong number = 0; 1703 int i = _pos; 1704 for (;i < _len; i++) { 1705 dchar ch = _lineText[i]; 1706 uint digit = 0; 1707 if (ch >= '0' && ch <= '9') 1708 digit = ch - '0'; 1709 else if (ch >= 'a' && ch <= 'f') 1710 digit = ch - 'a' + 10; 1711 else if (ch >= 'A' && ch <= 'F') 1712 digit = ch - 'A' + 10; 1713 else if (ch == '_') 1714 continue; 1715 else 1716 break; 1717 number = (number << 4) | digit; 1718 digits++; 1719 } 1720 _pos = i; 1721 if (digits > 16) 1722 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1723 _sharedIntegerToken.setValue(number); 1724 return processIntegerSuffix(); 1725 } 1726 1727 protected Token processOctNumber() { 1728 _sharedIntegerToken.setPos(_line, _pos - 1); 1729 if (_pos >= _len) 1730 return parserError("Unexpected end of line in octal number", _sharedIntegerToken); 1731 int digits = 0; 1732 ulong number = 0; 1733 int i = _pos; 1734 bool overflow = false; 1735 for (;i < _len; i++) { 1736 dchar ch = _lineText[i]; 1737 int digit = 0; 1738 if (ch >= '0' && ch <= '7') 1739 digit = ch - '0'; 1740 else if (ch == '_') 1741 continue; 1742 else 1743 break; 1744 number <<= 3; 1745 if (digits >= 20) { 1746 if ((number >> 3) << 3 != number) { 1747 overflow = true; 1748 break; 1749 } 1750 } 1751 number |= digit; 1752 digits++; 1753 } 1754 _pos = i; 1755 if (overflow) 1756 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1757 _sharedIntegerToken.setValue(number); 1758 return processIntegerSuffix(); 1759 } 1760 1761 // 1762 protected Token processDecFloatSuffix(real value) { 1763 _sharedRealToken.setValue(value); 1764 // TODO 1765 return _sharedRealToken; 1766 } 1767 1768 // after E char 1769 protected Token processDecFloatExponent(real value) { 1770 dchar next = _pos < _len ? _lineText[_pos] : 0; 1771 int sign = 1; 1772 if (next == '+') { 1773 _pos++; 1774 } else if (next == '-') { 1775 _pos++; 1776 sign = -1; 1777 } 1778 if (_pos >= _len) 1779 return parserError("Invalid exponent", _sharedRealToken); 1780 ulong digits = 0; 1781 ulong number = 0; 1782 int i = _pos; 1783 bool overflow = false; 1784 for (;i < _len; i++) { 1785 dchar ch = _lineText[i]; 1786 uint digit = 0; 1787 if (ch >= '0' && ch <= '9') 1788 digit = ch - '0'; 1789 else if (ch == '_') 1790 continue; 1791 else 1792 break; 1793 number *= 10; 1794 if (digits >= 18) { 1795 if ((number * 10) / 10 != number) { 1796 overflow = true; 1797 break; 1798 } 1799 } 1800 number += digit; 1801 digits++; 1802 } 1803 if (digits == 0) 1804 return parserError("Invalid exponent", _sharedRealToken); 1805 _pos = i; 1806 value *= pow(10., cast(long)number * sign); 1807 return processDecFloatSuffix(value); 1808 } 1809 1810 protected Token processDecFloatSecondPart(ulong firstPart) { 1811 if (_pos >= _len) { 1812 _sharedRealToken.setValue(cast(real)firstPart); 1813 return _sharedRealToken; 1814 } 1815 ulong divider = 1; 1816 ulong number = 0; 1817 int i = _pos; 1818 bool overflow = false; 1819 for (;i < _len; i++) { 1820 dchar ch = _lineText[i]; 1821 uint digit = 0; 1822 if (ch >= '0' && ch <= '9') 1823 digit = ch - '0'; 1824 else if (ch == '_') 1825 continue; 1826 else 1827 break; 1828 if (divider * 10 < divider) 1829 continue; // ignore extra digits 1830 number *= 10; 1831 number += digit; 1832 divider *= 10; 1833 } 1834 _pos = i; 1835 real value = cast(real)firstPart + (cast(real)number / divider); 1836 dchar next = _pos < _len ? _lineText[_pos] : 0; 1837 if (next == 0) { 1838 // neither exponent nor suffix 1839 _sharedRealToken.setValue(value); 1840 return _sharedRealToken; 1841 } 1842 if (next == 'e' || next == 'E') { 1843 _pos++; 1844 return processDecFloatExponent(value); 1845 } 1846 return processDecFloatSuffix(value); 1847 } 1848 1849 protected Token processDecNumber(dchar c) { 1850 _pos--; 1851 _sharedIntegerToken.setPos(_line, _pos); 1852 _sharedRealToken.setPos(_line, _pos); 1853 if (_pos >= _len) 1854 return parserError("Unexpected end of line in number", _sharedIntegerToken); 1855 int digits = 0; 1856 ulong number = 0; 1857 int i = _pos; 1858 bool overflow = false; 1859 for (;i < _len; i++) { 1860 dchar ch = _lineText[i]; 1861 uint digit = 0; 1862 if (ch >= '0' && ch <= '9') 1863 digit = ch - '0'; 1864 else if (ch == '_') 1865 continue; 1866 else 1867 break; 1868 number *= 10; 1869 if (digits >= 18) { 1870 if ((number * 10) / 10 != number) { 1871 overflow = true; 1872 break; 1873 } 1874 } 1875 number += digit; 1876 digits++; 1877 } 1878 _pos = i; 1879 if (overflow) 1880 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1881 _sharedIntegerToken.setValue(number); 1882 dchar next = _pos < _len ? _lineText[_pos] : 0; 1883 if (next == 0) 1884 return _sharedIntegerToken; 1885 if (next == '.') { 1886 _pos++; 1887 return processDecFloatSecondPart(number); 1888 } 1889 return processIntegerSuffix(); 1890 } 1891 1892 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 1893 protected Token parserError(string msg, Token incompleteToken) { 1894 return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type); 1895 } 1896 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 1897 protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) { 1898 if (_errorTolerant) { 1899 startPos--; 1900 _sharedInvalidToken.setPos(startLine, startPos); 1901 _sharedInvalidToken.errorMessage = msg; 1902 _sharedInvalidToken.errorCode = 1; // for future extension 1903 _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension 1904 // make invalid source text 1905 dchar[] invalidText; 1906 int p = startLine == _line ? startPos : 0; 1907 for (int i = p; i < _pos && i < _lineText.length; i++) 1908 invalidText ~= _lineText[i]; 1909 1910 // recover after error 1911 for (; _pos < _lineText.length; _pos++) { 1912 dchar ch = _lineText[_pos]; 1913 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}') 1914 break; 1915 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) { 1916 if (ch == '*' || ch == '/') 1917 break; 1918 } 1919 invalidText ~= ch; 1920 } 1921 _sharedInvalidToken.text = invalidText; 1922 return _sharedInvalidToken; 1923 } 1924 throw new ParserException(msg, _lineStream.file, _line, _pos); 1925 } 1926 1927 protected Keyword detectKeyword(dchar ch) { 1928 if (ch > 'z') 1929 return Keyword.NONE; 1930 int len = _len - _pos; 1931 switch (cast(ubyte)ch) { 1932 // ABSTRACT, 1933 // ALIAS, 1934 // ALIGN, 1935 // ASM, 1936 // ASSERT, 1937 // AUTO, 1938 case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos); 1939 1940 // BODY, 1941 // BOOL, 1942 // BREAK, 1943 // BYTE, 1944 case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos); 1945 1946 // CASE, 1947 // CAST, 1948 // CATCH, 1949 // CDOUBLE, 1950 // CENT, 1951 // CFLOAT, 1952 // CHAR, 1953 // CLASS, 1954 // CONST, 1955 // CONTINUE, 1956 // CREAL, 1957 case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos); 1958 1959 // DCHAR, 1960 // DEBUG, 1961 // DEFAULT, 1962 // DELEGATE, 1963 // DELETE, 1964 // DEPRECATED, 1965 // DO, 1966 // DOUBLE, 1967 case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos); 1968 1969 // ELSE, 1970 // ENUM, 1971 // EXPORT, 1972 // EXTERN, 1973 case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos); 1974 1975 // FALSE, 1976 // FINAL, 1977 // FINALLY, 1978 // FLOAT, 1979 // FOR, 1980 // FOREACH, 1981 // FOREACH_REVERSE, 1982 // FUNCTION, 1983 case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos); 1984 1985 // GOTO, 1986 case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos); 1987 1988 // IDOUBLE, 1989 // IF, 1990 // IFLOAT, 1991 // IMMUTABLE, 1992 // IMPORT, 1993 // IN, 1994 // INOUT, 1995 // INT, 1996 // INTERFACE, 1997 // INVARIANT, 1998 // IREAL, 1999 // IS, 2000 case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos); 2001 2002 // LAZY, 2003 // LONG, 2004 case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos); 2005 2006 // MACRO, 2007 // MIXIN, 2008 // MODULE, 2009 case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos); 2010 2011 // NEW, 2012 // NOTHROW, 2013 // NULL, 2014 case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos); 2015 2016 // OUT, 2017 // OVERRIDE, 2018 case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos); 2019 2020 // PACKAGE, 2021 // PRAGMA, 2022 // PRIVATE, 2023 // PROTECTED, 2024 // PUBLIC, 2025 // PURE, 2026 case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos); 2027 2028 // REAL, 2029 // REF, 2030 // RETURN, 2031 case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos); 2032 2033 // SCOPE, 2034 // SHARED, 2035 // SHORT, 2036 // STATIC, 2037 // STRUCT, 2038 // SUPER, 2039 // SWITCH, 2040 // SYNCHRONIZED, 2041 case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos); 2042 2043 // TEMPLATE, 2044 // THIS, 2045 // THROW, 2046 // TRUE, 2047 // TRY, 2048 // TYPEDEF, 2049 // TYPEID, 2050 // TYPEOF, 2051 case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos); 2052 2053 // UBYTE, 2054 // UCENT, 2055 // UINT, 2056 // ULONG, 2057 // UNION, 2058 // UNITTEST, 2059 // USHORT, 2060 case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos); 2061 2062 // VERSION, 2063 // VOID, 2064 // VOLATILE, 2065 case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos); 2066 2067 // WCHAR, 2068 // WHILE, 2069 // WITH, 2070 case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos); 2071 2072 // FILE, 2073 // MODULE, 2074 // LINE, 2075 // FUNCTION, 2076 // PRETTY_FUNCTION, 2077 // 2078 // GSHARED, 2079 // TRAITS, 2080 // VECTOR, 2081 // PARAMETERS, 2082 case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos); 2083 default: return Keyword.NONE; 2084 } 2085 } 2086 protected OpCode detectOp(dchar ch) nothrow { 2087 if (ch >= 128) 2088 return OpCode.NONE; 2089 dchar ch2 = _pos < _len ? _lineText[_pos] : 0; 2090 dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 2091 switch(cast(ubyte)ch) { 2092 // DIV, // / 2093 // DIV_EQ, // /= 2094 case '/': 2095 if (ch2 == '=') { 2096 _pos++; 2097 return OpCode.DIV_EQ; 2098 } 2099 return OpCode.DIV; 2100 // DOT, // . 2101 // DOT_DOT, // .. 2102 // DOT_DOT_DOT,// ... 2103 case '.': 2104 if (ch2 == '.') { 2105 if (ch3 == '.') { 2106 _pos += 2; 2107 return OpCode.DOT_DOT_DOT; 2108 } 2109 _pos++; 2110 return OpCode.DOT_DOT; 2111 } 2112 return OpCode.DOT; 2113 // AND, // & 2114 // AND_EQ, // &= 2115 // LOG_AND, // && 2116 case '&': 2117 if (ch2 == '=') { 2118 _pos++; 2119 return OpCode.AND_EQ; 2120 } 2121 if (ch2 == '&') { 2122 _pos++; 2123 return OpCode.LOG_AND; 2124 } 2125 return OpCode.AND; 2126 // OR, // | 2127 // OR_EQ, // |= 2128 // LOG_OR, // || 2129 case '|': 2130 if (ch2 == '=') { 2131 _pos++; 2132 return OpCode.OR_EQ; 2133 } 2134 if (ch2 == '|') { 2135 _pos++; 2136 return OpCode.LOG_OR; 2137 } 2138 return OpCode.OR; 2139 // MINUS, // - 2140 // MINUS_EQ, // -= 2141 // MINUS_MINUS,// -- 2142 case '-': 2143 if (ch2 == '=') { 2144 _pos++; 2145 return OpCode.MINUS_EQ; 2146 } 2147 if (ch2 == '-') { 2148 _pos++; 2149 return OpCode.MINUS_MINUS; 2150 } 2151 return OpCode.MINUS; 2152 // PLUS, // + 2153 // PLUS_EQ, // += 2154 // PLUS_PLUS, // ++ 2155 case '+': 2156 if (ch2 == '=') { 2157 _pos++; 2158 return OpCode.PLUS_EQ; 2159 } 2160 if (ch2 == '+') { 2161 _pos++; 2162 return OpCode.PLUS_PLUS; 2163 } 2164 return OpCode.PLUS; 2165 // LT, // < 2166 // LT_EQ, // <= 2167 // SHL, // << 2168 // SHL_EQ, // <<= 2169 // LT_GT, // <> 2170 // NE_EQ, // <>= 2171 case '<': 2172 if (ch2 == '<') { 2173 if (ch3 == '=') { 2174 _pos += 2; 2175 return OpCode.SHL_EQ; 2176 } 2177 _pos++; 2178 return OpCode.SHL; 2179 } 2180 if (ch2 == '>') { 2181 if (ch3 == '=') { 2182 _pos += 2; 2183 return OpCode.NE_EQ; 2184 } 2185 _pos++; 2186 return OpCode.LT_GT; 2187 } 2188 if (ch2 == '=') { 2189 _pos++; 2190 return OpCode.LT_EQ; 2191 } 2192 return OpCode.LT; 2193 // GT, // > 2194 // GT_EQ, // >= 2195 // SHR_EQ // >>= 2196 // ASR_EQ, // >>>= 2197 // SHR, // >> 2198 // ASR, // >>> 2199 case '>': 2200 if (ch2 == '>') { 2201 if (ch3 == '>') { 2202 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2203 if (ch4 == '=') { // >>>= 2204 _pos += 3; 2205 return OpCode.ASR_EQ; 2206 } 2207 _pos += 2; 2208 return OpCode.ASR; // >>> 2209 } 2210 if (ch3 == '=') { // >>= 2211 _pos += 2; 2212 return OpCode.SHR_EQ; 2213 } 2214 _pos++; 2215 return OpCode.SHR; 2216 } 2217 if (ch2 == '=') { // >= 2218 _pos++; 2219 return OpCode.GT_EQ; 2220 } 2221 // > 2222 return OpCode.GT; 2223 // NOT, // ! 2224 // NOT_EQ // != 2225 // NOT_LT_GT, // !<> 2226 // NOT_LT_GT_EQ, // !<>= 2227 // NOT_LT, // !< 2228 // NOT_LT_EQ, // !<= 2229 // NOT_GT, // !> 2230 // NOT_GT_EQ, // !>= 2231 case '!': 2232 if (ch2 == '<') { // !< 2233 if (ch3 == '>') { // !<> 2234 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2235 if (ch4 == '=') { // !<>= 2236 _pos += 3; 2237 return OpCode.NOT_LT_GT_EQ; 2238 } 2239 _pos += 2; 2240 return OpCode.NOT_LT_GT; // !<> 2241 } 2242 if (ch3 == '=') { // !<= 2243 _pos += 2; 2244 return OpCode.NOT_LT_EQ; 2245 } 2246 _pos++; 2247 return OpCode.NOT_LT; // !< 2248 } 2249 if (ch2 == '=') { // != 2250 _pos++; 2251 return OpCode.NOT_EQ; 2252 } 2253 return OpCode.NOT; 2254 // PAR_OPEN, // ( 2255 case '(': 2256 return OpCode.PAR_OPEN; 2257 // PAR_CLOSE, // ) 2258 case ')': 2259 return OpCode.PAR_CLOSE; 2260 // SQ_OPEN, // [ 2261 case '[': 2262 return OpCode.SQ_OPEN; 2263 // SQ_CLOSE, // ] 2264 case ']': 2265 return OpCode.SQ_CLOSE; 2266 // CURL_OPEN, // { 2267 case '{': 2268 return OpCode.CURL_OPEN; 2269 // CURL_CLOSE, // } 2270 case '}': 2271 return OpCode.CURL_CLOSE; 2272 // QUEST, // ? 2273 case '?': 2274 return OpCode.QUEST; 2275 // COMMA, // , 2276 case ',': 2277 return OpCode.COMMA; 2278 // SEMICOLON, // ; 2279 case ';': 2280 return OpCode.SEMICOLON; 2281 // COLON, // : 2282 case ':': 2283 return OpCode.COLON; 2284 // DOLLAR, // $ 2285 case '$': 2286 return OpCode.DOLLAR; 2287 // EQ, // = 2288 // QE_EQ, // == 2289 // EQ_GT, // => 2290 case '=': 2291 if (ch2 == '=') { // == 2292 _pos++; 2293 return OpCode.QE_EQ; 2294 } 2295 if (ch2 == '>') { // => 2296 _pos++; 2297 return OpCode.EQ_GT; 2298 } 2299 return OpCode.EQ; 2300 // MUL, // * 2301 // MUL_EQ, // *= 2302 case '*': 2303 if (ch2 == '=') { 2304 _pos++; 2305 return OpCode.MUL_EQ; 2306 } 2307 return OpCode.MUL; 2308 // MOD, // % 2309 // MOD_EQ, // %= 2310 case '%': 2311 if (ch2 == '=') { 2312 _pos++; 2313 return OpCode.MOD_EQ; 2314 } 2315 return OpCode.MOD; 2316 // XOR, // ^ 2317 // XOR_EQ, // ^= 2318 // LOG_XOR, // ^^ 2319 // LOG_XOR_EQ, // ^^= 2320 case '^': 2321 if (ch2 == '^') { 2322 if (ch3 == '=') { 2323 _pos += 2; 2324 return OpCode.LOG_XOR_EQ; 2325 } 2326 _pos++; 2327 return OpCode.LOG_XOR; 2328 } 2329 if (ch2 == '=') { 2330 _pos++; 2331 return OpCode.XOR_EQ; 2332 } 2333 return OpCode.XOR; 2334 // INV, // ~ 2335 // INV_EQ, // ~= 2336 case '~': 2337 if (ch2 == '=') { 2338 _pos++; 2339 return OpCode.INV_EQ; 2340 } 2341 return OpCode.INV; 2342 // AT, // @ 2343 case '@': 2344 return OpCode.AT; 2345 // SHARP // # 2346 case '#': 2347 return OpCode.SHARP; 2348 default: 2349 return OpCode.NONE; 2350 } 2351 } 2352 2353 protected Token processCharacterLiteral() { 2354 _sharedCharacterLiteralToken.setPos(_line, _pos - 1); 2355 if (_pos + 2 > _len) 2356 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2357 dchar ch = _lineText[_pos++]; 2358 dchar ch2 = _lineText[_pos++]; 2359 dchar type = 0; 2360 if (ch == '\\') { 2361 // process escaped character - store it in ch 2362 // TODO: support all escape sequences 2363 switch(ch2) { 2364 case 'r': 2365 ch = '\r'; 2366 break; 2367 case 'n': 2368 ch = '\n'; 2369 break; 2370 case 't': 2371 ch = '\t'; 2372 break; 2373 case '\\': 2374 ch = '\\'; 2375 break; 2376 default: 2377 ch = ch2; 2378 break; 2379 } 2380 // here must be closing ' 2381 if (_pos + 1 > _len) 2382 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2383 ch2 = _lineText[_pos++]; 2384 } 2385 if (ch2 != '\'') 2386 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2387 if (_pos < _len) { 2388 dchar t = _lineText[_pos]; 2389 if (t == 'd' || t == 'w' || t == 'c') { 2390 type = t; 2391 _pos++; 2392 } else if (isIdentMiddleChar(ch)) { 2393 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken); 2394 } 2395 } 2396 _sharedCharacterLiteralToken.setCharacter(ch, type); 2397 return _sharedCharacterLiteralToken; 2398 } 2399 2400 protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) { 2401 bool wysiwyg = (delimiter == 'r' || delimiter == '`'); 2402 //writeln("processDoubleQuotedString()"); 2403 _sharedStringLiteralToken.setPos(_line, _pos - 1); 2404 _stringLiteralAppender.reset(); 2405 if (delimiter == 'r') { 2406 _pos++; 2407 delimiter = '\"'; 2408 } 2409 dchar type = 0; 2410 for (;;) { 2411 int i = _pos; 2412 int endPos = int.max; 2413 for(; i < _len; i++) { 2414 if (_lineText[i] == delimiter && (i == 0 || _lineText[i - 1] != '\\')) { 2415 endPos = i; 2416 break; 2417 } 2418 } 2419 if (endPos != int.max) { 2420 // found end quote 2421 _stringLiteralAppender.append(_lineText[_pos .. endPos]); 2422 _pos = endPos + 1; 2423 break; 2424 } 2425 // no quote by end of line 2426 _stringLiteralAppender.append(_lineText[_pos .. $]); 2427 _stringLiteralAppender.appendEol(); 2428 if (!nextLine()) { 2429 // do we need to throw exception if eof comes before end of string? 2430 break; 2431 } 2432 } 2433 dchar t = 0; 2434 if (_pos < _len) { 2435 dchar ch = _lineText[_pos]; 2436 if (ch == 'c' || ch == 'w' || ch == 'd') 2437 t = ch; 2438 else if (isIdentMiddleChar(ch)) 2439 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2440 } 2441 if (t != 0) { 2442 if (type != 0 && t != type) 2443 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); 2444 type = t; 2445 } 2446 if (!wysiwyg) { 2447 // no escape processing 2448 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2449 return _sharedStringLiteralToken; 2450 } 2451 // TODO: process escape sequences 2452 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2453 return _sharedStringLiteralToken; 2454 } 2455 2456 protected SysTime buildTime; 2457 2458 // string literal of the date of compilation "mmm dd yyyy" 2459 protected dstring formatBuildDate() { 2460 // TODO: provide proper format 2461 return to!dstring(buildTime); 2462 } 2463 2464 // string literal of the time of compilation "hh:mm:ss" 2465 protected dstring formatBuildTime() { 2466 // TODO: provide proper format 2467 return to!dstring(buildTime); 2468 } 2469 2470 // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2471 protected dstring formatBuildTimestamp() { 2472 // TODO: provide proper format 2473 return to!dstring(buildTime); 2474 } 2475 2476 static immutable dstring VERSION = "0.1"; 2477 static immutable dstring VENDOR = "coolreader.org"; 2478 2479 protected Token makeSpecialTokenString(dstring str, int pos) { 2480 _sharedStringLiteralToken.setPos(_line, pos); 2481 _sharedStringLiteralToken.setText(cast(dchar[])str, 0); 2482 return _sharedStringLiteralToken; 2483 } 2484 2485 protected Token processSpecialToken(Keyword keyword, int pos) { 2486 switch (keyword) { 2487 //Special Token Replaced with 2488 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2489 return makeSpecialTokenString(formatBuildDate(), pos); 2490 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2491 return makeSpecialTokenString(formatBuildTime(), pos); 2492 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2493 return makeSpecialTokenString(formatBuildTimestamp(), pos); 2494 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2495 return makeSpecialTokenString(VENDOR, pos); 2496 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2497 return makeSpecialTokenString(VERSION, pos); 2498 default: 2499 parserError("Unknown special token", _line, pos); 2500 } 2501 return null; 2502 } 2503 2504 // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls). 2505 Token nextToken() { 2506 dchar ch = nextChar(); 2507 if (ch == EOF_CHAR) { 2508 return emitEof(); 2509 } 2510 if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { 2511 // white space (treat EOL as whitespace, too) 2512 return processWhiteSpace(ch); 2513 } 2514 dchar next = _pos < _len ? _lineText[_pos] : 0; 2515 if (ch == '/') { 2516 if (next == '/') 2517 return processOneLineComment(); 2518 else if (next == '*') 2519 return processMultilineComment(); 2520 else if (next == '+') 2521 return processNestedComment(); 2522 } 2523 if (ch == '#' && _line == 1) 2524 return processOneLineSharpComment(); 2525 if (ch == '\"') 2526 return processDoubleQuotedOrWysiwygString(ch); 2527 if (ch == '\'') 2528 return processCharacterLiteral(); 2529 if (ch == 'x' && next == '\"') 2530 return processHexString(); 2531 if (ch == 'q' && next == '\"') 2532 return processDelimitedString(); 2533 if ((ch == 'r' && next == '\"') || (ch == '`')) 2534 return processDoubleQuotedOrWysiwygString(ch); 2535 int oldPos = _pos - 1; 2536 2537 if (ch == '0') { 2538 if (next == 'b' || next == 'B') 2539 return processBinaryNumber(); 2540 if (next == 'x' || next == 'X') 2541 return processHexNumber(); 2542 if (next >= '0' && next <= '9') 2543 return processOctNumber(); 2544 if (next >= '0' && next <= '9') 2545 return processDecNumber(ch); 2546 } 2547 if (ch >= '0' && ch <= '9') 2548 return processDecNumber(ch); 2549 if (ch == '.' && next >= '0' && next <= '9') // .123 2550 return processDecFloatSecondPart(0); 2551 2552 if (ch == '_' || isUniversalAlpha(ch)) { 2553 // start of identifier or keyword? 2554 Keyword keyword = detectKeyword(ch); 2555 if (keyword != Keyword.NONE) { 2556 switch (keyword) { 2557 //Special Token Replaced with 2558 case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file 2559 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2560 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2561 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2562 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2563 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2564 return processSpecialToken(keyword, oldPos); 2565 default: 2566 _sharedKeywordToken.setPos(_line, oldPos); 2567 _sharedKeywordToken.keyword = keyword; 2568 return _sharedKeywordToken; 2569 } 2570 } 2571 return processIdent(); 2572 } 2573 OpCode op = detectOp(ch); 2574 if (op != OpCode.NONE) { 2575 _sharedOpToken.setPos(_line, oldPos); 2576 _sharedOpToken.opCode = op; 2577 return _sharedOpToken; 2578 } 2579 return null; 2580 } 2581 2582 2583 } 2584 2585 unittest { 2586 import std.algorithm; 2587 class TokenTest { 2588 int _line; 2589 string _file; 2590 this(string file, int line) { 2591 _file = file; 2592 _line = line; 2593 } 2594 bool doTest(Token token) { 2595 return true; 2596 } 2597 void execute(Tokenizer tokenizer) { 2598 Token token = tokenizer.nextToken(); 2599 if (!doTest(token)) { 2600 assert(false, " token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString); 2601 } 2602 } 2603 public override @property string toString() { 2604 return "TokenTest"; 2605 } 2606 } 2607 void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) { 2608 Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line)); 2609 for (int i = 0; i < tokens.length; i++) { 2610 tokens[i].execute(tokenizer); 2611 } 2612 } 2613 class KeywordTest : TokenTest { 2614 Keyword _code; 2615 this(Keyword code, string file = __FILE__, uint line = __LINE__) { 2616 super(file, line); 2617 _code = code; 2618 } 2619 override bool doTest(Token token) { 2620 if (token.type != TokenType.KEYWORD) 2621 return false; 2622 if (token.keyword != _code) 2623 return false; 2624 return true; 2625 } 2626 public override @property string toString() { 2627 return "Keyword:" ~ to!string(_code); 2628 } 2629 } 2630 class OpTest : TokenTest { 2631 OpCode _code; 2632 this(OpCode code, string file = __FILE__, uint line = __LINE__) { 2633 super(file, line); 2634 _code = code; 2635 } 2636 override bool doTest(Token token) { 2637 if (token.type != TokenType.OP) 2638 return false; 2639 if (token.opCode != _code) 2640 return false; 2641 return true; 2642 } 2643 public override @property string toString() { 2644 return "Op:" ~ to!string(_code); 2645 } 2646 } 2647 class StringTest : TokenTest { 2648 string _value; 2649 this(string value, string file = __FILE__, uint line = __LINE__) { 2650 super(file, line); 2651 _value = value; 2652 } 2653 override bool doTest(Token token) { 2654 if (token.type != TokenType.STRING) 2655 return false; 2656 if (to!string(token.text).equal(_value)) 2657 return false; 2658 return true; 2659 } 2660 public override @property string toString() { 2661 return "String:" ~ _value; 2662 } 2663 } 2664 class IntegerTest : TokenTest { 2665 ulong _value; 2666 bool _unsigned; 2667 bool _long; 2668 this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 2669 super(file, line); 2670 _value = value; 2671 _unsigned = unsignedFlag; 2672 _long = longFlag; 2673 } 2674 override bool doTest(Token token) { 2675 if (token.type != TokenType.INTEGER) 2676 return false; 2677 if (token.intValue != _value) 2678 return false; 2679 if (token.isUnsigned != _unsigned) 2680 return false; 2681 if (token.isLong != _long) 2682 return false; 2683 return true; 2684 } 2685 public override @property string toString() { 2686 return "Integer:" ~ to!string(_value); 2687 } 2688 } 2689 class RealTest : TokenTest { 2690 real _value; 2691 ubyte _precision; 2692 bool _imaginary; 2693 this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 2694 super(file, line); 2695 _value = value; 2696 _precision = precision; 2697 _imaginary = imaginary; 2698 } 2699 override bool doTest(Token token) { 2700 if (token.type != TokenType.FLOAT) 2701 return false; 2702 if (token.realValue != _value) 2703 return false; 2704 if (token.precision != _precision) 2705 return false; 2706 if (token.isImaginary != _imaginary) 2707 return false; 2708 return true; 2709 } 2710 public override @property string toString() { 2711 return "Real:" ~ to!string(_value); 2712 } 2713 } 2714 class IdentTest : TokenTest { 2715 string _value; 2716 this(string value, string file = __FILE__, uint line = __LINE__) { 2717 super(file, line); 2718 _value = value; 2719 } 2720 override bool doTest(Token token) { 2721 if (token.type != TokenType.IDENTIFIER) 2722 return false; 2723 if (! to!string(token.text).equal(_value)) 2724 return false; 2725 return true; 2726 } 2727 public override @property string toString() { 2728 return "Ident:" ~ _value; 2729 } 2730 } 2731 class CommentTest : TokenTest { 2732 this(string file = __FILE__, uint line = __LINE__) { 2733 super(file, line); 2734 } 2735 override bool doTest(Token token) { 2736 if (token.type != TokenType.COMMENT) 2737 return false; 2738 return true; 2739 } 2740 public override @property string toString() { 2741 return "Comment"; 2742 } 2743 } 2744 class EOFTest : TokenTest { 2745 this(string file = __FILE__, uint line = __LINE__) { 2746 super(file, line); 2747 } 2748 override bool doTest(Token token) { 2749 if (token.type != TokenType.EOF) 2750 return false; 2751 return true; 2752 } 2753 public override @property string toString() { 2754 return "EOF"; 2755 } 2756 } 2757 class WhiteSpaceTest : TokenTest { 2758 this(string file = __FILE__, uint line = __LINE__) { 2759 super(file, line); 2760 } 2761 override bool doTest(Token token) { 2762 if (token.type != TokenType.WHITESPACE) 2763 return false; 2764 return true; 2765 } 2766 public override @property string toString() { 2767 return "whiteSpace"; 2768 } 2769 } 2770 TokenTest checkString(string value, string file = __FILE__, uint line = __LINE__) { 2771 return new StringTest(value, file, line); 2772 } 2773 TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 2774 return new IntegerTest(value, unsignedFlag, longFlag, file, line); 2775 } 2776 TokenTest checkReal(real value, byte precision = 0, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 2777 return new RealTest(value, precision, imaginary, file, line); 2778 } 2779 TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) { 2780 return new IdentTest(value, file, line); 2781 } 2782 TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) { 2783 return new KeywordTest(value, file, line); 2784 } 2785 TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) { 2786 return new OpTest(value, file, line); 2787 } 2788 TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) { 2789 return new WhiteSpaceTest(file, line); 2790 } 2791 TokenTest checkComment(string file = __FILE__, uint line = __LINE__) { 2792 return new CommentTest(file, line); 2793 } 2794 TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) { 2795 return new EOFTest(file, line); 2796 } 2797 2798 testTokenizer(q"TEST 2799 int i; 2800 TEST" 2801 , [ 2802 checkKeyword(Keyword.INT), 2803 checkSpace(), 2804 checkIdent("i"), 2805 checkOp(OpCode.SEMICOLON), 2806 checkEOF() 2807 ]); 2808 testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25" 2809 , [ 2810 checkInteger(13), 2811 checkSpace(), 2812 checkInteger(0x123abcd, true, false), 2813 checkSpace(), 2814 checkInteger(0xabc, false, true), 2815 checkSpace(), 2816 checkInteger(std.conv.octal!743), 2817 checkSpace(), 2818 checkInteger(192_837_465), 2819 checkSpace(), 2820 checkInteger(0), 2821 checkSpace(), 2822 checkInteger(192837465), 2823 checkSpace(), 2824 checkReal(5.25), 2825 checkEOF() 2826 ]); 2827 } 2828 2829 unittest { 2830 import std.stdio; 2831 import std.conv; 2832 import std.utf; 2833 import ddx.lexer.LineStream; 2834 string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d"; 2835 writeln("opening file"); 2836 try { 2837 std.stream.File f = new std.stream.File(fname); 2838 scope(exit) { f.close(); } 2839 try { 2840 LineStream lines = LineStream.create(f, fname); 2841 Tokenizer tokenizer = new Tokenizer(lines); 2842 for (;;) { 2843 Token token = tokenizer.nextToken(); 2844 if (token is null) { 2845 writeln("Null token returned"); 2846 break; 2847 } 2848 if (token.type == TokenType.EOF) { 2849 writeln("EOF token"); 2850 break; 2851 } 2852 writeln("", token.line, ":", token.pos, "\t", token.toString); 2853 } 2854 } catch (Exception e) { 2855 writeln("Exception " ~ e.toString); 2856 } 2857 } catch (Exception e) { 2858 writeln("Exception " ~ e.toString); 2859 } 2860 }