module ddc.lexer.tokenizer;

import ddc.lexer.textsource;
import ddc.lexer.exceptions;

import std.stdio;
import std.datetime;
import std.conv;
import std.utf;
import std.math;

/// Token categories declared by this lexer module; the underlying storage type is ubyte.
/// An EOL member exists only as a commented-out entry and is not part of the enum.
enum TokenType : ubyte {
    EOF,
    //EOL,
    WHITESPACE,
    COMMENT,
    IDENTIFIER,
    STRING,
    CHARACTER,
    INTEGER,
    FLOAT,
    KEYWORD,
    OP,
    INVALID
}

// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _
// max code is 0xd7ff
//1728
// Layout: one bit per code point, 32 code points per element, so the 1728 elements
// cover code points 0x0000..0xd7ff. isUniversalAlpha() tests bit (ch & 31) of
// element (ch >> 5). Each source row holds 8 elements (256 code points); the
// trailing comment on a row gives the code point range that row covers.
// The rows have the format printed by the dump code guarded by
// ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE further down in this file, so the table should be
// regenerated from that code rather than edited by hand.
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff 45 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff 46 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff 47 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff 48 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff 49 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff 50 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff 51 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff 52 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff 53 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff 54 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff 55 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff 56 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff 57 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff 58 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff 59 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff 60 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff 61 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff 62 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff 63 
0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff 64 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff 65 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff 66 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff 67 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff 68 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff 69 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff 70 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff 71 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff 72 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff 73 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff 74 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff 75 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff 76 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff 77 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff 78 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff 79 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff 80 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff 81 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff 82 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff 83 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff 84 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff 85 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff 86 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff 87 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff 88 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff 89 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff 90 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff 91 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff 92 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff 93 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff 94 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff 95 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff 96 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff 97 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff 98 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff 99 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff 100 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff 101 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff 102 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff 103 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff 104 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff 105 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff 106 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff 107 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff 108 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff 109 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff 110 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff 111 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff 112 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff 113 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff 114 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff 115 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff 116 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff 117 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff 118 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff 119 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff 120 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff 121 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff 122 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff 123 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff 124 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff 125 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff 126 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff 127 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff 128 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff 129 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff 130 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff 131 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff 132 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff 133 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff 134 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff 135 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff 136 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff 137 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff 138 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff 139 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff 140 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff 141 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff 142 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff 143 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff 144 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff 145 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff 146 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff 147 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff 148 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff 149 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff 150 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff 151 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff 152 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff 153 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff 154 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff 155 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff 156 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff 157 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff 158 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff 159 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff 160 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff 161 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff 162 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff 163 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff 164 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff 165 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff 166 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff 167 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff 168 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff 169 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff 170 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff 171 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff 172 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff 173 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff 174 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff 175 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff 176 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff 177 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff 178 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff 179 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff 180 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff 181 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff 182 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff 183 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff 184 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff 185 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff 186 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff 187 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff 188 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff 189 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff 190 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff 191 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff 192 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff 193 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff 194 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff 195 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff 196 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff 197 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff 198 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff 199 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff 200 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff 201 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff 202 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff 203 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff 204 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff 205 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff 206 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff 207 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff 208 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff 209 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff 210 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff 211 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff 212 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff 213 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff 214 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff 215 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff 216 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff 217 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff 218 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff 219 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff 220 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff 221 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff 222 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff 223 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff 224 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff 225 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff 226 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff 227 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff 228 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff 229 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff 230 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff 231 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff 232 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff 233 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff 234 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
];

/// Returns true if the character is A..Z, a..z, _ or a universal alpha character.
///
/// The answer is a single bit lookup in UNIVERSAL_ALPHA_FLAGS: element (ch >> 5),
/// bit (ch & 31). Code points beyond the range covered by the table yield false.
bool isUniversalAlpha(dchar ch) pure nothrow {
    // The table stores 32 code points per element; deriving the limit from its length
    // (1728 * 32 == 0xd800) keeps this guard correct if the table is regenerated.
    if (ch >= UNIVERSAL_ALPHA_FLAGS.length * 32)
        return false;
    // Unsigned literal so that selecting bit 31 never shifts into a sign bit.
    return (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1u << (ch & 31))) != 0;
}

/// Returns true if the character may appear at the beginning of an identifier
/// (same set as isUniversalAlpha: letters, '_' and universal alpha characters).
bool isIdentStartChar(dchar ch) pure nothrow {
    return isUniversalAlpha(ch);
}

/// Returns true if the character may appear after the first character of an identifier:
/// an ASCII digit 0..9 or anything accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    return (ch >= '0' && ch <= '9') || isUniversalAlpha(ch);
}

/// Compile-time switch. When true, the range-based reference implementation below is
/// compiled and the unittest prints a regenerated UNIVERSAL_ALPHA_FLAGS table and
/// cross-checks isUniversalAlpha against the reference implementation.
immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
    /// Returns true if ch equals the single code unit v.
    bool r(dchar ch, wchar v) pure nothrow {
        return ch == v;
    }

    /// Returns true if ch lies in the inclusive range v1..v2.
    bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
        return ch >= v1 && ch <= v2;
    }

    /// Reference implementation of the universal alpha check, written as an explicit
    /// list of code points and inclusive ranges. The comment before each group quotes
    /// the ranges that the following r(...) calls encode. Unlike isUniversalAlpha it
    /// does NOT accept ASCII letters or '_'; callers in the unittest add those.
    bool isUniversalAlphaSlow(dchar c) pure nothrow {
        return
            // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
            // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
            r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
            || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
            //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
            //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
            //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
            //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
            //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
            || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
            || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
            || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
            || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
            || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
            //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
            //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
            || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
            || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
            //Armenian: 0531−0556, 0561−0587
            || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
            //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
            //05F0−05F2
            || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
            || r(c, 0x05F0,0x05F2)
            //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
            //06D0−06DC, 06E5−06E8, 06EA−06ED
            || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
            || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
            //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
            || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
            //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
            //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
            //09DC−09DD, 09DF−09E3, 09F0−09F1
            || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
            || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
            || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
            //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
            //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
            //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
            || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
            || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
            || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
            //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
            //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
            //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
            || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
            || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
            || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
            // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
            //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
            //0B5C−0B5D, 0B5F−0B61
            || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
            || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
            || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
            //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
            //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
            //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
            || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
            || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
            || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
            //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
            //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
            || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
            || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
            //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
            //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
            //0CE0−0CE1
            || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
            || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
            || r(c, 0x0CE0,0x0CE1)
            //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
            //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
            || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
            || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
            //Thai: 0E01−0E3A, 0E40−0E5B
            || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
            //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
            //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
            //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
            //0EC8−0ECD, 0EDC−0EDD
            || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
            || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
            || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
            || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
            //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
            //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
            //0FB1−0FB7, 0FB9
            || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
            || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
            || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
            //Georgian: 10A0−10C5, 10D0−10F6
            || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
            //Hiragana: 3041−3093, 309B−309C
            || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
            //Katakana: 30A1−30F6, 30FB−30FC
            || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
            //Bopomofo: 3105−312C
            || r(c, 0x3105,0x312C)
            //CJK Unified Ideographs: 4E00−9FA5
            || r(c, 0x4E00,0x9FA5)
            //Hangul: AC00−D7A3
            || r(c, 0xAC00,0xD7A3)
            //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
            //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
            //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
            || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
            || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
            || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
            //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
            //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
            //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
            //2133−2138, 2160−2182, 3005−3007, 3021−3029
            || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
            || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
            || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
            || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
            ;
    }

}

// Table generator and self-check. The whole body is compiled only when
// ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true; otherwise this unittest is empty.
unittest {


    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        // Number of 32-bit table elements printed per output row.
        immutable uint itemsInRow = 8;

        // Find the highest BMP code point accepted by the reference implementation
        // (plus ASCII letters and '_').
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            uint ch = i;
            if (isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
                maxAlpha = i;
        }
        // Round up to the last code point of a full output row (itemsInRow * 32 code points).
        maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1;
        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        // NOTE(review): this prints an `immutable` declaration while the table checked
        // into this file is declared `const` -- confirm which qualifier is intended.
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            // Start of a new output row.
            if ((i / 32) % itemsInRow == 0)
                write(" ");
            // Pack the flags of 32 consecutive code points into one element, bit j for i + j.
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                uint ch = i + j;
                bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
                if (flag)
                    flags |= (1 << j);
            }
            writef("0x%08x", flags);
            // Every element except the last one is followed by a comma.
            if (i != maxAlpha / 32 * 32)
                write(",");
            // End of row: append the code point range covered by the row.
            if ((i / 32) % itemsInRow == itemsInRow - 1)
                writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
        }
        writeln("];");

        // Cross-check the table lookup against the reference implementation.
        for (uint ch = 0; ch < 0x100000; ch++) {
            bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
            bool flag2 = isUniversalAlpha(ch);
            if (flag2 != flag) {
                // Result intentionally unused; presumably kept as a convenient breakpoint target.
                isUniversalAlpha(ch);
                writefln("universalAlpha test failed for char %06x expeced %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
            }
            assert(flag2 == flag);
        }
    }
}

/// Operator and punctuation codes. The numeric value of each member is the index of its
/// spelling in OP_CODE_STRINGS, so the member order here must match that table exactly.
enum OpCode : ubyte {
    NONE,       //    no op
    DIV,        //    /
    DIV_EQ,     //    /=
    DOT,        //    .
    DOT_DOT,    //    ..
    DOT_DOT_DOT,//    ...
    AND,        //    &
    AND_EQ,     //    &=
    LOG_AND,    //    &&
    OR,         //    |
    OR_EQ,      //    |=
    LOG_OR,     //    ||
    MINUS,      //    -
    MINUS_EQ,   //    -=
    MINUS_MINUS,//    --
    PLUS,       //    +
    PLUS_EQ,    //    +=
    PLUS_PLUS,  //    ++
    LT,         //    <
    LT_EQ,      //    <=
    SHL,        //    <<
    SHL_EQ,     //    <<=
    LT_GT,      //    <>
    NE_EQ,      //    <>=
    GT,         //    >
    GT_EQ,      //    >=
    SHR_EQ,     //    >>=
    ASR_EQ,     //    >>>=
    SHR,        //    >>
    ASR,        //    >>>
    NOT,        //    !
    NOT_EQ,     //    !=
    NOT_LT_GT,  //    !<>
    NOT_LT_GT_EQ, //    !<>=
    NOT_LT,     //    !<
    NOT_LT_EQ,  //    !<=
    NOT_GT,     //    !>
    NOT_GT_EQ,  //    !>=
    PAR_OPEN,   //    (
    PAR_CLOSE,  //    )
    SQ_OPEN,    //    [
    SQ_CLOSE,   //    ]
    CURL_OPEN,  //    {
    CURL_CLOSE, //    }
    QUEST,      //    ?
    COMMA,      //    ,
    SEMICOLON,  //    ;
    COLON,      //    :
    DOLLAR,     //    $
    EQ,         //    =
    QE_EQ,      //    ==
    MUL,        //    *
    MUL_EQ,     //    *=
    MOD,        //    %
    MOD_EQ,     //    %=
    XOR,        //    ^
    XOR_EQ,     //    ^=
    LOG_XOR,    //    ^^
    LOG_XOR_EQ, //    ^^=
    INV,        //    ~
    INV_EQ,     //    ~=
    AT,         //    @
    EQ_GT,      //    =>
    SHARP       //    #
}

/// Source spelling of every OpCode member, indexed by the member's numeric value
/// (OpCode.NONE maps to the empty string).
immutable dstring[] OP_CODE_STRINGS = [
    "",
    "/",
    "/=",
    ".",
    "..",
    "...",
    "&",
    "&=",
    "&&",
    "|",
    "|=",
    "||",
    "-",
    "-=",
    "--",
    "+",
    "+=",
    "++",
    "<",
    "<=",
    "<<",
    "<<=",
    "<>",
    "<>=",
    ">",
    ">=",
    ">>=",
    ">>>=",
    ">>",
    ">>>",
    "!",
    "!=",
    "!<>",
    "!<>=",
    "!<",
    "!<=",
    "!>",
    "!>=",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
    "?",
    ",",
    ";",
    ":",
    "$",
    "=",
    "==",
    "*",
    "*=",
    "%",
    "%=",
    "^",
    "^=",
    "^^",
    "^^=",
    "~",
    "~=",
    "@",
    "=>",
    "#"
];

// getOpNameD indexes OP_CODE_STRINGS directly with the OpCode value, so the table must
// have exactly one entry per enum member. Fail the build if the two ever drift apart.
static assert(OP_CODE_STRINGS.length == OpCode.max + 1,
    "OP_CODE_STRINGS must contain exactly one entry per OpCode member");

/// Returns the source spelling of the given operator code
/// (the empty string for OpCode.NONE).
dstring getOpNameD(OpCode op) pure nothrow {
    return OP_CODE_STRINGS[op];
}

enum Keyword : ubyte {
    NONE,
    ABSTRACT,
    ALIAS,
    ALIGN,
    ASM,
    ASSERT,
    AUTO,

    BODY,
    BOOL,
    BREAK,
    BYTE,

    CASE,
    CAST,
    CATCH,
    CDOUBLE,
    CENT,
    CFLOAT,
    CHAR,
    CLASS,
    CONST,
    CONTINUE,
    CREAL,

    DCHAR,
    DEBUG,
    DEFAULT,
    DELEGATE,
    DELETE,
    DEPRECATED,
    DO,
    DOUBLE,

    ELSE,
    ENUM,
    EXPORT,
    EXTERN,

    FALSE,
    FINAL,
    FINALLY,
    FLOAT,
    FOR,
    FOREACH,
    FOREACH_REVERSE,
    FUNCTION,

    GOTO,

    IDOUBLE,
    IF,
    IFLOAT,
    IMMUTABLE,
    IMPORT,
    IN,
    INOUT,
    INT,
    INTERFACE,
    INVARIANT,
    IREAL,
    IS,

    LAZY,
    LONG,

    MACRO,
    MIXIN,
    MODULE,

    NEW,
    NOTHROW,
    NULL,

    OUT,
    OVERRIDE,

    PACKAGE,
    PRAGMA,
    PRIVATE,
    PROTECTED,
    PUBLIC,
    PURE,

    REAL,
REF,
    RETURN,

    SCOPE, SHARED, SHORT, STATIC, STRUCT, SUPER, SWITCH, SYNCHRONIZED,

    TEMPLATE, THIS, THROW, TRUE, TRY, TYPEDEF, TYPEID, TYPEOF,

    UBYTE, UCENT, UINT, ULONG, UNION, UNITTEST, USHORT,

    VERSION, VOID, VOLATILE,

    WCHAR, WHILE, WITH,

    FILE,
    MODULE__,
    LINE,
    FUNCTION__,
    PRETTY_FUNCTION,

    //Special Token  Replaced with
    DATE,      // string literal of the date of compilation "mmm dd yyyy"
    EOF,       // sets the scanner to the end of the file
    TIME,      // string literal of the time of compilation "hh:mm:ss"
    TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    VENDOR,    // Compiler vendor string, such as "Digital Mars D"
    VERSION_,  // Compiler version as an integer, such as 2001

    GSHARED,
    TRAITS,
    VECTOR,
    PARAMETERS,

}

/// Keyword spellings, indexed by Keyword value (must stay in the same order as the enum).
immutable dstring[] KEYWORD_STRINGS = [
    "",
    "abstract", "alias", "align", "asm", "assert", "auto",

    "body", "bool", "break", "byte",

    "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal",

    "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", "double",

    "else", "enum", "export", "extern",

    "false", "final", "finally", "float", "for", "foreach", "foreach_reverse", "function",

    "goto",

    "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",

    "lazy", "long",

    "macro", "mixin", "module",

    "new", "nothrow", "null",

    "out", "override",

    "package", "pragma", "private", "protected", "public", "pure",

    "real", "ref", "return",

    "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized",

    "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",

    "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",

    "version", "void", "volatile",

    "wchar", "while", "with",

    "__FILE__",
    "__MODULE__",
    "__LINE__",
    "__FUNCTION__",
    "__PRETTY_FUNCTION__",

    //Special Token  Replaced with
    "__DATE__",      // string literal of the date of compilation "mmm dd yyyy"
    "__EOF__",       // sets the scanner to the end of the file
    "__TIME__",      // string literal of the time of compilation "hh:mm:ss"
    "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    "__VENDOR__",    // Compiler vendor string, such as "Digital Mars D"
    "__VERSION__",   // Compiler version as an integer, such as 2001


    "__gshared",
    "__traits",
    "__vector",
    "__parameters"
];

/// Returns the source spelling of a keyword.
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    return KEYWORD_STRINGS[keyword];
}

/**
 * Tries to match one of the keywords in [start, end] against the input.
 *
 * The first character of the candidate is assumed to have been checked by the
 * caller: comparison starts at the keyword's second character against name[0].
 *
 * Params:
 *   start = first keyword of the range to try
 *   end   = last keyword of the range to try (inclusive)
 *   name  = pointer to the characters following the already consumed first one
 *   len   = number of readable characters at name
 *   pos   = advanced by (keyword length - 1) when a keyword is matched
 * Returns: the matched keyword, or Keyword.NONE.
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    for (Keyword i = start; i <= end; i++) {
        dstring s = KEYWORD_STRINGS[i];
        if (s.length > len + 1)
            continue; // too long
        bool found = true;
        for (uint j = 1; j < s.length; j++) {
            if (s[j] != name[j - 1]) {
                found = false;
                break;
            }
        }
        if (!found)
            continue;
        // The keyword either consumes all remaining input (s.length == len + 1,
        // so name[s.length - 1] would be past the end and must not be read),
        // or it must be followed by a character that cannot continue an identifier.
        if (s.length == len + 1 || !isIdentMiddleChar(name[s.length - 1])) {
            pos += s.length - 1;
            return i;
        }
    }
    return Keyword.NONE;
}

/**
 * Token.
897 */ 898 class Token { 899 protected SourceFile _file; 900 protected int _line; 901 protected int _pos; 902 protected TokenType _type; 903 /// returns token type 904 @property TokenType type() { return _type; } 905 /// returns file info for source 906 @property SourceFile filename() { return _file; } 907 /// returns 1-based source line number of token start 908 @property int line() { return _line; } 909 /// returns 1-based source line position of token start 910 @property int pos() { return _pos; } 911 /// returns token text 912 @property dchar[] text() { return null; } 913 914 // number token properties 915 @property dchar literalType() { return 0; } 916 @property ulong intValue() { return 0; } 917 @property bool isUnsigned() { return false; } 918 @property ulong isLong() { return false; } 919 @property real realValue() { return 0; } 920 @property double doubleValue() { return 0; } 921 @property float floatValue() { return 0; } 922 @property byte precision() { return 0; } 923 @property bool isImaginary() { return false; } 924 925 /// returns opcode ID - for opcode tokens 926 @property OpCode opCode() { return OpCode.NONE; } 927 /// returns keyword ID - for keyword tokens 928 @property Keyword keyword() { return Keyword.NONE; } 929 /// returns true if this is documentation comment token 930 @property bool isDocumentationComment() { return false; } 931 932 // error handling 933 934 /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer) 935 @property bool isError() { return type == TokenType.INVALID; } 936 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 937 @property string errorMessage() { return null; } 938 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 939 @property int errorCode() { return 0; } 940 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode 
of tokenizer) 941 @property TokenType invalidTokenType() { return TokenType.INVALID; } 942 943 944 this(TokenType type) { 945 _type = type; 946 } 947 948 this(TokenType type, SourceFile file, int line, int pos) { 949 _type = type; 950 _file = file; 951 _line = line; 952 _pos = pos; 953 } 954 /// set start position for token (line is 1-based, pos is 0-based) 955 void setPos(SourceFile file, int line, int pos) { 956 _file = file; 957 _line = line; 958 _pos = pos + 1; 959 } 960 /// set source file information for token 961 void setFile(SourceFile file) { 962 _file = file; 963 } 964 /// set start position for token (line is 1-based, pos is 0-based) 965 void setPos(int line, int pos) { 966 _line = line; 967 _pos = pos + 1; 968 } 969 970 public abstract Token clone(); 971 public override @property string toString() { 972 return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword) 973 ~" \"" ~ toUTF8(text()) ~ "\""; 974 } 975 } 976 977 class EofToken : Token { 978 this() { 979 super(TokenType.EOF); 980 } 981 this(SourceFile file, uint line, uint pos) { 982 super(TokenType.EOF, file, line, pos); 983 } 984 override public Token clone() { 985 return new EofToken(_file, _line, _pos); 986 } 987 public override @property string toString() { 988 return "EOF"; 989 } 990 } 991 992 // treat as white space 993 //class EolToken : Token { 994 // this(string file, uint line, uint pos) { 995 // super(TokenType.EOL, file, line, pos); 996 // } 997 //} 998 999 /// white space token 1000 class WhiteSpaceToken : Token { 1001 this() { 1002 super(TokenType.WHITESPACE); 1003 } 1004 this(SourceFile file, uint line, uint pos) { 1005 super(TokenType.WHITESPACE, file, line, pos); 1006 } 1007 override public Token clone() { 1008 return new WhiteSpaceToken(_file, _line, _pos); 1009 } 1010 public override @property string toString() { 1011 return "WhiteSpace"; 1012 } 1013 } 1014 1015 class OpToken : Token { 1016 OpCode _op; 1017 
public @property override OpCode opCode() { return _op; } 1018 public @property void opCode(OpCode op) { _op = op; } 1019 public @property override dchar[] text() { return cast(dchar[])getOpNameD(_op); } 1020 this() { 1021 super(TokenType.OP); 1022 } 1023 this(SourceFile file, uint line, uint pos) { 1024 super(TokenType.OP, file, line, pos); 1025 } 1026 override public Token clone() { 1027 return new OpToken(_file, _line, _pos); 1028 } 1029 public override @property string toString() { 1030 return "Op:" ~ to!string(_op); 1031 } 1032 } 1033 1034 class KeywordToken : Token { 1035 Keyword _keyword; 1036 public @property override Keyword keyword() { return _keyword; } 1037 public @property void keyword(Keyword keyword) { _keyword = keyword; } 1038 public @property override dchar[] text() { return cast(dchar[])getKeywordNameD(_keyword); } 1039 this() { 1040 super(TokenType.KEYWORD); 1041 } 1042 this(SourceFile file, uint line, uint pos) { 1043 super(TokenType.KEYWORD, file, line, pos); 1044 } 1045 override public Token clone() { 1046 return new KeywordToken(_file, _line, _pos); 1047 } 1048 public override @property string toString() { 1049 return "Keyword:" ~ to!string(_keyword); 1050 } 1051 } 1052 1053 /// comment token 1054 class CommentToken : Token { 1055 protected dchar[] _text; 1056 protected bool _isDocumentationComment; 1057 1058 override @property bool isDocumentationComment() { 1059 return _isDocumentationComment; 1060 } 1061 1062 @property void isDocumentationComment(bool f) { 1063 _isDocumentationComment = f; 1064 } 1065 1066 @property override dchar[] text() { return _text; } 1067 @property void text(dchar[] text) { _text = text; } 1068 this() { 1069 super(TokenType.COMMENT); 1070 } 1071 this(SourceFile file, uint line, uint pos, dchar[] text) { 1072 super(TokenType.COMMENT, file, line, pos); 1073 _text = text; 1074 } 1075 override public Token clone() { 1076 return new CommentToken(_file, _line, _pos, _text.dup); 1077 } 1078 public override @property 
string toString() { 1079 return "Comment:" ~ to!string(_text); 1080 } 1081 } 1082 1083 /// Invalid token holder - for error tolerant parsing 1084 class InvalidToken : Token { 1085 protected dchar[] _text; 1086 protected TokenType _invalidTokenType; 1087 protected int _errorCode; 1088 protected string _errorMessage; 1089 1090 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 1091 override @property string errorMessage() { return _errorMessage; } 1092 /// sets error message 1093 @property void errorMessage(string s) { _errorMessage = s; } 1094 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 1095 override @property int errorCode() { return _errorCode; } 1096 /// sets error code 1097 @property void errorCode(int c) { _errorCode = c; } 1098 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 1099 override @property TokenType invalidTokenType() { return _invalidTokenType; } 1100 /// sets type of token parsing of which has been failed 1101 @property void invalidTokenType(TokenType t) { _invalidTokenType = t; } 1102 1103 /// text of invalid token 1104 @property override dchar[] text() { return _text; } 1105 /// text of invalid token 1106 @property void text(dchar[] text) { _text = text; } 1107 1108 this() { 1109 super(TokenType.INVALID); 1110 } 1111 this(SourceFile file, uint line, uint pos, dchar[] text) { 1112 super(TokenType.INVALID, file, line, pos); 1113 _text = text; 1114 } 1115 override Token clone() { 1116 InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup); 1117 res._errorMessage = _errorMessage.dup; 1118 res._errorCode = _errorCode; 1119 res._invalidTokenType = _invalidTokenType; 1120 return res; 1121 } 1122 override @property string toString() { 1123 return "Invalid:" ~ to!string(_text); 1124 } 1125 } 1126 1127 alias tokenizer_ident_t = uint; 1128 alias 
tokenizer_ident_name_t = dchar[];

enum : tokenizer_ident_t {
    NO_IDENT = 0
}

/**
 * Global storage for identifier strings.
 *
 * Maps identifier text to a numeric id and back. Ids are handed out
 * sequentially starting at NO_IDENT + 1.
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
     * Search for id by name, return NO_IDENT if not found.
     */
    uint findByName(tokenizer_ident_name_t name) {
        tokenizer_ident_t * found = (name in _nameToId);
        if (found)
            return *found;
        return NO_IDENT;
    }

    /**
     * Search for name by id, return null if not found.
     */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        auto found = (id in _idToName);
        if (found)
            return *found;
        return null;
    }

    /**
     * Search for ident id by name, create new entry if not found.
     *
     * The name is copied before it is stored: callers pass slices of buffers
     * they may overwrite later (the tokenizer passes a slice of the current
     * line), and a key that changes after insertion corrupts the lookup table.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        uint * found = (name in _nameToId);
        if (found)
            return *found;
        uint newid = _nextId++;
        // separate copies: the value returned by nameById is mutable and must
        // not share storage with the hash key
        _nameToId[name.idup] = newid;
        _idToName[newid] = name.dup;
        return newid;
    }
}

/**
 * Thread local storage for IDs.
1182 */ 1183 IdentHolder identMap; 1184 1185 static this() { 1186 // init ID storage 1187 identMap = new IdentHolder(); 1188 } 1189 1190 class StringLiteralToken : Token { 1191 dchar[] _text; 1192 dchar _literalType; 1193 public @property override dchar literalType() { return _literalType; } 1194 public @property override dchar[] text() { return _text; } 1195 public void setText(dchar[] text, dchar type) { _text = text; _literalType = type; } 1196 this() { 1197 super(TokenType.STRING); 1198 } 1199 this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) { 1200 super(TokenType.STRING, file, line, pos); 1201 _text = text; 1202 _literalType = type; 1203 } 1204 override public Token clone() { 1205 return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType); 1206 } 1207 public override @property string toString() { 1208 return "String:" ~ to!string(_text); 1209 } 1210 } 1211 1212 class CharacterLiteralToken : Token { 1213 dchar _character; 1214 dchar _literalType; 1215 @property override dchar literalType() { return _literalType; } 1216 @property dchar character() { return _character; } 1217 @property override dchar[] text() { return [_character]; } 1218 void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; } 1219 this() { 1220 super(TokenType.CHARACTER); 1221 } 1222 this(SourceFile file, uint line, uint pos, dchar character, dchar type) { 1223 super(TokenType.CHARACTER, file, line, pos); 1224 _character = character; 1225 _literalType = type; 1226 } 1227 override public Token clone() { 1228 return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType); 1229 } 1230 public override @property string toString() { 1231 return "Char:" ~ toUTF8([_character]); 1232 } 1233 } 1234 1235 class IntegerLiteralToken : Token { 1236 ulong _value; 1237 bool _unsigned; 1238 bool _long; 1239 public @property override ulong intValue() { return _value; } 1240 public @property override bool isUnsigned() { return _unsigned; 
} 1241 public @property override ulong isLong() { return _long; } 1242 public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } 1243 public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) { 1244 _value = value; 1245 _unsigned = unsignedFlag; 1246 _long = longFlag; 1247 } 1248 public void setFlags(bool unsignedFlag = false, bool longFlag = false) { 1249 _unsigned = unsignedFlag; 1250 _long = longFlag; 1251 } 1252 this() { 1253 super(TokenType.INTEGER); 1254 } 1255 this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) { 1256 super(TokenType.INTEGER, file, line, pos); 1257 _value = value; 1258 _unsigned = unsignedFlag; 1259 _long = longFlag; 1260 } 1261 override public Token clone() { 1262 return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long); 1263 } 1264 public override @property string toString() { 1265 return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? 
"U" : ""); 1266 } 1267 } 1268 1269 class RealLiteralToken : Token { 1270 real _value; 1271 byte _precision; 1272 bool _imaginary; 1273 public @property override ulong intValue() { return to!long(_value); } 1274 public @property override real realValue() { return _value; } 1275 public @property override double doubleValue() { return cast(double)_value; } 1276 public @property override float floatValue() { return cast(float)_value; } 1277 public @property override byte precision() { return _precision; } 1278 public @property override bool isImaginary() { return _imaginary; } 1279 public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } 1280 public void setValue(real value, byte precision = 1, bool imaginary = false) { 1281 _value = value; 1282 _precision = precision; 1283 _imaginary = imaginary; 1284 } 1285 public void setFlags(byte precision = 1, bool imaginary = false) { 1286 _precision = precision; 1287 _imaginary = imaginary; 1288 } 1289 this() { 1290 super(TokenType.FLOAT); 1291 } 1292 this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) { 1293 super(TokenType.FLOAT, file, line, pos); 1294 _value = value; 1295 _precision = precision; 1296 _imaginary = imaginary; 1297 } 1298 override public Token clone() { 1299 return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary); 1300 } 1301 public override @property string toString() { 1302 return "Integer:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? 
"i" : ""); 1303 } 1304 } 1305 1306 class IdentToken : Token { 1307 tokenizer_ident_t _id; 1308 public @property override dchar[] text() { return identMap.nameById(_id); } 1309 public void setText(dchar[] text) { _id = identMap.idByName(text); } 1310 this() { 1311 super(TokenType.IDENTIFIER); 1312 } 1313 this(SourceFile file, uint line, uint pos, dchar[] text) { 1314 super(TokenType.IDENTIFIER, file, line, pos); 1315 _id = identMap.idByName(text); 1316 } 1317 this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) { 1318 super(TokenType.IDENTIFIER, file, line, pos); 1319 _id = id; 1320 } 1321 override public Token clone() { 1322 return new IdentToken(_file, _line, _pos, _id); 1323 } 1324 public override @property string toString() { 1325 return "Ident:" ~ to!string(text); 1326 } 1327 } 1328 1329 // shared appender buffer, to avoid extra heap allocations 1330 struct StringAppender { 1331 dchar[] buf; 1332 uint len; 1333 dchar[] get() { 1334 return buf[0 .. len]; 1335 } 1336 void appendEol() { 1337 if (len + 1 > buf.length) { 1338 uint newsize = cast(uint)((len + 1 + buf.length) * 2); 1339 if (newsize < 128) 1340 newsize = 128; 1341 buf.length = newsize; 1342 } 1343 buf[len] = '\n'; 1344 len++; 1345 } 1346 void append(dchar[] s) { 1347 if (s.length == 0) 1348 return; 1349 if (len + s.length > buf.length) { 1350 uint newsize = cast(uint)((len + s.length + buf.length) * 2); 1351 if (newsize < 128) 1352 newsize = 128; 1353 buf.length = newsize; 1354 } 1355 buf[len .. 
len + s.length] = s; 1356 len += s.length; 1357 } 1358 void reset() { 1359 len = 0; 1360 } 1361 } 1362 1363 class Tokenizer 1364 { 1365 protected SourceLines _lineStream; 1366 protected dchar[] _lineText; 1367 protected int _line; // current line number 1368 protected int _len; // current line length 1369 protected int _pos; // current line read position 1370 protected int _prevLineLength; // previous line length 1371 protected uint _state; // tokenizer state 1372 1373 enum : int { 1374 EOF_CHAR = 0x001A, 1375 EOL_CHAR = 0x000A 1376 }; 1377 1378 protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken(); 1379 protected CommentToken _sharedCommentToken = new CommentToken(); 1380 protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken(); 1381 protected IdentToken _sharedIdentToken = new IdentToken(); 1382 protected OpToken _sharedOpToken = new OpToken(); 1383 protected KeywordToken _sharedKeywordToken = new KeywordToken(); 1384 protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken(); 1385 protected RealLiteralToken _sharedRealToken = new RealLiteralToken(); 1386 protected InvalidToken _sharedInvalidToken = new InvalidToken(); 1387 protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken(); 1388 protected StringAppender _stringLiteralAppender; 1389 protected StringAppender _commentAppender; 1390 protected StringAppender _identAppender; 1391 1392 protected bool _enableCommentText = true; 1393 /// when false, does not put comment text into comment token - for less allocations 1394 @property void enableCommentText(bool enabled) { 1395 _enableCommentText = enabled; 1396 } 1397 /// when false, does not put comment text into comment token - for less allocations 1398 @property bool enableCommentText() { 1399 return _enableCommentText; 1400 } 1401 1402 protected bool _errorTolerant = false; 1403 /// when true, returns BadToken instead of throwing exception 1404 @property void 
errorTolerant(bool enabled) {
        _errorTolerant = enabled;
    }
    /// when true, returns InvalidToken instead of throwing exception
    @property bool errorTolerant() {
        return _errorTolerant;
    }

    /// Creates a tokenizer reading from the given line source.
    this(SourceLines lineStream) {
        init(lineStream);
    }

    /// (Re)binds the tokenizer to a line source and resets the read position.
    void init(SourceLines lineStream) {
        _lineStream = lineStream;
        SourceFile file = _lineStream.file;
        _sharedWhiteSpaceToken.setFile(file);
        _sharedCommentToken.setFile(file);
        _sharedStringLiteralToken.setFile(file);
        _sharedIdentToken.setFile(file);
        _sharedOpToken.setFile(file);
        _sharedKeywordToken.setFile(file);
        _sharedIntegerToken.setFile(file);
        _sharedRealToken.setFile(file);
        _sharedInvalidToken.setFile(file);
        _sharedCharacterLiteralToken.setFile(file);
        buildTime = Clock.currTime();
        _line = lineStream.line;
        _pos = 0;
        _prevLineLength = 0;
        _lineText = null;
    }

    /// Creates a tokenizer over in-memory source text.
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }

    /**
     * Fetches the next line from the source stream.
     *
     * Returns: false at end of file, true otherwise (an empty line is still a line).
     * Throws: SourceEncodingException when the stream reports an error.
     */
    protected bool nextLine() {
        // explicit narrowing: .length is size_t, which does not implicitly convert to int on 64-bit
        _prevLineLength = cast(int)_lineText.length;
        _lineText = _lineStream.readLine();
        if (!_lineText) {
            if (_lineStream.errorCode != 0)
                throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
            if (_lineStream.eof) {
                // end of file
                _pos = 0;
                _len = 0;
                return false;
            }
            // just an empty line
        }
        _line = _lineStream.line;
        _pos = 0;
        _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
        return true;
    }

    /// Consumes and returns the next character; EOL_CHAR at a line break, EOF_CHAR at end of input.
    protected dchar nextChar() {
        if (_lineText is null) {
            if (!nextLine()) {
                return EOF_CHAR;
            }
        } else if (_pos >= _len) {
            if (!nextLine()) {
                return EOF_CHAR;
            }
            return EOL_CHAR;
        }
        return _lineText[_pos++];
    }

    /// Returns the next character without consuming it; EOL_CHAR at end of line, EOF_CHAR at end of input.
    protected dchar peekChar() {
        if (_lineText is null) {
            if (!nextLine()) {
                return EOF_CHAR;
            }
        }
        if (_pos >= _len)
            return EOL_CHAR;
        // peek must not move the read position (previously used _pos++)
        return _lineText[_pos];
    }

    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _line, _pos);
    }

    /// Skips spaces, tabs, VT, FF and line breaks, possibly across several lines.
    protected Token processWhiteSpace(dchar firstChar) {
        // reuse the same token instance, to avoid extra heap spamming
        if (_pos == 0) {
            _sharedWhiteSpaceToken.setPos(_line - 1, _prevLineLength);
        } else {
            _sharedWhiteSpaceToken.setPos(_line, _pos - 1);
        }
        for (;;) {
            int i = _pos;
            for (; i < _len; i++) {
                dchar ch = _lineText[i];
                if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
                    break;
            }
            _pos = i;
            if (_pos < _len)
                break;
            // go to next line
            if (!nextLine())
                break;
        }
        return _sharedWhiteSpaceToken;
    }

    /// Comment running to the end of the current line.
    protected Token processOneLineComment() {
        _sharedCommentToken.setPos(_line, _pos - 1);
        _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
        if (_enableCommentText) {
            _sharedCommentToken.text = _lineText[_pos + 1 .. $];
        }
        _pos = _len;
        return _sharedCommentToken;
    }

    protected Token processOneLineSharpComment() {
        _sharedCommentToken.setPos(_line, _pos - 1);
        if (_enableCommentText) {
            _sharedCommentToken.text = _lineText[_pos ..
$];
        }
        _pos = _len;
        return _sharedCommentToken;
    }

    /**
     * Comment of the form slash-star ... star-slash, possibly spanning several lines.
     *
     * When comment text is enabled, the token text is everything between the
     * delimiters, with lines joined by '\n'.
     */
    protected Token processMultilineComment() {
        _sharedCommentToken.setPos(_line, _pos - 1);
        _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
        _commentAppender.reset();
        int textStart = _pos + 1;
        for (;;) {
            int textEnd = int.max;
            int i = textStart;
            for (; i < _len - 1; i++) {
                if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
                    textEnd = i;
                    break;
                }
            }
            if (textEnd != int.max) {
                if (_enableCommentText)
                    _commentAppender.append(_lineText[textStart .. textEnd]);
                _pos = textEnd + 2;
                break;
            }
            // not closed on this line: keep the rest of the line before it is replaced
            if (_enableCommentText && textStart < _len)
                _commentAppender.append(_lineText[textStart .. _len]);
            if (!nextLine()) {
                // TODO: do we need throw exception if comment not closed by end of file?
                _pos = _len;
                break;
            }
            if (_enableCommentText)
                _commentAppender.appendEol();
            textStart = 0;
        }
        if (_enableCommentText) {
            _sharedCommentToken.text = _commentAppender.get();
        }
        return _sharedCommentToken;
    }

    /**
     * Nesting comment of the form slash-plus ... plus-slash, possibly spanning several lines.
     *
     * Tracks the nesting level; the comment ends when the outermost level is closed.
     * When comment text is enabled, lines are joined by '\n'.
     */
    protected Token processNestedComment() {
        _sharedCommentToken.setPos(_line, _pos - 1);
        _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
        _commentAppender.reset();
        int textStart = _pos + 1;
        int level = 1;
        for (;;) {
            int textEnd = int.max;
            int i = textStart;
            for (; i < _len - 1; i++) {
                if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
                    level++;
                    i++;
                } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
                    if (--level == 0) {
                        textEnd = i;
                        break;
                    }
                }
            }
            if (textEnd != int.max) {
                if (_enableCommentText)
                    _commentAppender.append(_lineText[textStart .. textEnd]);
                _pos = textEnd + 2;
                break;
            }
            // not closed on this line: keep the rest of the line before it is replaced
            if (_enableCommentText && textStart < _len)
                _commentAppender.append(_lineText[textStart .. _len]);
            if (!nextLine()) {
                // TODO: do we need throw exception if comment not closed by end of file?
                _pos = _len;
                break;
            }
            if (_enableCommentText)
                _commentAppender.appendEol();
            textStart = 0;
        }
        if (_enableCommentText) {
            _sharedCommentToken.text = _commentAppender.get();
        }
        return _sharedCommentToken;
    }

    /// Not implemented yet: skips one character and returns null.
    protected Token processHexString() {
        _pos++;
        // TODO:
        return null;
    }

    /// Not implemented yet: skips one character and returns null.
    protected Token processDelimitedString() {
        _pos++;
        // TODO:
        return null;
    }

    // r"string" or `string`
    /// Not implemented yet: skips one character and returns null.
    protected Token processWysiwygString(dchar ch) {
        _pos++;
        // TODO:
        return null;
    }

    /// Identifier: starts at the already consumed character (_pos - 1) and extends while isIdentMiddleChar holds.
    protected Token processIdent() {
        _sharedIdentToken.setPos(_line, _pos - 1);
        _identAppender.reset();
        int startPos = _pos - 1;
        int endPos = _len;
        for (int i = _pos; i < _len; i++) {
            dchar ch = _lineText[i];
            if (!isIdentMiddleChar(ch)) {
                endPos = i;
                break;
            }
        }
        _pos = endPos;
        _sharedIdentToken.setText(_lineText[startPos .. endPos]);
        return _sharedIdentToken;
    }

    /// Parses an optional L / U / LU / UL suffix (either case) after an integer literal.
    protected Token processIntegerSuffix() {
        if (_pos >= _len)
            return _sharedIntegerToken;
        bool longFlag = false;
        bool unsignedFlag = false;
        dchar ch = _lineText[_pos];
        dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
        if (ch == 'l' || ch == 'L') {
            longFlag = true;
            _pos++;
            if (ch2 == 'u' || ch2 == 'U') {
                unsignedFlag = true;
                _pos++;
            }
        } else if (ch == 'u' || ch == 'U') {
            unsignedFlag = true;
            _pos++;
            if (ch2 == 'l' || ch2 == 'L') {
                longFlag = true;
                _pos++;
            }
        }
        _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
        ch = _pos < _len ?
_lineText[_pos] : 0;
        if (isIdentMiddleChar(ch))
            return parserError("Unexpected character after number", _sharedIntegerToken);
        return _sharedIntegerToken;
    }

    /// Binary literal digits (after the 0b prefix); '_' separators are skipped.
    protected Token processBinaryNumber() {
        _sharedIntegerToken.setPos(_line, _pos - 1);
        _pos++;
        if (_pos >= _len)
            return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
        int digits = 0;
        ulong number = 0;
        int i = _pos;
        for (;i < _len; i++) {
            dchar ch = _lineText[i];
            if (ch == '_')
                continue; // digit separator, same as in hex / octal / decimal literals
            if (ch != '0' && ch != '1')
                break;
            number = (number << 1) | (ch == '1' ? 1 : 0);
            digits++;
        }
        _pos = i;
        if (digits > 64)
            return parserError("number is too big", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        return processIntegerSuffix();
    }

    /// Hexadecimal literal digits (after the 0x prefix); '_' separators are skipped.
    protected Token processHexNumber() {
        _sharedIntegerToken.setPos(_line, _pos - 1);
        _sharedRealToken.setPos(_line, _pos - 1);
        _pos++;
        if (_pos >= _len)
            return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
        int digits = 0;
        ulong number = 0;
        int i = _pos;
        for (;i < _len; i++) {
            dchar ch = _lineText[i];
            uint digit = 0;
            if (ch >= '0' && ch <= '9')
                digit = ch - '0';
            else if (ch >= 'a' && ch <= 'f')
                digit = ch - 'a' + 10;
            else if (ch >= 'A' && ch <= 'F')
                digit = ch - 'A' + 10;
            else if (ch == '_')
                continue;
            else
                break;
            number = (number << 4) | digit;
            digits++;
        }
        _pos = i;
        if (digits > 16)
            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        return processIntegerSuffix();
    }

    /// Octal literal digits; '_' separators are skipped. Reports an error when the value does not fit 64 bits.
    protected Token processOctNumber() {
        _sharedIntegerToken.setPos(_line, _pos - 1);
        if (_pos >= _len)
            return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
        int digits = 0;
        ulong number = 0;
        int i = _pos;
        bool overflow = false;
        for (;i < _len; i++) {
            dchar ch = _lineText[i];
            int digit = 0;
            if (ch >= '0' && ch <= '7')
                digit = ch - '0';
            else if (ch == '_')
                continue;
            else
                break;
            // The shift drops the top three bits, so they must be checked before
            // shifting (checking afterwards can never detect the loss).
            if ((number >> 61) != 0) {
                overflow = true;
                break;
            }
            number <<= 3;
            number |= digit;
            digits++;
        }
        _pos = i;
        if (overflow)
            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        return processIntegerSuffix();
    }

    /// Stores the value in the shared real token; suffix parsing is not implemented yet.
    protected Token processDecFloatSuffix(real value) {
        _sharedRealToken.setValue(value);
        // TODO
        return _sharedRealToken;
    }

    // after E char
    /// Parses an optionally signed decimal exponent and scales value by 10 to that power.
    protected Token processDecFloatExponent(real value) {
        dchar next = _pos < _len ? _lineText[_pos] : 0;
        int sign = 1;
        if (next == '+') {
            _pos++;
        } else if (next == '-') {
            _pos++;
            sign = -1;
        }
        if (_pos >= _len)
            return parserError("Invalid exponent", _sharedRealToken);
        ulong digits = 0;
        ulong number = 0;
        int i = _pos;
        bool overflow = false;
        for (;i < _len; i++) {
            dchar ch = _lineText[i];
            uint digit = 0;
            if (ch >= '0' && ch <= '9')
                digit = ch - '0';
            else if (ch == '_')
                continue;
            else
                break;
            number *= 10;
            if (digits >= 18) {
                if ((number * 10) / 10 != number) {
                    overflow = true;
                    break;
                }
            }
            number += digit;
            digits++;
        }
        if (digits == 0)
            return parserError("Invalid exponent", _sharedRealToken);
        _pos = i;
        value *= pow(10., cast(long)number * sign);
        return processDecFloatSuffix(value);
    }

    protected Token processDecFloatSecondPart(ulong firstPart) {
        if (_pos >= _len) {
            _sharedRealToken.setValue(cast(real)firstPart);
            return _sharedRealToken;
        }
        ulong
divider = 1; 1819 ulong number = 0; 1820 int i = _pos; 1821 bool overflow = false; 1822 for (;i < _len; i++) { 1823 dchar ch = _lineText[i]; 1824 uint digit = 0; 1825 if (ch >= '0' && ch <= '9') 1826 digit = ch - '0'; 1827 else if (ch == '_') 1828 continue; 1829 else 1830 break; 1831 if (divider * 10 < divider) 1832 continue; // ignore extra digits 1833 number *= 10; 1834 number += digit; 1835 divider *= 10; 1836 } 1837 _pos = i; 1838 real value = cast(real)firstPart + (cast(real)number / divider); 1839 dchar next = _pos < _len ? _lineText[_pos] : 0; 1840 if (next == 0) { 1841 // neither exponent nor suffix 1842 _sharedRealToken.setValue(value); 1843 return _sharedRealToken; 1844 } 1845 if (next == 'e' || next == 'E') { 1846 _pos++; 1847 return processDecFloatExponent(value); 1848 } 1849 return processDecFloatSuffix(value); 1850 } 1851 1852 protected Token processDecNumber(dchar c) { 1853 _pos--; 1854 _sharedIntegerToken.setPos(_line, _pos); 1855 _sharedRealToken.setPos(_line, _pos); 1856 if (_pos >= _len) 1857 return parserError("Unexpected end of line in number", _sharedIntegerToken); 1858 int digits = 0; 1859 ulong number = 0; 1860 int i = _pos; 1861 bool overflow = false; 1862 for (;i < _len; i++) { 1863 dchar ch = _lineText[i]; 1864 uint digit = 0; 1865 if (ch >= '0' && ch <= '9') 1866 digit = ch - '0'; 1867 else if (ch == '_') 1868 continue; 1869 else 1870 break; 1871 number *= 10; 1872 if (digits >= 18) { 1873 if ((number * 10) / 10 != number) { 1874 overflow = true; 1875 break; 1876 } 1877 } 1878 number += digit; 1879 digits++; 1880 } 1881 _pos = i; 1882 if (overflow) 1883 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1884 _sharedIntegerToken.setValue(number); 1885 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 1886 if (next == 0) 1887 return _sharedIntegerToken; 1888 if (next == '.') { 1889 _pos++; 1890 return processDecFloatSecondPart(number); 1891 } 1892 return processIntegerSuffix(); 1893 } 1894 1895 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 1896 protected Token parserError(string msg, Token incompleteToken) { 1897 return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type); 1898 } 1899 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 1900 protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) { 1901 if (_errorTolerant) { 1902 startPos--; 1903 _sharedInvalidToken.setPos(startLine, startPos); 1904 _sharedInvalidToken.errorMessage = msg; 1905 _sharedInvalidToken.errorCode = 1; // for future extension 1906 _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension 1907 // make invalid source text 1908 dchar[] invalidText; 1909 int p = startLine == _line ? 
startPos : 0; 1910 for (int i = p; i < _pos && i < _lineText.length; i++) 1911 invalidText ~= _lineText[i]; 1912 1913 // recover after error 1914 for (; _pos < _lineText.length; _pos++) { 1915 dchar ch = _lineText[_pos]; 1916 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}') 1917 break; 1918 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) { 1919 if (ch == '*' || ch == '/') 1920 break; 1921 } 1922 invalidText ~= ch; 1923 } 1924 _sharedInvalidToken.text = invalidText; 1925 return _sharedInvalidToken; 1926 } 1927 throw new ParserException(msg, _lineStream.file, _line, _pos); 1928 } 1929 1930 protected Keyword detectKeyword(dchar ch) { 1931 if (ch > 'z') 1932 return Keyword.NONE; 1933 int len = _len - _pos; 1934 switch (cast(ubyte)ch) { 1935 // ABSTRACT, 1936 // ALIAS, 1937 // ALIGN, 1938 // ASM, 1939 // ASSERT, 1940 // AUTO, 1941 case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos); 1942 1943 // BODY, 1944 // BOOL, 1945 // BREAK, 1946 // BYTE, 1947 case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos); 1948 1949 // CASE, 1950 // CAST, 1951 // CATCH, 1952 // CDOUBLE, 1953 // CENT, 1954 // CFLOAT, 1955 // CHAR, 1956 // CLASS, 1957 // CONST, 1958 // CONTINUE, 1959 // CREAL, 1960 case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos); 1961 1962 // DCHAR, 1963 // DEBUG, 1964 // DEFAULT, 1965 // DELEGATE, 1966 // DELETE, 1967 // DEPRECATED, 1968 // DO, 1969 // DOUBLE, 1970 case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos); 1971 1972 // ELSE, 1973 // ENUM, 1974 // EXPORT, 1975 // EXTERN, 1976 case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos); 1977 1978 // FALSE, 1979 // FINAL, 1980 // FINALLY, 1981 // FLOAT, 1982 // FOR, 1983 // FOREACH, 1984 // FOREACH_REVERSE, 1985 // FUNCTION, 1986 case 
'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos); 1987 1988 // GOTO, 1989 case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos); 1990 1991 // IDOUBLE, 1992 // IF, 1993 // IFLOAT, 1994 // IMMUTABLE, 1995 // IMPORT, 1996 // IN, 1997 // INOUT, 1998 // INT, 1999 // INTERFACE, 2000 // INVARIANT, 2001 // IREAL, 2002 // IS, 2003 case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos); 2004 2005 // LAZY, 2006 // LONG, 2007 case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos); 2008 2009 // MACRO, 2010 // MIXIN, 2011 // MODULE, 2012 case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos); 2013 2014 // NEW, 2015 // NOTHROW, 2016 // NULL, 2017 case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos); 2018 2019 // OUT, 2020 // OVERRIDE, 2021 case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos); 2022 2023 // PACKAGE, 2024 // PRAGMA, 2025 // PRIVATE, 2026 // PROTECTED, 2027 // PUBLIC, 2028 // PURE, 2029 case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos); 2030 2031 // REAL, 2032 // REF, 2033 // RETURN, 2034 case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos); 2035 2036 // SCOPE, 2037 // SHARED, 2038 // SHORT, 2039 // STATIC, 2040 // STRUCT, 2041 // SUPER, 2042 // SWITCH, 2043 // SYNCHRONIZED, 2044 case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos); 2045 2046 // TEMPLATE, 2047 // THIS, 2048 // THROW, 2049 // TRUE, 2050 // TRY, 2051 // TYPEDEF, 2052 // TYPEID, 2053 // TYPEOF, 2054 case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos); 2055 2056 // UBYTE, 2057 // UCENT, 2058 // UINT, 2059 // ULONG, 2060 // UNION, 2061 // UNITTEST, 2062 // USHORT, 2063 case 'u': 
return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos); 2064 2065 // VERSION, 2066 // VOID, 2067 // VOLATILE, 2068 case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos); 2069 2070 // WCHAR, 2071 // WHILE, 2072 // WITH, 2073 case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos); 2074 2075 // FILE, 2076 // MODULE, 2077 // LINE, 2078 // FUNCTION, 2079 // PRETTY_FUNCTION, 2080 // 2081 // GSHARED, 2082 // TRAITS, 2083 // VECTOR, 2084 // PARAMETERS, 2085 case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos); 2086 default: return Keyword.NONE; 2087 } 2088 } 2089 protected OpCode detectOp(dchar ch) nothrow { 2090 if (ch >= 128) 2091 return OpCode.NONE; 2092 dchar ch2 = _pos < _len ? _lineText[_pos] : 0; 2093 dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 2094 switch(cast(ubyte)ch) { 2095 // DIV, // / 2096 // DIV_EQ, // /= 2097 case '/': 2098 if (ch2 == '=') { 2099 _pos++; 2100 return OpCode.DIV_EQ; 2101 } 2102 return OpCode.DIV; 2103 // DOT, // . 2104 // DOT_DOT, // .. 2105 // DOT_DOT_DOT,// ... 
2106 case '.': 2107 if (ch2 == '.') { 2108 if (ch3 == '.') { 2109 _pos += 2; 2110 return OpCode.DOT_DOT_DOT; 2111 } 2112 _pos++; 2113 return OpCode.DOT_DOT; 2114 } 2115 return OpCode.DOT; 2116 // AND, // & 2117 // AND_EQ, // &= 2118 // LOG_AND, // && 2119 case '&': 2120 if (ch2 == '=') { 2121 _pos++; 2122 return OpCode.AND_EQ; 2123 } 2124 if (ch2 == '&') { 2125 _pos++; 2126 return OpCode.LOG_AND; 2127 } 2128 return OpCode.AND; 2129 // OR, // | 2130 // OR_EQ, // |= 2131 // LOG_OR, // || 2132 case '|': 2133 if (ch2 == '=') { 2134 _pos++; 2135 return OpCode.OR_EQ; 2136 } 2137 if (ch2 == '|') { 2138 _pos++; 2139 return OpCode.LOG_OR; 2140 } 2141 return OpCode.OR; 2142 // MINUS, // - 2143 // MINUS_EQ, // -= 2144 // MINUS_MINUS,// -- 2145 case '-': 2146 if (ch2 == '=') { 2147 _pos++; 2148 return OpCode.MINUS_EQ; 2149 } 2150 if (ch2 == '-') { 2151 _pos++; 2152 return OpCode.MINUS_MINUS; 2153 } 2154 return OpCode.MINUS; 2155 // PLUS, // + 2156 // PLUS_EQ, // += 2157 // PLUS_PLUS, // ++ 2158 case '+': 2159 if (ch2 == '=') { 2160 _pos++; 2161 return OpCode.PLUS_EQ; 2162 } 2163 if (ch2 == '+') { 2164 _pos++; 2165 return OpCode.PLUS_PLUS; 2166 } 2167 return OpCode.PLUS; 2168 // LT, // < 2169 // LT_EQ, // <= 2170 // SHL, // << 2171 // SHL_EQ, // <<= 2172 // LT_GT, // <> 2173 // NE_EQ, // <>= 2174 case '<': 2175 if (ch2 == '<') { 2176 if (ch3 == '=') { 2177 _pos += 2; 2178 return OpCode.SHL_EQ; 2179 } 2180 _pos++; 2181 return OpCode.SHL; 2182 } 2183 if (ch2 == '>') { 2184 if (ch3 == '=') { 2185 _pos += 2; 2186 return OpCode.NE_EQ; 2187 } 2188 _pos++; 2189 return OpCode.LT_GT; 2190 } 2191 if (ch2 == '=') { 2192 _pos++; 2193 return OpCode.LT_EQ; 2194 } 2195 return OpCode.LT; 2196 // GT, // > 2197 // GT_EQ, // >= 2198 // SHR_EQ // >>= 2199 // ASR_EQ, // >>>= 2200 // SHR, // >> 2201 // ASR, // >>> 2202 case '>': 2203 if (ch2 == '>') { 2204 if (ch3 == '>') { 2205 dchar ch4 = _pos < _len - 2 ? 
_lineText[_pos + 2] : 0; 2206 if (ch4 == '=') { // >>>= 2207 _pos += 3; 2208 return OpCode.ASR_EQ; 2209 } 2210 _pos += 2; 2211 return OpCode.ASR; // >>> 2212 } 2213 if (ch3 == '=') { // >>= 2214 _pos += 2; 2215 return OpCode.SHR_EQ; 2216 } 2217 _pos++; 2218 return OpCode.SHR; 2219 } 2220 if (ch2 == '=') { // >= 2221 _pos++; 2222 return OpCode.GT_EQ; 2223 } 2224 // > 2225 return OpCode.GT; 2226 // NOT, // ! 2227 // NOT_EQ // != 2228 // NOT_LT_GT, // !<> 2229 // NOT_LT_GT_EQ, // !<>= 2230 // NOT_LT, // !< 2231 // NOT_LT_EQ, // !<= 2232 // NOT_GT, // !> 2233 // NOT_GT_EQ, // !>= 2234 case '!': 2235 if (ch2 == '<') { // !< 2236 if (ch3 == '>') { // !<> 2237 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2238 if (ch4 == '=') { // !<>= 2239 _pos += 3; 2240 return OpCode.NOT_LT_GT_EQ; 2241 } 2242 _pos += 2; 2243 return OpCode.NOT_LT_GT; // !<> 2244 } 2245 if (ch3 == '=') { // !<= 2246 _pos += 2; 2247 return OpCode.NOT_LT_EQ; 2248 } 2249 _pos++; 2250 return OpCode.NOT_LT; // !< 2251 } 2252 if (ch2 == '=') { // != 2253 _pos++; 2254 return OpCode.NOT_EQ; 2255 } 2256 return OpCode.NOT; 2257 // PAR_OPEN, // ( 2258 case '(': 2259 return OpCode.PAR_OPEN; 2260 // PAR_CLOSE, // ) 2261 case ')': 2262 return OpCode.PAR_CLOSE; 2263 // SQ_OPEN, // [ 2264 case '[': 2265 return OpCode.SQ_OPEN; 2266 // SQ_CLOSE, // ] 2267 case ']': 2268 return OpCode.SQ_CLOSE; 2269 // CURL_OPEN, // { 2270 case '{': 2271 return OpCode.CURL_OPEN; 2272 // CURL_CLOSE, // } 2273 case '}': 2274 return OpCode.CURL_CLOSE; 2275 // QUEST, // ? 
2276 case '?': 2277 return OpCode.QUEST; 2278 // COMMA, // , 2279 case ',': 2280 return OpCode.COMMA; 2281 // SEMICOLON, // ; 2282 case ';': 2283 return OpCode.SEMICOLON; 2284 // COLON, // : 2285 case ':': 2286 return OpCode.COLON; 2287 // DOLLAR, // $ 2288 case '$': 2289 return OpCode.DOLLAR; 2290 // EQ, // = 2291 // QE_EQ, // == 2292 // EQ_GT, // => 2293 case '=': 2294 if (ch2 == '=') { // == 2295 _pos++; 2296 return OpCode.QE_EQ; 2297 } 2298 if (ch2 == '>') { // => 2299 _pos++; 2300 return OpCode.EQ_GT; 2301 } 2302 return OpCode.EQ; 2303 // MUL, // * 2304 // MUL_EQ, // *= 2305 case '*': 2306 if (ch2 == '=') { 2307 _pos++; 2308 return OpCode.MUL_EQ; 2309 } 2310 return OpCode.MUL; 2311 // MOD, // % 2312 // MOD_EQ, // %= 2313 case '%': 2314 if (ch2 == '=') { 2315 _pos++; 2316 return OpCode.MOD_EQ; 2317 } 2318 return OpCode.MOD; 2319 // XOR, // ^ 2320 // XOR_EQ, // ^= 2321 // LOG_XOR, // ^^ 2322 // LOG_XOR_EQ, // ^^= 2323 case '^': 2324 if (ch2 == '^') { 2325 if (ch3 == '=') { 2326 _pos += 2; 2327 return OpCode.LOG_XOR_EQ; 2328 } 2329 _pos++; 2330 return OpCode.LOG_XOR; 2331 } 2332 if (ch2 == '=') { 2333 _pos++; 2334 return OpCode.XOR_EQ; 2335 } 2336 return OpCode.XOR; 2337 // INV, // ~ 2338 // INV_EQ, // ~= 2339 case '~': 2340 if (ch2 == '=') { 2341 _pos++; 2342 return OpCode.INV_EQ; 2343 } 2344 return OpCode.INV; 2345 // AT, // @ 2346 case '@': 2347 return OpCode.AT; 2348 // SHARP // # 2349 case '#': 2350 return OpCode.SHARP; 2351 default: 2352 return OpCode.NONE; 2353 } 2354 } 2355 2356 protected Token processCharacterLiteral() { 2357 _sharedCharacterLiteralToken.setPos(_line, _pos - 1); 2358 if (_pos + 2 > _len) 2359 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2360 dchar ch = _lineText[_pos++]; 2361 dchar ch2 = _lineText[_pos++]; 2362 dchar type = 0; 2363 if (ch == '\\') { 2364 // process escaped character - store it in ch 2365 // TODO: support all escape sequences 2366 switch(ch2) { 2367 case 'r': 2368 ch = '\r'; 2369 break; 
2370 case 'n': 2371 ch = '\n'; 2372 break; 2373 case 't': 2374 ch = '\t'; 2375 break; 2376 case '\\': 2377 ch = '\\'; 2378 break; 2379 default: 2380 ch = ch2; 2381 break; 2382 } 2383 // here must be closing ' 2384 if (_pos + 1 > _len) 2385 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2386 ch2 = _lineText[_pos++]; 2387 } 2388 if (ch2 != '\'') 2389 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2390 if (_pos < _len) { 2391 dchar t = _lineText[_pos]; 2392 if (t == 'd' || t == 'w' || t == 'c') { 2393 type = t; 2394 _pos++; 2395 } else if (isIdentMiddleChar(ch)) { 2396 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken); 2397 } 2398 } 2399 _sharedCharacterLiteralToken.setCharacter(ch, type); 2400 return _sharedCharacterLiteralToken; 2401 } 2402 2403 protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) { 2404 bool wysiwyg = (delimiter == 'r' || delimiter == '`'); 2405 //writeln("processDoubleQuotedString()"); 2406 _sharedStringLiteralToken.setPos(_line, _pos - 1); 2407 _stringLiteralAppender.reset(); 2408 if (delimiter == 'r') { 2409 _pos++; 2410 delimiter = '\"'; 2411 } 2412 dchar type = 0; 2413 for (;;) { 2414 int i = _pos; 2415 int endPos = int.max; 2416 for(; i < _len; i++) { 2417 if (_lineText[i] == delimiter && (i == 0 || _lineText[i - 1] != '\\')) { 2418 endPos = i; 2419 break; 2420 } 2421 } 2422 if (endPos != int.max) { 2423 // found end quote 2424 _stringLiteralAppender.append(_lineText[_pos .. endPos]); 2425 _pos = endPos + 1; 2426 break; 2427 } 2428 // no quote by end of line 2429 _stringLiteralAppender.append(_lineText[_pos .. $]); 2430 _stringLiteralAppender.appendEol(); 2431 if (!nextLine()) { 2432 // do we need to throw exception if eof comes before end of string? 
2433 break; 2434 } 2435 } 2436 dchar t = 0; 2437 if (_pos < _len) { 2438 dchar ch = _lineText[_pos]; 2439 if (ch == 'c' || ch == 'w' || ch == 'd') 2440 t = ch; 2441 else if (isIdentMiddleChar(ch)) 2442 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2443 } 2444 if (t != 0) { 2445 if (type != 0 && t != type) 2446 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); 2447 type = t; 2448 } 2449 if (!wysiwyg) { 2450 // no escape processing 2451 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2452 return _sharedStringLiteralToken; 2453 } 2454 // TODO: process escape sequences 2455 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2456 return _sharedStringLiteralToken; 2457 } 2458 2459 protected SysTime buildTime; 2460 2461 // string literal of the date of compilation "mmm dd yyyy" 2462 protected dstring formatBuildDate() { 2463 // TODO: provide proper format 2464 return to!dstring(buildTime); 2465 } 2466 2467 // string literal of the time of compilation "hh:mm:ss" 2468 protected dstring formatBuildTime() { 2469 // TODO: provide proper format 2470 return to!dstring(buildTime); 2471 } 2472 2473 // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2474 protected dstring formatBuildTimestamp() { 2475 // TODO: provide proper format 2476 return to!dstring(buildTime); 2477 } 2478 2479 static immutable dstring VERSION = "0.1"; 2480 static immutable dstring VENDOR = "coolreader.org"; 2481 2482 protected Token makeSpecialTokenString(dstring str, int pos) { 2483 _sharedStringLiteralToken.setPos(_line, pos); 2484 _sharedStringLiteralToken.setText(cast(dchar[])str, 0); 2485 return _sharedStringLiteralToken; 2486 } 2487 2488 protected Token processSpecialToken(Keyword keyword, int pos) { 2489 switch (keyword) { 2490 //Special Token Replaced with 2491 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 
2492 return makeSpecialTokenString(formatBuildDate(), pos); 2493 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2494 return makeSpecialTokenString(formatBuildTime(), pos); 2495 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2496 return makeSpecialTokenString(formatBuildTimestamp(), pos); 2497 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2498 return makeSpecialTokenString(VENDOR, pos); 2499 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2500 return makeSpecialTokenString(VERSION, pos); 2501 default: 2502 parserError("Unknown special token", _line, pos); 2503 } 2504 return null; 2505 } 2506 2507 // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls). 2508 Token nextToken() { 2509 dchar ch = nextChar(); 2510 if (ch == EOF_CHAR) { 2511 return emitEof(); 2512 } 2513 if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { 2514 // white space (treat EOL as whitespace, too) 2515 return processWhiteSpace(ch); 2516 } 2517 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2518 if (ch == '/') { 2519 if (next == '/') 2520 return processOneLineComment(); 2521 else if (next == '*') 2522 return processMultilineComment(); 2523 else if (next == '+') 2524 return processNestedComment(); 2525 } 2526 if (ch == '#' && _line == 1) 2527 return processOneLineSharpComment(); 2528 if (ch == '\"') 2529 return processDoubleQuotedOrWysiwygString(ch); 2530 if (ch == '\'') 2531 return processCharacterLiteral(); 2532 if (ch == 'x' && next == '\"') 2533 return processHexString(); 2534 if (ch == 'q' && next == '\"') 2535 return processDelimitedString(); 2536 if ((ch == 'r' && next == '\"') || (ch == '`')) 2537 return processDoubleQuotedOrWysiwygString(ch); 2538 int oldPos = _pos - 1; 2539 2540 if (ch == '0') { 2541 if (next == 'b' || next == 'B') 2542 return processBinaryNumber(); 2543 if (next == 'x' || next == 'X') 2544 return processHexNumber(); 2545 if (next >= '0' && next <= '9') 2546 return processOctNumber(); 2547 if (next >= '0' && next <= '9') 2548 return processDecNumber(ch); 2549 } 2550 if (ch >= '0' && ch <= '9') 2551 return processDecNumber(ch); 2552 if (ch == '.' && next >= '0' && next <= '9') // .123 2553 return processDecFloatSecondPart(0); 2554 2555 if (ch == '_' || isUniversalAlpha(ch)) { 2556 // start of identifier or keyword? 
2557 Keyword keyword = detectKeyword(ch); 2558 if (keyword != Keyword.NONE) { 2559 switch (keyword) { 2560 //Special Token Replaced with 2561 case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file 2562 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2563 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2564 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2565 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2566 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2567 return processSpecialToken(keyword, oldPos); 2568 default: 2569 _sharedKeywordToken.setPos(_line, oldPos); 2570 _sharedKeywordToken.keyword = keyword; 2571 return _sharedKeywordToken; 2572 } 2573 } 2574 return processIdent(); 2575 } 2576 OpCode op = detectOp(ch); 2577 if (op != OpCode.NONE) { 2578 _sharedOpToken.setPos(_line, oldPos); 2579 _sharedOpToken.opCode = op; 2580 return _sharedOpToken; 2581 } 2582 return null; 2583 } 2584 2585 2586 } 2587 2588 unittest { 2589 import std.algorithm; 2590 class TokenTest { 2591 int _line; 2592 string _file; 2593 this(string file, int line) { 2594 _file = file; 2595 _line = line; 2596 } 2597 bool doTest(Token token) { 2598 return true; 2599 } 2600 void execute(Tokenizer tokenizer) { 2601 Token token = tokenizer.nextToken(); 2602 if (!doTest(token)) { 2603 assert(false, " token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString); 2604 } 2605 } 2606 public override @property string toString() { 2607 return "TokenTest"; 2608 } 2609 } 2610 void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) { 2611 Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line)); 2612 for (int i = 0; i < tokens.length; i++) { 2613 tokens[i].execute(tokenizer); 2614 } 2615 } 
2616 class KeywordTest : TokenTest { 2617 Keyword _code; 2618 this(Keyword code, string file = __FILE__, uint line = __LINE__) { 2619 super(file, line); 2620 _code = code; 2621 } 2622 override bool doTest(Token token) { 2623 if (token.type != TokenType.KEYWORD) 2624 return false; 2625 if (token.keyword != _code) 2626 return false; 2627 return true; 2628 } 2629 public override @property string toString() { 2630 return "Keyword:" ~ to!string(_code); 2631 } 2632 } 2633 class OpTest : TokenTest { 2634 OpCode _code; 2635 this(OpCode code, string file = __FILE__, uint line = __LINE__) { 2636 super(file, line); 2637 _code = code; 2638 } 2639 override bool doTest(Token token) { 2640 if (token.type != TokenType.OP) 2641 return false; 2642 if (token.opCode != _code) 2643 return false; 2644 return true; 2645 } 2646 public override @property string toString() { 2647 return "Op:" ~ to!string(_code); 2648 } 2649 } 2650 class StringTest : TokenTest { 2651 string _value; 2652 this(string value, string file = __FILE__, uint line = __LINE__) { 2653 super(file, line); 2654 _value = value; 2655 } 2656 override bool doTest(Token token) { 2657 if (token.type != TokenType.STRING) 2658 return false; 2659 if (to!string(token.text).equal(_value)) 2660 return false; 2661 return true; 2662 } 2663 public override @property string toString() { 2664 return "String:" ~ _value; 2665 } 2666 } 2667 class IntegerTest : TokenTest { 2668 ulong _value; 2669 bool _unsigned; 2670 bool _long; 2671 this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 2672 super(file, line); 2673 _value = value; 2674 _unsigned = unsignedFlag; 2675 _long = longFlag; 2676 } 2677 override bool doTest(Token token) { 2678 if (token.type != TokenType.INTEGER) 2679 return false; 2680 if (token.intValue != _value) 2681 return false; 2682 if (token.isUnsigned != _unsigned) 2683 return false; 2684 if (token.isLong != _long) 2685 return false; 2686 return true; 2687 } 2688 
public override @property string toString() { 2689 return "Integer:" ~ to!string(_value); 2690 } 2691 } 2692 class RealTest : TokenTest { 2693 real _value; 2694 ubyte _precision; 2695 bool _imaginary; 2696 this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 2697 super(file, line); 2698 _value = value; 2699 _precision = precision; 2700 _imaginary = imaginary; 2701 } 2702 override bool doTest(Token token) { 2703 if (token.type != TokenType.FLOAT) 2704 return false; 2705 if (token.realValue != _value) 2706 return false; 2707 if (token.precision != _precision) 2708 return false; 2709 if (token.isImaginary != _imaginary) 2710 return false; 2711 return true; 2712 } 2713 public override @property string toString() { 2714 return "Real:" ~ to!string(_value); 2715 } 2716 } 2717 class IdentTest : TokenTest { 2718 string _value; 2719 this(string value, string file = __FILE__, uint line = __LINE__) { 2720 super(file, line); 2721 _value = value; 2722 } 2723 override bool doTest(Token token) { 2724 if (token.type != TokenType.IDENTIFIER) 2725 return false; 2726 if (! 
to!string(token.text).equal(_value)) 2727 return false; 2728 return true; 2729 } 2730 public override @property string toString() { 2731 return "Ident:" ~ _value; 2732 } 2733 } 2734 class CommentTest : TokenTest { 2735 this(string file = __FILE__, uint line = __LINE__) { 2736 super(file, line); 2737 } 2738 override bool doTest(Token token) { 2739 if (token.type != TokenType.COMMENT) 2740 return false; 2741 return true; 2742 } 2743 public override @property string toString() { 2744 return "Comment"; 2745 } 2746 } 2747 class EOFTest : TokenTest { 2748 this(string file = __FILE__, uint line = __LINE__) { 2749 super(file, line); 2750 } 2751 override bool doTest(Token token) { 2752 if (token.type != TokenType.EOF) 2753 return false; 2754 return true; 2755 } 2756 public override @property string toString() { 2757 return "EOF"; 2758 } 2759 } 2760 class WhiteSpaceTest : TokenTest { 2761 this(string file = __FILE__, uint line = __LINE__) { 2762 super(file, line); 2763 } 2764 override bool doTest(Token token) { 2765 if (token.type != TokenType.WHITESPACE) 2766 return false; 2767 return true; 2768 } 2769 public override @property string toString() { 2770 return "whiteSpace"; 2771 } 2772 } 2773 TokenTest checkString(string value, string file = __FILE__, uint line = __LINE__) { 2774 return new StringTest(value, file, line); 2775 } 2776 TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 2777 return new IntegerTest(value, unsignedFlag, longFlag, file, line); 2778 } 2779 TokenTest checkReal(real value, byte precision = 0, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 2780 return new RealTest(value, precision, imaginary, file, line); 2781 } 2782 TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) { 2783 return new IdentTest(value, file, line); 2784 } 2785 TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) { 2786 
return new KeywordTest(value, file, line); 2787 } 2788 TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) { 2789 return new OpTest(value, file, line); 2790 } 2791 TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) { 2792 return new WhiteSpaceTest(file, line); 2793 } 2794 TokenTest checkComment(string file = __FILE__, uint line = __LINE__) { 2795 return new CommentTest(file, line); 2796 } 2797 TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) { 2798 return new EOFTest(file, line); 2799 } 2800 2801 testTokenizer(q"TEST 2802 int i; 2803 TEST" 2804 , [ 2805 checkKeyword(Keyword.INT), 2806 checkSpace(), 2807 checkIdent("i"), 2808 checkOp(OpCode.SEMICOLON), 2809 checkEOF() 2810 ]); 2811 testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25" 2812 , [ 2813 checkInteger(13), 2814 checkSpace(), 2815 checkInteger(0x123abcd, true, false), 2816 checkSpace(), 2817 checkInteger(0xabc, false, true), 2818 checkSpace(), 2819 checkInteger(std.conv.octal!743), 2820 checkSpace(), 2821 checkInteger(192_837_465), 2822 checkSpace(), 2823 checkInteger(0), 2824 checkSpace(), 2825 checkInteger(192837465), 2826 checkSpace(), 2827 checkReal(5.25), 2828 checkEOF() 2829 ]); 2830 } 2831 2832 unittest { 2833 import std.stdio; 2834 import std.conv; 2835 import std.utf; 2836 import ddx.lexer.LineStream; 2837 string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d"; 2838 writeln("opening file"); 2839 try { 2840 std.stream.File f = new std.stream.File(fname); 2841 scope(exit) { f.close(); } 2842 try { 2843 LineStream lines = LineStream.create(f, fname); 2844 Tokenizer tokenizer = new Tokenizer(lines); 2845 for (;;) { 2846 Token token = tokenizer.nextToken(); 2847 if (token is null) { 2848 writeln("Null token returned"); 2849 break; 2850 } 2851 if (token.type == TokenType.EOF) { 2852 writeln("EOF token"); 2853 break; 2854 } 2855 writeln("", token.line, ":", token.pos, "\t", token.toString); 2856 } 2857 } catch 
(Exception e) { 2858 writeln("Exception " ~ e.toString); 2859 } 2860 } catch (Exception e) { 2861 writeln("Exception " ~ e.toString); 2862 } 2863 }