module ddc.lexer.tokenizer;

import ddc.lexer.textsource;
import ddc.lexer.exceptions;

import std.stdio;
import std.datetime;
import std.conv;
import std.utf;
import std.math;

/// Token type tags; the underlying storage type is ubyte.
/// NOTE(review): the meaning of each member is not documented in this part of
/// the file -- presumably one tag per lexical category; confirm against the
/// tokenizer that produces them.
enum TokenType : ubyte {
    EOF,
    //EOL,
    WHITESPACE,
    COMMENT,
    IDENTIFIER,
    STRING,
    CHARACTER,
    INTEGER,
    FLOAT,
    KEYWORD,
    OP,
    INVALID
}

// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _
// max code is 0xd7ff
//1728
//
// Layout: one bit per code point, 32 code points per uint. The flag for code
// point ch is bit (ch & 31) of entry [ch >> 5] (this is how isUniversalAlpha
// reads it). 1728 entries * 32 bits cover 0x0000..0xd7ff. Each source row holds
// 8 entries = 256 code points; the trailing comment gives the row's range.
// NOTE(review): C99 lists universal character names for identifiers in Annex D;
// "Annex E" above may be a misreading of "9899:1999(E)" -- confirm.
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
    0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
    0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
    0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
    0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
    0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
    0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
    0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
    0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
    0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
    0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff 45 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff 46 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff 47 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff 48 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff 49 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff 50 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff 51 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff 52 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff 53 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff 54 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff 55 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff 56 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff 57 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff 58 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff 59 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff 60 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff 61 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff 62 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff 63 
0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff 64 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff 65 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff 66 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff 67 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff 68 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff 69 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff 70 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff 71 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff 72 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff 73 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff 74 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff 75 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff 76 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff 77 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff 78 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff 79 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff 80 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff 81 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff 82 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff 83 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff 84 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff 85 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff 86 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff 87 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff 88 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff 89 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff 90 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff 91 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff 92 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff 93 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff 94 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff 95 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff 96 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff 97 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff 98 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff 99 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff 100 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff 101 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff 102 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff 103 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff 104 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff 105 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff 106 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff 107 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff 108 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff 109 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff 110 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff 111 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff 112 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff 113 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff 114 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff 115 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff 116 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff 117 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff 118 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff 119 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff 120 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff 121 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff 122 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff 123 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff 124 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff 125 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff 126 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff 127 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff 128 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff 129 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff 130 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff 131 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff 132 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff 133 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff 134 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff 135 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff 136 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff 137 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff 138 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff 139 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff 140 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff 141 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff 142 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff 143 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff 144 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff 145 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff 146 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff 147 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff 148 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff 149 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff 150 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff 151 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff 152 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff 153 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff 154 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff 155 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff 156 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff 157 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff 158 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff 159 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff 160 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff 161 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff 162 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff 163 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff 164 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff 165 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff 166 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff 167 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff 168 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff 169 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff 170 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff 171 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff 172 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff 173 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff 174 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff 175 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff 176 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff 177 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff 178 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff 179 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff 180 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff 181 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff 182 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff 183 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff 184 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff 185 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff 186 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff 187 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff 188 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff 189 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff 190 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff 191 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff 192 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff 193 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff 194 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff 195 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff 196 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff 197 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff 198 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff 199 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff 200 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff 201 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff 202 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff 203 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff 204 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff 205 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff 206 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff 207 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff 208 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff 209 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff 210 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff 211 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff 212 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff 213 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff 214 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff 215 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff 216 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff 217 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff 218 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff 219 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff 220 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff 221 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff 222 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff 223 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff 224 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff 225 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff 226 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff 227 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff 228 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff 229 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff 230 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff 231 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff 232 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff 233 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff 234 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
];

/// Tests whether `ch` is flagged in UNIVERSAL_ALPHA_FLAGS, i.e. whether it is
/// A..Z, a..z, _ or a universal alpha character.
/// Code points above 0xd7ff are outside the table and always yield false.
bool isUniversalAlpha(dchar ch) pure nothrow {
    // Guard first so the table is never indexed past its last entry.
    if (ch > 0xd7ff)
        return false;
    immutable uint word = UNIVERSAL_ALPHA_FLAGS[ch >> 5]; // 32 flags per entry
    immutable uint mask = 1u << (ch & 31);                // bit for this code point
    return (word & mask) != 0;
}

/// True when `ch` may appear as the first character of an identifier.
bool isIdentStartChar(dchar ch) pure nothrow {
    return isUniversalAlpha(ch);
}

/// True when `ch` may appear after the first character of an identifier:
/// an ASCII digit 0..9, or anything accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    if (ch >= '0' && ch <= '9')
        return true;
    return isUniversalAlpha(ch);
}

immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
static if
(ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
    /// True when `ch` is exactly the code unit `v`.
    bool r(dchar ch, wchar v) pure nothrow {
        return ch == v;
    }

    /// True when `ch` lies in the inclusive range `v1`..`v2`.
    bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
        return ch >= v1 && ch <= v2;
    }

    /// Range-by-range reference check for universal alpha characters.
    /// Only compiled when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true; the unittest
    /// below uses it to generate and to verify UNIVERSAL_ALPHA_FLAGS.
    /// Does NOT include ASCII letters or '_' -- callers add those themselves.
    /// Each group of r(...) calls transcribes the ranges listed in the comment above it.
    bool isUniversalAlphaSlow(dchar c) pure nothrow {
        return
            // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
            // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
            r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
            || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
            //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
            //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
            //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
            //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
            //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
            || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
            || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
            || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
            || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
            || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
            //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
            //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
            || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
            || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
            //Armenian: 0531−0556, 0561−0587
            || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
            //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
            //05F0−05F2
            || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
            || r(c, 0x05F0,0x05F2)
            //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
            //06D0−06DC, 06E5−06E8, 06EA−06ED
            || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
            || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
            //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
            || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
            //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
            //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
            //09DC−09DD, 09DF−09E3, 09F0−09F1
            || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
            || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
            || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
            //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
            //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
            //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
            || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
            || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
            || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
            //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
            //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
            //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
            || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
            || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
            || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
            // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
            //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
            //0B5C−0B5D, 0B5F−0B61
            || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
            || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
            || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
            //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
            //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
            //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
            || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
            || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
            || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
            //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
            //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
            || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
            || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
            //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
            //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
            //0CE0−0CE1
            || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
            || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
            || r(c, 0x0CE0,0x0CE1)
            //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
            //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
            || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
            || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
            //Thai: 0E01−0E3A, 0E40−0E5B
            || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
            //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
            //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
            //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
            //0EC8−0ECD, 0EDC−0EDD
            || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
            || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
            || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
            || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
            //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
            //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
            //0FB1−0FB7, 0FB9
            || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
            || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
            || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
            //Georgian: 10A0−10C5, 10D0−10F6
            || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
            //Hiragana: 3041−3093, 309B−309C
            || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
            //Katakana: 30A1−30F6, 30FB−30FC
            || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
            //Bopomofo: 3105−312C
            || r(c, 0x3105,0x312C)
            //CJK Unified Ideographs: 4E00−9FA5
            || r(c, 0x4E00,0x9FA5)
            //Hangul: AC00−D7A3
            || r(c, 0xAC00,0xD7A3)
            //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
            //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
            //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
            || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
            || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
            || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
            //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
            //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
            //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
            //2133−2138, 2160−2182, 3005−3007, 3021−3029
            || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
            || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
            || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
            || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
            ;
    }

}

// Generator and self-check for UNIVERSAL_ALPHA_FLAGS.
// Does nothing unless ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true. When enabled it
// prints a table literal to stdout and then asserts that isUniversalAlpha agrees
// with the slow reference predicate for every code below 0x100000.
unittest {
    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        immutable uint itemsInRow = 8;                 // table entries printed per output row
        immutable uint codesPerRow = itemsInRow * 32;  // code points covered by one row

        // Reference predicate: universal alpha OR a..z OR A..Z OR _.
        // The explicit cast is required because uint does not implicitly
        // convert to dchar.
        static bool expectedAlpha(uint code) {
            return isUniversalAlphaSlow(cast(dchar)code) || code == '_'
                || (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
        }

        // Highest code point in the BMP accepted by the reference predicate.
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            if (expectedAlpha(i))
                maxAlpha = i;
        }
        // Extend to the last code point of the row containing maxAlpha. Written
        // as (q + 1) * N - 1 so that a maxAlpha sitting exactly on the first
        // code of a row still keeps that row in the table.
        maxAlpha = (maxAlpha / codesPerRow + 1) * codesPerRow - 1;
        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            if ((i / 32) % itemsInRow == 0)
                write(" ");
            // Pack the flags of 32 consecutive code points into one entry.
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                if (expectedAlpha(i + j))
                    flags |= (1 << j);
            }
            writef("0x%08x", flags);
            if (i != maxAlpha / 32 * 32)
                write(",");  // no comma after the very last entry
            if ((i / 32) % itemsInRow == itemsInRow - 1)
                writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
        }
        writeln("];");

        // Cross-check the fast table lookup against the reference predicate.
        for (uint ch = 0; ch < 0x100000; ch++) {
            bool flag = expectedAlpha(ch);
            bool flag2 = isUniversalAlpha(cast(dchar)ch);
            if (flag2 != flag) {
                writefln("universalAlpha test failed for char %06x expected %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
            }
            assert(flag2 == flag);
        }
    }
}

/// Operator codes. The trailing comment on each member shows the operator text;
/// the same text is stored at the member's index in OP_CODE_STRINGS.
enum OpCode : ubyte {
    NONE,       //    no op
    DIV,        //    /
    DIV_EQ,     //    /=
    DOT,        //    .
    DOT_DOT,    //    ..
    DOT_DOT_DOT,//    ...
    AND,        //    &
    AND_EQ,     //    &=
    LOG_AND,    //    &&
    OR,         //    |
    OR_EQ,      //    |=
    LOG_OR,     //    ||
    MINUS,      //    -
    MINUS_EQ,   //    -=
    MINUS_MINUS,//    --
    PLUS,       //    +
    PLUS_EQ,    //    +=
    PLUS_PLUS,  //    ++
    LT,         //    <
    LT_EQ,      //    <=
    SHL,        //    <<
    SHL_EQ,     //    <<=
    LT_GT,      //    <>
    NE_EQ,      //    <>=
    GT,         //    >
    GT_EQ,      //    >=
    SHR_EQ,     //    >>=
    ASR_EQ,     //    >>>=
    SHR,        //    >>
    ASR,        //    >>>
    NOT,        //    !
    NOT_EQ,     //    !=
    NOT_LT_GT,  //    !<>
    NOT_LT_GT_EQ, //    !<>=
    NOT_LT,     //    !<
    NOT_LT_EQ,  //    !<=
    NOT_GT,     //    !>
    NOT_GT_EQ,  //    !>=
    PAR_OPEN,   //    (
    PAR_CLOSE,  //    )
    SQ_OPEN,    //    [
    SQ_CLOSE,   //    ]
    CURL_OPEN,  //    {
    CURL_CLOSE, //    }
    QUEST,      //    ?
COMMA,         // ,
    SEMICOLON,     // ;
    COLON,         // :
    DOLLAR,        // $
    EQ,            // =
    QE_EQ,         // ==
    MUL,           // *
    MUL_EQ,        // *=
    MOD,           // %
    MOD_EQ,        // %=
    XOR,           // ^
    XOR_EQ,        // ^=
    LOG_XOR,       // ^^
    LOG_XOR_EQ,    // ^^=
    INV,           // ~
    INV_EQ,        // ~=
    AT,            // @
    EQ_GT,         // =>
    SHARP          // #
}

/// Source text of every operator, indexed by OpCode value.
immutable dstring[] OP_CODE_STRINGS = [
    "",
    "/", "/=",
    ".", "..", "...",
    "&", "&=", "&&",
    "|", "|=", "||",
    "-", "-=", "--",
    "+", "+=", "++",
    "<", "<=", "<<", "<<=", "<>", "<>=",
    ">", ">=", ">>=", ">>>=", ">>", ">>>",
    "!", "!=", "!<>", "!<>=", "!<", "!<=", "!>", "!>=",
    "(", ")", "[", "]", "{", "}",
    "?", ",", ";", ":", "$",
    "=", "==",
    "*", "*=",
    "%", "%=",
    "^", "^=", "^^", "^^=",
    "~", "~=",
    "@",
    "=>",
    "#"
];

// getOpNameD() indexes the table directly by enum value, so a missing or
// extra entry would silently shift every name after it.
static assert(OP_CODE_STRINGS.length == OpCode.max + 1,
    "OP_CODE_STRINGS must have exactly one entry per OpCode member");

/// Returns the source text of operator `op` (empty string for OpCode.NONE).
dstring getOpNameD(OpCode op) pure nothrow {
    return OP_CODE_STRINGS[op];
}

/// Keyword and special token codes; the order must match KEYWORD_STRINGS.
enum Keyword : ubyte {
    NONE,

    AT_DISABLE, //"@disable",
    AT_NOGC, //"@nogc",
    AT_PROPERTY, //"@property",

    ABSTRACT, ALIAS, ALIGN, ASM, ASSERT, AUTO,

    BODY, BOOL, BREAK, BYTE,

    CASE, CAST, CATCH, CDOUBLE, CENT, CFLOAT, CHAR, CLASS, CONST, CONTINUE, CREAL,

    DCHAR, DEBUG, DEFAULT, DELEGATE, DELETE, DEPRECATED, DO, DOUBLE,

    ELSE, ENUM, EXPORT, EXTERN,

    FALSE, FINAL, FINALLY, FLOAT, FOR, FOREACH, FOREACH_REVERSE, FUNCTION,

    GOTO,

    IDOUBLE, IF, IFLOAT, IMMUTABLE, IMPORT, IN, INOUT, INT, INTERFACE, INVARIANT, IREAL, IS,

    LAZY, LONG,

    MACRO, MIXIN, MODULE,

    NEW, NOTHROW, NULL,

    OUT,
OVERRIDE,

    PACKAGE, PRAGMA, PRIVATE, PROTECTED, PUBLIC, PURE,

    REAL, REF, RETURN,

    SAFE, SCOPE, SHARED, SHORT, STATIC, STRUCT, SUPER, SWITCH, SYNCHRONIZED, SYSTEM,

    TEMPLATE, THIS, THROW, TRUE, TRUSTED, TRY, TYPEDEF, TYPEID, TYPEOF,

    UBYTE, UCENT, UINT, ULONG, UNION, UNITTEST, USHORT,

    VERSION, VOID, VOLATILE,

    WCHAR, WHILE, WITH,

    FILE, MODULE__, LINE, FUNCTION__, PRETTY_FUNCTION,

    //Special Token Replaced with
    DATE, // string literal of the date of compilation "mmm dd yyyy"
    EOF, // sets the scanner to the end of the file
    TIME, // string literal of the time of compilation "hh:mm:ss"
    TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    VENDOR, // Compiler vendor string, such as "Digital Mars D"
    VERSION_, // Compiler version as an integer, such as 2001

    GSHARED, TRAITS, VECTOR, PARAMETERS,

}

/// Source text of every keyword, indexed by Keyword value.
immutable dstring[] KEYWORD_STRINGS = [
    "",

    "@disable", "@nogc", "@property",

    "abstract", "alias", "align", "asm", "assert", "auto",

    "body", "bool", "break", "byte",

    "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal",

    "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", "double",

    "else", "enum", "export", "extern",

    "false", "final", "finally", "float", "for", "foreach", "foreach_reverse", "function",

    "goto",

    "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",

    "lazy", "long",

    "macro", "mixin", "module",

    "new", "nothrow", "null",

    "out", "override",

    "package", "pragma", "private", "protected", "public", "pure",

    "real", "ref", "return",

    "safe", "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized", "system",

    "template", "this", "throw", "true", "trusted", "try", "typedef", "typeid", "typeof",

    "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",

    "version", "void", "volatile",

    "wchar", "while", "with",

    "__FILE__", "__MODULE__", "__LINE__", "__FUNCTION__", "__PRETTY_FUNCTION__",

    //Special Token Replaced with
    "__DATE__", // string literal of the date of compilation "mmm dd yyyy"
    "__EOF__", // sets the scanner to the end of the file
    "__TIME__", // string literal of the time of compilation "hh:mm:ss"
    "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    "__VENDOR__", // Compiler vendor string, such as "Digital Mars D"
    "__VERSION__", // Compiler version as an integer, such as 2001

    "__gshared", "__traits", "__vector", "__parameters"
];

// Both getKeywordNameD() and findKeyword() index the table by enum value;
// fail the build instead of returning shifted names if the two lists diverge.
static assert(KEYWORD_STRINGS.length == Keyword.max + 1,
    "KEYWORD_STRINGS must have exactly one entry per Keyword member");

/// Returns the source text of `keyword` (empty string for Keyword.NONE).
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    return KEYWORD_STRINGS[keyword];
}

/**
 * Looks for a keyword in the range start..end (inclusive) whose text, minus
 * its first character, is a prefix of `name`.
 *
 * The comparison starts at index 1 of the keyword text, i.e. `name` is
 * compared against the keyword without its first character.
 *
 * Params:
 *   start = first Keyword to try
 *   end   = last Keyword to try (inclusive)
 *   name  = characters to compare against the keyword text from index 1 on
 *   len   = number of characters available at `name`
 *   pos   = advanced by (keyword length - 1) when a keyword is found
 * Returns: the matching keyword, or Keyword.NONE when nothing matches.
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    for (Keyword candidate = start; candidate <= end; candidate++) {
        dstring kw = KEYWORD_STRINGS[candidate];
        if (kw.length > len + 1)
            continue; // too long
        bool matches = true;
        for (uint k = 1; k < kw.length; k++) {
            if (kw[k] != name[k - 1]) {
                matches = false;
                break;
            }
        }
        if (!matches)
            continue;
        // accept only a whole word: either the input ends here, or the next
        // character cannot continue an identifier
        if (kw.length == len + 1 || !isIdentMiddleChar(name[kw.length - 1])) {
            pos += kw.length - 1;
            return candidate;
        }
    }
    return Keyword.NONE;
}

/**
 * Token.
 */
class Token {
    // 32bit 64bit platform
    // vtable 4 bytes 8 bytes
    protected SourceFile _file; // 4 bytes 8 bytes
    protected int _line; // 4 bytes 4 bytes
    protected int _pos; // 4 bytes 4 bytes
    protected TokenType _type; // 1 byte 1 byte
    // total 17 bytes 25 bytes
    /// returns token type
    @property TokenType type() { return _type; }
    /// returns file info for source
    @property SourceFile filename() { return _file; }
    /// returns 1-based source line number of token start
    @property int line() { return _line; }
    /// returns 1-based source line position of token start
    @property int pos() { return _pos; }
    /// returns token text
    @property dstring text() { return null; }

    // number token properties
    @property dchar literalType() { return 0; }
    @property ulong intValue() { return 0; }
    @property bool isUnsigned() { return false; }
    @property ulong isLong() { return false; }
    @property real realValue() { return 0; }
    @property double doubleValue() { return 0; }
    @property float floatValue() { return 0; }
    @property byte precision() { return 0; }
    @property bool isImaginary() { return false; }
    /// true for any of ( ) [ ] { }
    @property bool isBracket() {
        OpCode op = opCode;
        return op == OpCode.PAR_OPEN
            || op == OpCode.PAR_CLOSE
            || op == OpCode.SQ_OPEN
            || op == OpCode.SQ_CLOSE
            || op == OpCode.CURL_OPEN
            || op == OpCode.CURL_CLOSE;
    }
    /// true for any of ( [ {
    @property bool isOpenBracket() {
        OpCode op = opCode;
        return op == OpCode.PAR_OPEN
            || op == OpCode.SQ_OPEN
            || op == OpCode.CURL_OPEN;
    }
    /// true for any of ) ] }
    @property bool isCloseBracket() {
        OpCode op = opCode;
        return op == OpCode.PAR_CLOSE
            || op == OpCode.SQ_CLOSE
            || op == OpCode.CURL_CLOSE;
    }
    /// true if this is the end-of-file token
    @property bool isEof() { return type == TokenType.EOF; }
/// returns opcode ID - for opcode tokens
    @property OpCode opCode() {
        return OpCode.NONE;
    }
    /// returns keyword ID - for keyword tokens
    @property Keyword keyword() {
        return Keyword.NONE;
    }
    /// returns true if this is documentation comment token
    @property bool isDocumentationComment() {
        return false;
    }
    /// returns true if this is multiline
    @property bool isMultilineComment() {
        return false;
    }

    // error handling

    /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property bool isError() {
        return type == TokenType.INVALID;
    }
    /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property string errorMessage() {
        return null;
    }
    /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property int errorCode() {
        return 0;
    }
    /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property TokenType invalidTokenType() {
        return TokenType.INVALID;
    }

    /// creates a token of the given type with no source location
    this(TokenType type) {
        _type = type;
    }

    /// creates a token of the given type; line and pos are stored exactly as passed
    this(TokenType type, SourceFile file, int line, int pos) {
        _pos = pos;
        _line = line;
        _file = file;
        _type = type;
    }
    /// set start position for token (line is 1-based, pos is 0-based)
    void setPos(SourceFile file, int line, int pos) {
        _file = file;
        _line = line;
        _pos = pos + 1; // stored 1-based
    }
    /// set source file information for token
    void setFile(SourceFile file) {
        _file = file;
    }
    /// set start position for token (line is 1-based, pos is 0-based)
    void setPos(int line, int pos) {
        _line = line;
        _pos = pos + 1; // stored 1-based
    }

    /// returns an independent copy of this token
    public abstract Token clone();

    /// debug representation: line:pos, token type, opcode, keyword and quoted text
    public override @property string toString() {
        string location = to!string(_line) ~ ":" ~ to!string(_pos);
        string kinds = to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword);
        return location ~ " " ~ kinds ~ " \"" ~ toUTF8(text()) ~ "\"";
    }
}

/// end of file token
class EofToken : Token {
    this() {
        super(TokenType.EOF);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.EOF, file, line, pos);
    }
    public override Token clone() {
        return new EofToken(_file, _line, _pos);
    }
    public override @property string toString() {
        return "EOF";
    }
}

// treat as white space
//class EolToken : Token {
//    this(string file, uint line, uint pos) {
//        super(TokenType.EOL, file, line, pos);
//    }
//}

/// white space token
class WhiteSpaceToken : Token {
    this() {
        super(TokenType.WHITESPACE);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.WHITESPACE, file, line, pos);
    }
    public override Token clone() {
        return new WhiteSpaceToken(_file, _line, _pos);
    }
    public override @property string toString() {
        return "WhiteSpace";
    }
}

/// operator / punctuation token
class OpToken : Token {
    OpCode _op;

    /// operator code carried by this token
    public override @property OpCode opCode() {
        return _op;
    }
    /// sets operator code
    public @property void opCode(OpCode op) {
        _op = op;
    }
    /// operator source text, looked up by code
    public override @property dstring text() {
        return getOpNameD(_op);
    }

    this() {
        super(TokenType.OP);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.OP, file, line, pos);
    }
    public override Token clone() {
        OpToken copy = new OpToken(_file, _line, _pos);
        copy._op = _op;
        return copy;
    }
    public override @property string toString() {
        return "Op:" ~ to!string(_op);
    }
}

/// keyword token
class KeywordToken : Token {
    Keyword _keyword;

    /// keyword code carried by this token
    public override @property Keyword keyword() {
        return _keyword;
    }
    /// sets keyword code
    public @property void keyword(Keyword keyword) {
        _keyword = keyword;
    }
    /// keyword source text, looked up by code
    public override @property dstring text() {
        return getKeywordNameD(_keyword);
    }

    this() {
        super(TokenType.KEYWORD);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.KEYWORD, file, line, pos);
    }
    public override Token clone() {
        KeywordToken copy = new KeywordToken(_file, _line, _pos);
        copy._keyword = _keyword;
        return copy;
    }
    public override @property string toString() {
        return "Keyword:" ~ to!string(_keyword);
    }
}

/// comment token
class CommentToken : Token {
    protected dstring _text;
    protected bool _isDocumentationComment;
    protected bool _isMultilineComment;

    /// returns true if this is documentation comment token
    override @property bool isDocumentationComment() {
        return _isDocumentationComment;
    }

    /// sets documentation comment flag
    @property void isDocumentationComment(bool f) {
        _isDocumentationComment = f;
    }

    /// returns true if this is multiline
    override @property bool isMultilineComment() {
        return _isMultilineComment;
    }

    /// sets multiline flag
    @property void isMultilineComment(bool f) {
        _isMultilineComment = f;
    }

    /// comment text
    override @property dstring text() {
        return _text;
    }
    /// sets comment text; the array is referenced (cast to immutable), not copied
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    this() {
        super(TokenType.COMMENT);
    }
    /// the text array is referenced (cast to immutable), not copied
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.COMMENT, file, line, pos);
        _text = cast(dstring)text;
    }
    /// copy owns a duplicate of the text
    public override Token clone() {
        CommentToken copy = new CommentToken(_file, _line, _pos, _text.dup);
        copy._isMultilineComment = _isMultilineComment;
        copy._isDocumentationComment = _isDocumentationComment;
        return copy;
    }
    public override @property string toString() {
        return "Comment:" ~ to!string(_text);
    }
}

/// Invalid token holder - for error tolerant parsing
class InvalidToken : Token {
    protected dstring _text;
    protected TokenType _invalidTokenType;
    protected int _errorCode;
    protected string _errorMessage;

    /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property string errorMessage() {
        return _errorMessage;
    }
    /// sets error message
    @property void errorMessage(string s) {
        _errorMessage = s;
    }
    /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property int errorCode() {
        return _errorCode;
    }
    /// sets error code
    @property void errorCode(int c) {
        _errorCode = c;
    }
    /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property TokenType invalidTokenType() {
        return _invalidTokenType;
    }
    /// sets type of token parsing of which has been failed
    @property void invalidTokenType(TokenType t) {
        _invalidTokenType = t;
    }

    /// text of invalid token
    override @property dstring text() {
        return _text;
    }
    /// text of invalid token; the array is referenced (cast to immutable), not copied
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    this() {
        super(TokenType.INVALID);
    }
    /// the text array is referenced (cast to immutable), not copied
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.INVALID, file, line, pos);
        _text = cast(dstring)text;
    }
    /// copy owns duplicates of the text and of the error message
    override Token clone() {
        InvalidToken copy = new InvalidToken(_file, _line, _pos, _text.dup);
        copy._invalidTokenType = _invalidTokenType;
        copy._errorCode = _errorCode;
        copy._errorMessage = _errorMessage.dup;
        return copy;
    }
    override @property string toString() {
        return "Invalid:" ~ to!string(_text);
    }
}

alias tokenizer_ident_t = uint;
alias tokenizer_ident_name_t = dstring;

enum : tokenizer_ident_t {
    NO_IDENT = 0
}

/**
 * Global storage for identifier strings.
*/
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    /// First id handed out is NO_IDENT + 1, so NO_IDENT never names an identifier.
    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
     * Search for id by name, return NO_IDENT if not found.
     */
    uint findByName(tokenizer_ident_name_t name) {
        if (tokenizer_ident_t * known = name in _nameToId)
            return *known;
        return NO_IDENT;
    }

    /**
     * Search for name by id, return null if not found.
     */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        if (auto known = id in _idToName)
            return *known;
        return null;
    }

    /**
     * Search for ident id by name, create new entry if not found.
     *
     * A new entry stores its own immutable copy of the name, so the caller's
     * buffer may be reused afterwards.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        if (uint * known = name in _nameToId)
            return *known;
        immutable tokenizer_ident_name_t ownedName = name.idup;
        uint freshId = _nextId++;
        _idToName[freshId] = ownedName;
        _nameToId[ownedName] = freshId;
        return freshId;
    }
}

/**
 * Thread local storage for IDs.
*/
IdentHolder identMap;

static this() {
    // init ID storage
    identMap = new IdentHolder();
}

/// string literal token
class StringLiteralToken : Token {
    dstring _text;
    dchar _literalType;

    /// literal type character (0 when not set)
    public override @property dchar literalType() {
        return _literalType;
    }
    /// string literal text
    public override @property dstring text() {
        return _text;
    }
    /// sets text and literal type; the array is referenced (cast to immutable), not copied
    public void setText(dchar[] text, dchar type) {
        _literalType = type;
        _text = cast(dstring)text;
    }

    this() {
        super(TokenType.STRING);
    }
    /// the text array is referenced (cast to immutable), not copied
    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
        super(TokenType.STRING, file, line, pos);
        _literalType = type;
        _text = cast(dstring)text;
    }
    /// copy owns a duplicate of the text
    public override Token clone() {
        return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
    }
    /// quoted text followed by the literal type character (a space when the type is 0)
    public override @property string toString() {
        dchar typeSuffix = _literalType ? _literalType : ' ';
        return toUTF8("String:\"" ~ _text ~ "\"" ~ typeSuffix);
    }
}

/// character literal token
class CharacterLiteralToken : Token {
    dchar _character;
    dchar _literalType;

    /// literal type character (0 when not set)
    override @property dchar literalType() {
        return _literalType;
    }
    /// the character value
    @property dchar character() {
        return _character;
    }
    /// the character as a one-element string
    override @property dstring text() {
        return [_character];
    }
    /// sets character value and literal type
    void setCharacter(dchar ch, dchar type) {
        _literalType = type;
        _character = ch;
    }

    this() {
        super(TokenType.CHARACTER);
    }
    this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
        super(TokenType.CHARACTER, file, line, pos);
        _literalType = type;
        _character = character;
    }
    public override Token clone() {
        return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
    }
    public override @property string toString() {
        return "Char:" ~ toUTF8([_character]);
    }
}

/// integer literal token
class IntegerLiteralToken : Token {
    ulong _value;
    bool _unsigned;
    bool _long;
    /// literal value
    public override @property ulong intValue() {
        return _value;
    }
public @property override bool isUnsigned() { return _unsigned; }
    public @property override ulong isLong() { return _long; }
    /// decimal representation of the value
    public @property override dstring text() { return to!dstring(_value); }
    /// sets value and both suffix flags
    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
        _value = value;
        _unsigned = unsignedFlag;
        _long = longFlag;
    }
    /// sets suffix flags only, leaving the value untouched
    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
        _unsigned = unsignedFlag;
        _long = longFlag;
    }
    this() {
        super(TokenType.INTEGER);
    }
    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
        super(TokenType.INTEGER, file, line, pos);
        _value = value;
        _unsigned = unsignedFlag;
        _long = longFlag;
    }
    override public Token clone() {
        return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
    }
    /// value followed by "L" and/or "U" according to the flags
    public override @property string toString() {
        return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? "U" : "");
    }
}

/// floating point literal token
class RealLiteralToken : Token {
    real _value;
    byte _precision;
    bool _imaginary;
    public @property override ulong intValue() { return to!long(_value); }
    public @property override real realValue() { return _value; }
    public @property override double doubleValue() { return cast(double)_value; }
    public @property override float floatValue() { return cast(float)_value; }
    /// precision code; toString() prints "f" for 0 and "L" for 2
    public @property override byte precision() { return _precision; }
    public @property override bool isImaginary() { return _imaginary; }
    public @property override dstring text() { return to!dstring(_value); }
    /// sets value, precision code and imaginary flag
    public void setValue(real value, byte precision = 1, bool imaginary = false) {
        _value = value;
        _precision = precision;
        _imaginary = imaginary;
    }
    /// sets precision code and imaginary flag only, leaving the value untouched
    public void setFlags(byte precision = 1, bool imaginary = false) {
        _precision = precision;
        _imaginary = imaginary;
    }
    this() {
        super(TokenType.FLOAT);
    }
    this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
        super(TokenType.FLOAT, file, line, pos);
        _value = value;
        _precision = precision;
        _imaginary = imaginary;
    }
    override public Token clone() {
        return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
    }
    public override @property string toString() {
        return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
    }
}

/// identifier token; stores only the id, the name lives in identMap
class IdentToken : Token {
    tokenizer_ident_t _id;
    /// identifier name, looked up in identMap
    public @property override dstring text() {
        return identMap.nameById(_id);
    }
    /// registers the name in identMap (if new) and stores its id
    public void setText(dchar[] text) {
        _id = identMap.idByName(cast(immutable)text);
    }
    this() {
        super(TokenType.IDENTIFIER);
    }
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = identMap.idByName(cast(immutable)text);
    }
    this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = id;
    }
    override public Token clone() {
        return new IdentToken(_file, _line, _pos, _id);
    }
    public override @property string toString() {
        return "Ident:" ~ to!string(text);
    }
}

// shared appender buffer, to avoid extra heap allocations
struct StringAppender {
    dchar[] buf;
    uint len;
    /// returns the accumulated characters as a slice of the internal buffer (no copy)
    dchar[] get() {
        return buf[0 .. len];
    }
    /// appends a single '\n'
    void appendEol() {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)((len + 1 + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len] = '\n';
        len++;
    }
    /// appends all characters of s, growing the buffer when needed
    void append(dchar[] s) {
        if (s.length == 0)
            return;
        if (len + s.length > buf.length) {
            uint newsize = cast(uint)((len + s.length + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len .. len + s.length] = s;
        len += s.length;
    }
    /// appends one character, growing the buffer when needed
    void append(dchar ch) {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)(buf.length * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len++] = ch;
    }
    /// forgets the accumulated text; the buffer itself is kept for reuse
    void reset() {
        len = 0;
    }
    /// returns the value of a hex digit, or -1 if ch is not one
    static int parseHexDigit(dchar ch) {
        if (ch >= '0' && ch <='9')
            return ch - '0';
        if (ch >= 'a' && ch <='f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <='F')
            return ch - 'A' + 10;
        return -1;
    }
    bool errorFlag = false;
    /**
     * Decodes `count` hex digits following buf[pos]; pos is left on the last
     * digit consumed. Sets errorFlag when the buffer ends early or a
     * character is not a hex digit (a bad digit counts as 0).
     */
    dchar decodeHex(ref int pos, int count) {
        dchar res = 0;
        for (int i = 0; i < count; i++) {
            if (pos >= len - 1) {
                errorFlag = true;
                return res;
            }
            dchar ch = buf[++pos];
            int digit = parseHexDigit(ch);
            if (digit < 0) {
                errorFlag = true;
                digit = 0;
            }
            res = (res << 4) | digit;
        }
        return res;
    }
    /**
     * Decodes an octal escape of one to three digits. firstChar is the digit
     * at buf[pos]; up to two more octal digits are consumed and pos is left
     * on the last digit consumed.
     */
    dchar decodeOct(dchar firstChar, ref int pos) {
        dchar res = 0;
        res = firstChar - '0';
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        return res;
    }

    char[] entityNameBuf;
    int entityNameLen;

    /**
     * Decodes a named character entity. buf[pos] is the '&'; the name runs up
     * to the terminating ';', on which pos is left. Returns '?' and sets
     * errorFlag when the ';' is missing or entityToChar() yields 0 for the name.
     */
    dchar decodeCharacterEntity(ref int pos) {
        entityNameLen = 0;
        pos++;
        for(; pos < len && buf[pos] != ';'; pos++) {
            dchar ch = buf[pos];
            if (ch >= 0x80)
                errorFlag = true;
            if (entityNameBuf.length < entityNameLen + 4)
                entityNameBuf.length += 32;
            entityNameBuf[entityNameLen++] = cast(char)ch;
        }
        if (pos < len && buf[pos] == ';') {
            dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /**
     * Replaces escape sequences in the accumulated text in place and shrinks
     * len accordingly.
     *
     * Returns: errorFlag, i.e. true when at least one escape sequence was
     * malformed.
     */
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0;
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch == '\\') {
                if (src == len - 1) {
                    // dangling backslash at the very end: nothing to escape.
                    // The backslash is dropped as before, but is now reported.
                    errorFlag = true;
                    break; // INVALID
                }
                ch = buf[++src];
                switch (ch) {
                    case '\'':
                    case '\"':
                    case '?':
                    case '\\':
                        buf[dst++] = ch;
                        break;
                    case 'a':
                        buf[dst++] = '\a';
                        break;
                    case 'b':
                        buf[dst++] = '\b';
                        break;
                    case 'f':
                        buf[dst++] = '\f';
                        break;
                    case 'n':
                        buf[dst++] = '\n';
                        break;
                    case 'r':
                        buf[dst++] = '\r';
                        break;
                    case 't':
                        buf[dst++] = '\t';
                        break;
                    case 'v':
                        buf[dst++] = '\v';
                        break;
                    case 'x':
                        buf[dst++] = decodeHex(src, 2);
                        break;
                    case 'u':
                        buf[dst++] = decodeHex(src, 4);
                        break;
                    case 'U':
                        buf[dst++] = decodeHex(src, 8);
                        break;
                    default:
                        if (ch >= '0' && ch <= '7') {
                            // octal X XX or XXX.
                            // '\0' deliberately has no case of its own: a separate
                            // case made "\012" decode as NUL followed by "12".
                            // decodeOct() yields 0 for a lone '0'.
                            buf[dst++] = decodeOct(ch, src);
                        } else if (ch == '&') {
                            // named character entity
                            buf[dst++] = decodeCharacterEntity(src);
                        } else {
                            // unknown escape: just show it as is
                            buf[dst++] = ch; // something wrong
                            errorFlag = true;
                        }
                        break;
                }
            } else {
                buf[dst++] = ch;
            }
        }
        len = dst;
        return errorFlag;
    }
}

class Tokenizer
{
    protected SourceLines _lineStream;
    protected dchar[] _lineText;
    protected int _line; // current line number
    protected int _len;  // current line length
    protected int _pos;  // current line read position
    protected int _prevLineLength; // previous line length
    protected uint _state; // tokenizer state

    enum : int {
        EOF_CHAR = 0x001A,
        EOL_CHAR = 0x000A
    };

    protected
WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
    protected CommentToken _sharedCommentToken = new CommentToken();
    protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
    protected IdentToken _sharedIdentToken = new IdentToken();
    protected OpToken _sharedOpToken = new OpToken();
    protected KeywordToken _sharedKeywordToken = new KeywordToken();
    protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
    protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
    protected InvalidToken _sharedInvalidToken = new InvalidToken();
    protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
    protected StringAppender _stringLiteralAppender;
    protected StringAppender _commentAppender;
    protected StringAppender _identAppender;

    protected bool _enableCommentText = true;
    /// when false, does not put comment text into comment token - for less allocations
    @property void enableCommentText(bool enabled) {
        _enableCommentText = enabled;
    }
    /// when false, does not put comment text into comment token - for less allocations
    @property bool enableCommentText() {
        return _enableCommentText;
    }

    protected bool _errorTolerant = false;
    /// when true, returns BadToken instead of throwing exception
    @property void errorTolerant(bool enabled) {
        _errorTolerant = enabled;
    }
    /// when true, returns BadToken instead of throwing exception
    @property bool errorTolerant() {
        return _errorTolerant;
    }

    /// creates a tokenizer reading from the given line stream
    this(SourceLines lineStream) {
        initialize(lineStream);
    }

    /**
     * (Re)binds the tokenizer to a line stream: points every shared token at
     * the stream's file, reads the first line and then sets the read position
     * within that line to `pos`.
     */
    void initialize(SourceLines lineStream, int pos = 0) {
        _lineStream = lineStream;
        SourceFile file = _lineStream.file;

        _sharedWhiteSpaceToken.setFile(file);
        _sharedCommentToken.setFile(file);
        _sharedStringLiteralToken.setFile(file);
        _sharedIdentToken.setFile(file);
        _sharedOpToken.setFile(file);
        _sharedKeywordToken.setFile(file);
        _sharedIntegerToken.setFile(file);
        _sharedRealToken.setFile(file);
        _sharedInvalidToken.setFile(file);
        _sharedCharacterLiteralToken.setFile(file);

        buildTime = Clock.currTime();

        _line = lineStream.line;
        _lineText = null;
        _prevLineLength = 0;
        _pos = 0;
        nextLine();
        // nextLine() resets _pos to 0, so the requested offset is applied last
        _pos = pos;
    }

    /// creates a tokenizer for source code held in a string
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }

    // fetch next line from source stream
    // returns false at end of file; throws SourceEncodingException when the
    // stream reports an error
    protected bool nextLine() {
        _prevLineLength = cast(int)_lineText.length;
        _lineText = _lineStream.readLine();
        if (!_lineText) {
            if (_lineStream.errorCode != 0) {
                throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
            }
            if (_lineStream.eof) {
                // end of file
                _len = 0;
                _pos = 0;
                return false;
            }
            // just an empty line
        }
        _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
        _pos = 0;
        _line = _lineStream.line;
        return true;
    }

    /// consumes and returns the next character; EOL_CHAR when the current line
    /// is exhausted, EOF_CHAR when no further line could be fetched
    protected dchar nextChar() {
        if (_pos < _len) {
            dchar res = _lineText[_pos++];
            // last character of the line taken: fetch the following line right away
            if (_pos >= _len)
                nextLine();
            return res;
        }
        if (nextLine())
            return EOL_CHAR;
        _pos = _prevLineLength + 1;
        return EOF_CHAR;
    }

    // NOTE(review): despite its name this advances _pos (post-increment below),
    // so it consumes the character it returns. Left as is because callers are
    // not visible here - confirm the intent before changing.
    protected dchar peekChar() {
        if (_lineText is null && !nextLine())
            return EOF_CHAR;
        if (_pos >= _len)
            return EOL_CHAR;
        return _lineText[_pos++];
    }

    /// creates a new EOF token located at the current token start
    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _startLine, _startPos + 2);
    }

    protected Token processWhiteSpace(dchar firstChar) {
        // reuse the same token instance, to avoid extra heap spamming
_sharedWhiteSpaceToken.setPos(_startLine, _startPos); 1704 for (;;) { 1705 int i = _pos; 1706 for (; i < _len; i++) { 1707 dchar ch = _lineText[i]; 1708 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR)) 1709 break; 1710 } 1711 _pos = i; 1712 if (_pos < _len) 1713 break; 1714 // go to next line 1715 if (!nextLine()) 1716 break; 1717 } 1718 return _sharedWhiteSpaceToken; 1719 } 1720 1721 protected Token processOneLineComment() { 1722 _sharedCommentToken.setPos(_startLine, _startPos); 1723 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/'; 1724 _sharedCommentToken.isMultilineComment = false; 1725 if (_enableCommentText) { 1726 _sharedCommentToken.text = _lineText[_pos + 1 .. $]; 1727 } 1728 _pos = _len; 1729 nextChar(); 1730 return _sharedCommentToken; 1731 } 1732 1733 protected Token processOneLineSharpComment() { 1734 _sharedCommentToken.setPos(_startLine, _startPos); 1735 if (_enableCommentText) { 1736 _sharedCommentToken.text = _lineText[_pos .. $]; 1737 } 1738 _pos = _len; 1739 return _sharedCommentToken; 1740 } 1741 1742 // Comment /* */ 1743 protected Token processMultilineComment() { 1744 _sharedCommentToken.setPos(_startLine, _startPos); 1745 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*'; 1746 _sharedCommentToken.isMultilineComment = true; 1747 _commentAppender.reset(); 1748 int textStart = _pos + 1; 1749 for (;;) { 1750 int textEnd = int.max; 1751 int i = textStart; 1752 for (; i < _len - 1; i++) { 1753 if (_lineText[i] == '*' && _lineText[i + 1] == '/') { 1754 textEnd = i; 1755 break; 1756 } 1757 } 1758 if (textEnd != int.max) { 1759 if (_enableCommentText) 1760 _commentAppender.append(_lineText[textStart .. textEnd]); 1761 _pos = textEnd + 2; 1762 break; 1763 } 1764 if (!nextLine()) { 1765 // TODO: do we need throw exception if comment not closed by end of file? 
1766 _pos = _len; 1767 break; 1768 } 1769 textStart = 0; 1770 } 1771 if (_enableCommentText) { 1772 _sharedCommentToken.text = _commentAppender.get(); 1773 } 1774 return _sharedCommentToken; 1775 } 1776 1777 // Comment /+ +/ 1778 protected Token processNestedComment() { 1779 _sharedCommentToken.setPos(_startLine, _startPos); 1780 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+'; 1781 _sharedCommentToken.isMultilineComment = true; 1782 _commentAppender.reset(); 1783 dchar[] text; 1784 int textStart = _pos + 1; 1785 int level = 1; 1786 for (;;) { 1787 int textEnd = int.max; 1788 int i = textStart; 1789 for (; i < _len - 1; i++) { 1790 if (_lineText[i] == '/' && _lineText[i + 1] == '+') { 1791 level++; 1792 i++; 1793 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') { 1794 if (--level == 0) { 1795 textEnd = i; 1796 break; 1797 } 1798 } 1799 } 1800 if (textEnd != int.max) { 1801 if (_enableCommentText) 1802 _commentAppender.append(_lineText[textStart .. textEnd]); 1803 _pos = textEnd + 2; 1804 break; 1805 } 1806 if (!nextLine()) { 1807 // TODO: do we need throw exception if comment not closed by end of file? 
1808 _pos = _len; 1809 break; 1810 } 1811 if (_enableCommentText) 1812 _commentAppender.appendEol(); 1813 textStart = 0; 1814 } 1815 if (_enableCommentText) { 1816 _sharedCommentToken.text = _commentAppender.get(); 1817 } 1818 return _sharedCommentToken; 1819 } 1820 1821 protected Token processHexString() { 1822 _pos++; 1823 // TODO: 1824 return null; 1825 } 1826 1827 protected Token processDelimitedString() { 1828 _pos++; 1829 // TODO: 1830 return null; 1831 } 1832 1833 // r"string" or `string` 1834 protected Token processWysiwygString(dchar ch) { 1835 _pos++; 1836 // TODO: 1837 return null; 1838 } 1839 1840 protected Token processIdent(dchar firstChar) { 1841 _sharedIdentToken.setPos(_startLine, _startPos); 1842 _identAppender.reset(); 1843 _identAppender.append(firstChar); 1844 for (; _pos < _len; ) { 1845 dchar ch = _lineText[_pos]; 1846 if (!isIdentMiddleChar(ch)) { 1847 break; 1848 } 1849 _identAppender.append(ch); 1850 _pos++; 1851 } 1852 _sharedIdentToken.setText(_identAppender.get); 1853 return _sharedIdentToken; 1854 } 1855 1856 protected Token processIntegerSuffix() { 1857 if (_pos >= _len) 1858 return _sharedIntegerToken; 1859 bool longFlag = false; 1860 bool unsignedFlag = false; 1861 dchar ch = _lineText[_pos]; 1862 dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 1863 if (ch == 'l' || ch == 'L') { 1864 longFlag = true; 1865 _pos++; 1866 if (ch2 == 'u' || ch2 == 'U') { 1867 unsignedFlag = true; 1868 _pos++; 1869 } 1870 } else if (ch == 'u' || ch == 'U') { 1871 unsignedFlag = true; 1872 _pos++; 1873 if (ch2 == 'l' || ch2 == 'L') { 1874 longFlag = true; 1875 _pos++; 1876 } 1877 } 1878 _sharedIntegerToken.setFlags(unsignedFlag, longFlag); 1879 ch = _pos < _len ? 
_lineText[_pos] : 0; 1880 if (isIdentMiddleChar(ch)) 1881 return parserError("Unexpected character after number", _sharedIntegerToken); 1882 return _sharedIntegerToken; 1883 } 1884 1885 protected Token processBinaryNumber() { 1886 _sharedIntegerToken.setPos(_startLine, _startPos); 1887 _pos++; 1888 if (_pos >= _len) 1889 return parserError("Unexpected end of line in binary number", _sharedIntegerToken); 1890 int digits = 0; 1891 ulong number = 0; 1892 int i = _pos; 1893 for (;i < _len; i++) { 1894 dchar ch = _lineText[i]; 1895 if (ch != '0' && ch != '1') 1896 break; 1897 number = (number << 1) | (ch == '1' ? 1 : 0); 1898 digits++; 1899 } 1900 _pos = i; 1901 if (digits > 64) 1902 return parserError("number is too big", _sharedIntegerToken); 1903 _sharedIntegerToken.setValue(number); 1904 return processIntegerSuffix(); 1905 } 1906 1907 protected Token processHexNumber() { 1908 _sharedIntegerToken.setPos(_startLine, _startPos); 1909 _sharedRealToken.setPos(_startLine, _startPos); 1910 _pos++; 1911 if (_pos >= _len) 1912 return parserError("Unexpected end of line in hex number", _sharedIntegerToken); 1913 int digits = 0; 1914 ulong number = 0; 1915 int i = _pos; 1916 for (;i < _len; i++) { 1917 dchar ch = _lineText[i]; 1918 uint digit = 0; 1919 if (ch >= '0' && ch <= '9') 1920 digit = ch - '0'; 1921 else if (ch >= 'a' && ch <= 'f') 1922 digit = ch - 'a' + 10; 1923 else if (ch >= 'A' && ch <= 'F') 1924 digit = ch - 'A' + 10; 1925 else if (ch == '_') 1926 continue; 1927 else 1928 break; 1929 number = (number << 4) | digit; 1930 digits++; 1931 } 1932 _pos = i; 1933 if (digits > 16) 1934 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1935 _sharedIntegerToken.setValue(number); 1936 return processIntegerSuffix(); 1937 } 1938 1939 protected Token processOctNumber() { 1940 _sharedIntegerToken.setPos(_startLine, _startPos); 1941 if (_pos >= _len) 1942 return parserError("Unexpected end of line in octal number", _sharedIntegerToken); 1943 int digits 
= 0; 1944 ulong number = 0; 1945 int i = _pos; 1946 bool overflow = false; 1947 for (;i < _len; i++) { 1948 dchar ch = _lineText[i]; 1949 int digit = 0; 1950 if (ch >= '0' && ch <= '7') 1951 digit = ch - '0'; 1952 else if (ch == '_') 1953 continue; 1954 else 1955 break; 1956 number <<= 3; 1957 if (digits >= 20) { 1958 if ((number >> 3) << 3 != number) { 1959 overflow = true; 1960 break; 1961 } 1962 } 1963 number |= digit; 1964 digits++; 1965 } 1966 _pos = i; 1967 if (overflow) 1968 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1969 _sharedIntegerToken.setValue(number); 1970 return processIntegerSuffix(); 1971 } 1972 1973 // 1974 protected Token processDecFloatSuffix(real value) { 1975 ubyte precision = 1; 1976 bool imaginary = false; 1977 dchar next = _pos < _len ? _lineText[_pos] : 0; 1978 if (next == 'f') { 1979 _pos++; 1980 precision = 0; 1981 } else if (next == 'L') { 1982 _pos++; 1983 precision = 2; 1984 } 1985 next = _pos < _len ? _lineText[_pos] : 0; 1986 if (next == 'i') { 1987 _pos++; 1988 imaginary = true; 1989 } 1990 next = _pos < _len ? _lineText[_pos] : 0; 1991 if (isIdentMiddleChar(next)) 1992 return parserError("invalid suffix for floating point literal", _sharedRealToken); 1993 _sharedRealToken.setValue(value, precision, imaginary); 1994 return _sharedRealToken; 1995 } 1996 1997 // after E char 1998 protected Token processDecFloatExponent(real value) { 1999 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2000 int sign = 1; 2001 if (next == '+') { 2002 _pos++; 2003 } else if (next == '-') { 2004 _pos++; 2005 sign = -1; 2006 } 2007 if (_pos >= _len) 2008 return parserError("Invalid exponent", _sharedRealToken); 2009 ulong digits = 0; 2010 ulong number = 0; 2011 int i = _pos; 2012 bool overflow = false; 2013 for (;i < _len; i++) { 2014 dchar ch = _lineText[i]; 2015 uint digit = 0; 2016 if (ch >= '0' && ch <= '9') 2017 digit = ch - '0'; 2018 else if (ch == '_') 2019 continue; 2020 else 2021 break; 2022 number *= 10; 2023 if (digits >= 18) { 2024 if ((number * 10) / 10 != number) { 2025 overflow = true; 2026 break; 2027 } 2028 } 2029 number += digit; 2030 digits++; 2031 } 2032 if (digits == 0) 2033 return parserError("Invalid exponent", _sharedRealToken); 2034 _pos = i; 2035 value *= pow(10., cast(long)number * sign); 2036 return processDecFloatSuffix(value); 2037 } 2038 2039 protected Token processDecFloatSecondPart(ulong firstPart) { 2040 if (_pos >= _len) { 2041 _sharedRealToken.setValue(cast(real)firstPart); 2042 return _sharedRealToken; 2043 } 2044 ulong divider = 1; 2045 ulong number = 0; 2046 int i = _pos; 2047 bool overflow = false; 2048 for (;i < _len; i++) { 2049 dchar ch = _lineText[i]; 2050 uint digit = 0; 2051 if (ch >= '0' && ch <= '9') 2052 digit = ch - '0'; 2053 else if (ch == '_') 2054 continue; 2055 else 2056 break; 2057 if (divider * 10 < divider) 2058 continue; // ignore extra digits 2059 number *= 10; 2060 number += digit; 2061 divider *= 10; 2062 } 2063 _pos = i; 2064 real value = cast(real)firstPart + (cast(real)number / divider); 2065 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2066 if (next == 0) { 2067 // neither exponent nor suffix 2068 _sharedRealToken.setValue(value); 2069 return _sharedRealToken; 2070 } 2071 if (next == 'e' || next == 'E') { 2072 _pos++; 2073 return processDecFloatExponent(value); 2074 } 2075 return processDecFloatSuffix(value); 2076 } 2077 2078 protected Token processDecNumber(dchar c) { 2079 _sharedIntegerToken.setPos(_startLine, _startPos); 2080 _sharedRealToken.setPos(_startLine, _startPos); 2081 //if (_pos >= _len) 2082 // return parserError("Unexpected end of line in number", _sharedIntegerToken); 2083 int digits = 1; 2084 ulong number = c - '0'; 2085 int i = _pos; 2086 bool overflow = false; 2087 if (_line == _startLine) { 2088 for (;i < _len; i++) { 2089 dchar ch = _lineText[i]; 2090 uint digit = 0; 2091 if (ch >= '0' && ch <= '9') 2092 digit = ch - '0'; 2093 else if (ch == '_') 2094 continue; 2095 else 2096 break; 2097 number *= 10; 2098 if (digits >= 18) { 2099 if ((number * 10) / 10 != number) { 2100 overflow = true; 2101 break; 2102 } 2103 } 2104 number += digit; 2105 digits++; 2106 } 2107 _pos = i; 2108 } 2109 if (overflow) 2110 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 2111 _sharedIntegerToken.setValue(number); 2112 dchar next = _line == _startLine && _pos < _len ? 
_lineText[_pos] : 0; 2113 if (next == 0) 2114 return _sharedIntegerToken; 2115 if (next == 'e' || next == 'E') { 2116 _pos++; 2117 return processDecFloatExponent(number); 2118 } else if (next == '.') { 2119 _pos++; 2120 return processDecFloatSecondPart(number); 2121 } 2122 return processIntegerSuffix(); 2123 } 2124 2125 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2126 protected Token parserError(string msg, Token incompleteToken) { 2127 return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type); 2128 } 2129 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2130 protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) { 2131 if (_errorTolerant) { 2132 startPos--; 2133 _sharedInvalidToken.setPos(startLine, startPos); 2134 _sharedInvalidToken.errorMessage = msg; 2135 _sharedInvalidToken.errorCode = 1; // for future extension 2136 _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension 2137 // make invalid source text 2138 dchar[] invalidText; 2139 int p = startLine == _line ? 
startPos : 0; 2140 for (int i = p; i < _pos && i < _lineText.length; i++) 2141 invalidText ~= _lineText[i]; 2142 2143 // recover after error 2144 for (; _pos < _lineText.length; _pos++) { 2145 dchar ch = _lineText[_pos]; 2146 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}') 2147 break; 2148 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) { 2149 if (ch == '*' || ch == '/') 2150 break; 2151 } 2152 invalidText ~= ch; 2153 } 2154 _sharedInvalidToken.text = invalidText; 2155 return _sharedInvalidToken; 2156 } 2157 throw new ParserException(msg, _lineStream.file, _line, _pos); 2158 } 2159 2160 protected Keyword detectKeyword(dchar ch) { 2161 if (ch < '@' || ch > 'z') 2162 return Keyword.NONE; 2163 int len = _len - _pos; 2164 switch (cast(ubyte)ch) { 2165 // AT_DISABLE 2166 // AT_NOGC 2167 // AT_PROPERTY 2168 case '@': return findKeyword(Keyword.AT_DISABLE, Keyword.AT_PROPERTY, _lineText.ptr + _pos, len, _pos); 2169 // ABSTRACT, 2170 // ALIAS, 2171 // ALIGN, 2172 // ASM, 2173 // ASSERT, 2174 // AUTO, 2175 case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos); 2176 2177 // BODY, 2178 // BOOL, 2179 // BREAK, 2180 // BYTE, 2181 case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos); 2182 2183 // CASE, 2184 // CAST, 2185 // CATCH, 2186 // CDOUBLE, 2187 // CENT, 2188 // CFLOAT, 2189 // CHAR, 2190 // CLASS, 2191 // CONST, 2192 // CONTINUE, 2193 // CREAL, 2194 case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos); 2195 2196 // DCHAR, 2197 // DEBUG, 2198 // DEFAULT, 2199 // DELEGATE, 2200 // DELETE, 2201 // DEPRECATED, 2202 // DO, 2203 // DOUBLE, 2204 case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos); 2205 2206 // ELSE, 2207 // ENUM, 2208 // EXPORT, 2209 // EXTERN, 2210 case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, 
_lineText.ptr + _pos, len, _pos); 2211 2212 // FALSE, 2213 // FINAL, 2214 // FINALLY, 2215 // FLOAT, 2216 // FOR, 2217 // FOREACH, 2218 // FOREACH_REVERSE, 2219 // FUNCTION, 2220 case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos); 2221 2222 // GOTO, 2223 case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos); 2224 2225 // IDOUBLE, 2226 // IF, 2227 // IFLOAT, 2228 // IMMUTABLE, 2229 // IMPORT, 2230 // IN, 2231 // INOUT, 2232 // INT, 2233 // INTERFACE, 2234 // INVARIANT, 2235 // IREAL, 2236 // IS, 2237 case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos); 2238 2239 // LAZY, 2240 // LONG, 2241 case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos); 2242 2243 // MACRO, 2244 // MIXIN, 2245 // MODULE, 2246 case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos); 2247 2248 // NEW, 2249 // NOTHROW, 2250 // NULL, 2251 case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos); 2252 2253 // OUT, 2254 // OVERRIDE, 2255 case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos); 2256 2257 // PACKAGE, 2258 // PRAGMA, 2259 // PRIVATE, 2260 // PROTECTED, 2261 // PUBLIC, 2262 // PURE, 2263 case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos); 2264 2265 // REAL, 2266 // REF, 2267 // RETURN, 2268 case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos); 2269 2270 // SAFE 2271 // SCOPE, 2272 // SHARED, 2273 // SHORT, 2274 // STATIC, 2275 // STRUCT, 2276 // SUPER, 2277 // SWITCH, 2278 // SYNCHRONIZED, 2279 // SYSTEM 2280 case 's': return findKeyword(Keyword.SAFE, Keyword.SYSTEM, _lineText.ptr + _pos, len, _pos); 2281 2282 // TEMPLATE, 2283 // THIS, 2284 // THROW, 2285 // TRUE, 2286 // TRY, 2287 // TYPEDEF, 2288 // TYPEID, 2289 // TYPEOF, 2290 case 't': return 
findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos); 2291 2292 // UBYTE, 2293 // UCENT, 2294 // UINT, 2295 // ULONG, 2296 // UNION, 2297 // UNITTEST, 2298 // USHORT, 2299 case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos); 2300 2301 // VERSION, 2302 // VOID, 2303 // VOLATILE, 2304 case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos); 2305 2306 // WCHAR, 2307 // WHILE, 2308 // WITH, 2309 case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos); 2310 2311 // FILE, 2312 // MODULE, 2313 // LINE, 2314 // FUNCTION, 2315 // PRETTY_FUNCTION, 2316 // 2317 // GSHARED, 2318 // TRAITS, 2319 // VECTOR, 2320 // PARAMETERS, 2321 case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos); 2322 default: return Keyword.NONE; 2323 } 2324 } 2325 protected OpCode detectOp(dchar ch) nothrow { 2326 if (ch >= 128) 2327 return OpCode.NONE; 2328 dchar ch2 = _pos < _len ? _lineText[_pos] : 0; 2329 dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 2330 switch(cast(ubyte)ch) { 2331 // DIV, // / 2332 // DIV_EQ, // /= 2333 case '/': 2334 if (ch2 == '=') { 2335 _pos++; 2336 return OpCode.DIV_EQ; 2337 } 2338 return OpCode.DIV; 2339 // DOT, // . 2340 // DOT_DOT, // .. 2341 // DOT_DOT_DOT,// ... 
2342 case '.': 2343 if (ch2 == '.') { 2344 if (ch3 == '.') { 2345 _pos += 2; 2346 return OpCode.DOT_DOT_DOT; 2347 } 2348 _pos++; 2349 return OpCode.DOT_DOT; 2350 } 2351 return OpCode.DOT; 2352 // AND, // & 2353 // AND_EQ, // &= 2354 // LOG_AND, // && 2355 case '&': 2356 if (ch2 == '=') { 2357 _pos++; 2358 return OpCode.AND_EQ; 2359 } 2360 if (ch2 == '&') { 2361 _pos++; 2362 return OpCode.LOG_AND; 2363 } 2364 return OpCode.AND; 2365 // OR, // | 2366 // OR_EQ, // |= 2367 // LOG_OR, // || 2368 case '|': 2369 if (ch2 == '=') { 2370 _pos++; 2371 return OpCode.OR_EQ; 2372 } 2373 if (ch2 == '|') { 2374 _pos++; 2375 return OpCode.LOG_OR; 2376 } 2377 return OpCode.OR; 2378 // MINUS, // - 2379 // MINUS_EQ, // -= 2380 // MINUS_MINUS,// -- 2381 case '-': 2382 if (ch2 == '=') { 2383 _pos++; 2384 return OpCode.MINUS_EQ; 2385 } 2386 if (ch2 == '-') { 2387 _pos++; 2388 return OpCode.MINUS_MINUS; 2389 } 2390 return OpCode.MINUS; 2391 // PLUS, // + 2392 // PLUS_EQ, // += 2393 // PLUS_PLUS, // ++ 2394 case '+': 2395 if (ch2 == '=') { 2396 _pos++; 2397 return OpCode.PLUS_EQ; 2398 } 2399 if (ch2 == '+') { 2400 _pos++; 2401 return OpCode.PLUS_PLUS; 2402 } 2403 return OpCode.PLUS; 2404 // LT, // < 2405 // LT_EQ, // <= 2406 // SHL, // << 2407 // SHL_EQ, // <<= 2408 // LT_GT, // <> 2409 // NE_EQ, // <>= 2410 case '<': 2411 if (ch2 == '<') { 2412 if (ch3 == '=') { 2413 _pos += 2; 2414 return OpCode.SHL_EQ; 2415 } 2416 _pos++; 2417 return OpCode.SHL; 2418 } 2419 if (ch2 == '>') { 2420 if (ch3 == '=') { 2421 _pos += 2; 2422 return OpCode.NE_EQ; 2423 } 2424 _pos++; 2425 return OpCode.LT_GT; 2426 } 2427 if (ch2 == '=') { 2428 _pos++; 2429 return OpCode.LT_EQ; 2430 } 2431 return OpCode.LT; 2432 // GT, // > 2433 // GT_EQ, // >= 2434 // SHR_EQ // >>= 2435 // ASR_EQ, // >>>= 2436 // SHR, // >> 2437 // ASR, // >>> 2438 case '>': 2439 if (ch2 == '>') { 2440 if (ch3 == '>') { 2441 dchar ch4 = _pos < _len - 2 ? 
_lineText[_pos + 2] : 0; 2442 if (ch4 == '=') { // >>>= 2443 _pos += 3; 2444 return OpCode.ASR_EQ; 2445 } 2446 _pos += 2; 2447 return OpCode.ASR; // >>> 2448 } 2449 if (ch3 == '=') { // >>= 2450 _pos += 2; 2451 return OpCode.SHR_EQ; 2452 } 2453 _pos++; 2454 return OpCode.SHR; 2455 } 2456 if (ch2 == '=') { // >= 2457 _pos++; 2458 return OpCode.GT_EQ; 2459 } 2460 // > 2461 return OpCode.GT; 2462 // NOT, // ! 2463 // NOT_EQ // != 2464 // NOT_LT_GT, // !<> 2465 // NOT_LT_GT_EQ, // !<>= 2466 // NOT_LT, // !< 2467 // NOT_LT_EQ, // !<= 2468 // NOT_GT, // !> 2469 // NOT_GT_EQ, // !>= 2470 case '!': 2471 if (ch2 == '<') { // !< 2472 if (ch3 == '>') { // !<> 2473 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2474 if (ch4 == '=') { // !<>= 2475 _pos += 3; 2476 return OpCode.NOT_LT_GT_EQ; 2477 } 2478 _pos += 2; 2479 return OpCode.NOT_LT_GT; // !<> 2480 } 2481 if (ch3 == '=') { // !<= 2482 _pos += 2; 2483 return OpCode.NOT_LT_EQ; 2484 } 2485 _pos++; 2486 return OpCode.NOT_LT; // !< 2487 } 2488 if (ch2 == '=') { // != 2489 _pos++; 2490 return OpCode.NOT_EQ; 2491 } 2492 return OpCode.NOT; 2493 // PAR_OPEN, // ( 2494 case '(': 2495 return OpCode.PAR_OPEN; 2496 // PAR_CLOSE, // ) 2497 case ')': 2498 return OpCode.PAR_CLOSE; 2499 // SQ_OPEN, // [ 2500 case '[': 2501 return OpCode.SQ_OPEN; 2502 // SQ_CLOSE, // ] 2503 case ']': 2504 return OpCode.SQ_CLOSE; 2505 // CURL_OPEN, // { 2506 case '{': 2507 return OpCode.CURL_OPEN; 2508 // CURL_CLOSE, // } 2509 case '}': 2510 return OpCode.CURL_CLOSE; 2511 // QUEST, // ? 
2512 case '?': 2513 return OpCode.QUEST; 2514 // COMMA, // , 2515 case ',': 2516 return OpCode.COMMA; 2517 // SEMICOLON, // ; 2518 case ';': 2519 return OpCode.SEMICOLON; 2520 // COLON, // : 2521 case ':': 2522 return OpCode.COLON; 2523 // DOLLAR, // $ 2524 case '$': 2525 return OpCode.DOLLAR; 2526 // EQ, // = 2527 // QE_EQ, // == 2528 // EQ_GT, // => 2529 case '=': 2530 if (ch2 == '=') { // == 2531 _pos++; 2532 return OpCode.QE_EQ; 2533 } 2534 if (ch2 == '>') { // => 2535 _pos++; 2536 return OpCode.EQ_GT; 2537 } 2538 return OpCode.EQ; 2539 // MUL, // * 2540 // MUL_EQ, // *= 2541 case '*': 2542 if (ch2 == '=') { 2543 _pos++; 2544 return OpCode.MUL_EQ; 2545 } 2546 return OpCode.MUL; 2547 // MOD, // % 2548 // MOD_EQ, // %= 2549 case '%': 2550 if (ch2 == '=') { 2551 _pos++; 2552 return OpCode.MOD_EQ; 2553 } 2554 return OpCode.MOD; 2555 // XOR, // ^ 2556 // XOR_EQ, // ^= 2557 // LOG_XOR, // ^^ 2558 // LOG_XOR_EQ, // ^^= 2559 case '^': 2560 if (ch2 == '^') { 2561 if (ch3 == '=') { 2562 _pos += 2; 2563 return OpCode.LOG_XOR_EQ; 2564 } 2565 _pos++; 2566 return OpCode.LOG_XOR; 2567 } 2568 if (ch2 == '=') { 2569 _pos++; 2570 return OpCode.XOR_EQ; 2571 } 2572 return OpCode.XOR; 2573 // INV, // ~ 2574 // INV_EQ, // ~= 2575 case '~': 2576 if (ch2 == '=') { 2577 _pos++; 2578 return OpCode.INV_EQ; 2579 } 2580 return OpCode.INV; 2581 // AT, // @ 2582 case '@': 2583 return OpCode.AT; 2584 // SHARP // # 2585 case '#': 2586 return OpCode.SHARP; 2587 default: 2588 return OpCode.NONE; 2589 } 2590 } 2591 2592 protected Token processCharacterLiteral() { 2593 _sharedCharacterLiteralToken.setPos(_startLine, _startPos); 2594 if (_pos + 2 > _len) 2595 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2596 dchar ch = _lineText[_pos++]; 2597 dchar ch2 = _lineText[_pos++]; 2598 dchar type = 0; 2599 if (ch == '\\') { 2600 // process escaped character - store it in ch 2601 // TODO: support all escape sequences 2602 switch(ch2) { 2603 case 'r': 2604 ch = '\r'; 2605 
break; 2606 case 'n': 2607 ch = '\n'; 2608 break; 2609 case 't': 2610 ch = '\t'; 2611 break; 2612 case '\\': 2613 ch = '\\'; 2614 break; 2615 default: 2616 ch = ch2; 2617 break; 2618 } 2619 // here must be closing ' 2620 if (_pos + 1 > _len) 2621 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2622 ch2 = _lineText[_pos++]; 2623 } 2624 if (ch2 != '\'') 2625 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2626 if (_pos < _len) { 2627 dchar t = _lineText[_pos]; 2628 if (t == 'd' || t == 'w' || t == 'c') { 2629 type = t; 2630 _pos++; 2631 } else if (isIdentMiddleChar(ch)) { 2632 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken); 2633 } 2634 } 2635 _sharedCharacterLiteralToken.setCharacter(ch, type); 2636 return _sharedCharacterLiteralToken; 2637 } 2638 2639 protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) { 2640 bool wysiwyg = (delimiter == 'r' || delimiter == '`'); 2641 //writeln("processDoubleQuotedString()"); 2642 _sharedStringLiteralToken.setPos(_startLine, _startPos); 2643 _stringLiteralAppender.reset(); 2644 if (delimiter == 'r') { 2645 _pos++; 2646 delimiter = '\"'; 2647 } 2648 dchar type = 0; 2649 for (;;) { 2650 int i = _pos; 2651 int endPos = int.max; 2652 bool lastBackSlash = false; 2653 for(; i < _len; i++) { 2654 dchar ch = _lineText[i]; 2655 if (ch == '\\') { 2656 if (lastBackSlash) 2657 lastBackSlash = false; 2658 else 2659 lastBackSlash = true; 2660 } 2661 else if (ch == delimiter && !lastBackSlash) { 2662 endPos = i; 2663 break; 2664 } 2665 else if(lastBackSlash) 2666 lastBackSlash = false; 2667 } 2668 if (endPos != int.max) { 2669 // found end quote 2670 _stringLiteralAppender.append(_lineText[_pos .. endPos]); 2671 _pos = endPos + 1; 2672 break; 2673 } 2674 // no quote by end of line 2675 _stringLiteralAppender.append(_lineText[_pos .. 
$]); 2676 _stringLiteralAppender.appendEol(); 2677 if (!nextLine()) { 2678 // do we need to throw exception if eof comes before end of string? 2679 break; 2680 } 2681 } 2682 dchar t = 0; 2683 if (_pos < _len) { 2684 dchar ch = _lineText[_pos]; 2685 if (ch == 'c' || ch == 'w' || ch == 'd') { 2686 t = ch; 2687 _pos++; 2688 if (_pos < _len) { 2689 ch = _lineText[_pos]; 2690 if (isIdentMiddleChar(ch)) 2691 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2692 } 2693 } else if (isIdentMiddleChar(ch)) 2694 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2695 } 2696 if (t != 0) { 2697 if (type != 0 && t != type) 2698 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); 2699 type = t; 2700 } 2701 if (wysiwyg) { 2702 // no escape processing 2703 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2704 return _sharedStringLiteralToken; 2705 } 2706 _stringLiteralAppender.processEscapeSequences(); 2707 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2708 return _sharedStringLiteralToken; 2709 } 2710 2711 protected SysTime buildTime; 2712 2713 // string literal of the date of compilation "mmm dd yyyy" 2714 protected dstring formatBuildDate() { 2715 // TODO: provide proper format 2716 return to!dstring(buildTime); 2717 } 2718 2719 // string literal of the time of compilation "hh:mm:ss" 2720 protected dstring formatBuildTime() { 2721 // TODO: provide proper format 2722 return to!dstring(buildTime); 2723 } 2724 2725 // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2726 protected dstring formatBuildTimestamp() { 2727 // TODO: provide proper format 2728 return to!dstring(buildTime); 2729 } 2730 2731 static immutable dstring VERSION = "0.1"; 2732 static immutable dstring VENDOR = "coolreader.org"; 2733 2734 protected Token makeSpecialTokenString(dstring str, int pos) { 2735 
_sharedStringLiteralToken.setPos(_startLine, _startPos); 2736 _sharedStringLiteralToken.setText(cast(dchar[])str, 0); 2737 return _sharedStringLiteralToken; 2738 } 2739 2740 protected Token processSpecialToken(Keyword keyword, int pos) { 2741 switch (keyword) { 2742 //Special Token Replaced with 2743 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2744 return makeSpecialTokenString(formatBuildDate(), pos); 2745 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2746 return makeSpecialTokenString(formatBuildTime(), pos); 2747 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2748 return makeSpecialTokenString(formatBuildTimestamp(), pos); 2749 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2750 return makeSpecialTokenString(VENDOR, pos); 2751 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2752 return makeSpecialTokenString(VERSION, pos); 2753 default: 2754 parserError("Unknown special token", _line, pos); 2755 } 2756 return null; 2757 } 2758 2759 protected int _startLine; 2760 protected int _startPos; 2761 2762 // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls). 2763 Token nextToken() { 2764 _startLine = _line; 2765 _startPos = _pos; 2766 dchar ch = nextChar(); 2767 if (ch == EOF_CHAR) { 2768 return emitEof(); 2769 } 2770 if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { 2771 // white space (treat EOL as whitespace, too) 2772 return processWhiteSpace(ch); 2773 } 2774 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2775 if (ch == '/') { 2776 if (next == '/') 2777 return processOneLineComment(); 2778 else if (next == '*') 2779 return processMultilineComment(); 2780 else if (next == '+') 2781 return processNestedComment(); 2782 } 2783 if (ch == '#' && _line == 1) 2784 return processOneLineSharpComment(); 2785 if (ch == '\"') 2786 return processDoubleQuotedOrWysiwygString(ch); 2787 if (ch == '\'') 2788 return processCharacterLiteral(); 2789 if (ch == 'x' && next == '\"') 2790 return processHexString(); 2791 if (ch == 'q' && next == '\"') 2792 return processDelimitedString(); 2793 if ((ch == 'r' && next == '\"') || (ch == '`')) 2794 return processDoubleQuotedOrWysiwygString(ch); 2795 int oldPos = _pos - 1; 2796 2797 if (ch == '0') { 2798 if (next == 'b' || next == 'B') 2799 return processBinaryNumber(); 2800 if (next == 'x' || next == 'X') 2801 return processHexNumber(); 2802 if (next >= '0' && next <= '9') 2803 return processOctNumber(); 2804 if (next >= '0' && next <= '9') 2805 return processDecNumber(ch); 2806 } 2807 if (ch >= '0' && ch <= '9') 2808 return processDecNumber(ch); 2809 if (ch == '.' && next >= '0' && next <= '9') // .123 2810 return processDecFloatSecondPart(0); 2811 2812 if (ch == '_' || ch == '@' || isUniversalAlpha(ch)) { 2813 // start of identifier or keyword? 
// Keywords take precedence; anything that is not a keyword is handed to processIdent().
            Keyword keyword = detectKeyword(ch);
            if (keyword != Keyword.NONE) {
                switch (keyword) {
                    //Special Token Replaced with
                    case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file
                    case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
                    case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss"
                    case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
                    case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D"
                    case Keyword.VERSION_: // Compiler version as an integer, such as 2001
                        return processSpecialToken(keyword, oldPos);
                    default:
                        // ordinary keyword: reuse the shared keyword token instance
                        _sharedKeywordToken.setPos(_startLine, _startPos);
                        _sharedKeywordToken.keyword = keyword;
                        return _sharedKeywordToken;
                }
            }
            return processIdent(ch);
        }
        OpCode op = detectOp(ch);
        if (op != OpCode.NONE) {
            // operator: reuse the shared operator token instance
            _sharedOpToken.setPos(_startLine, _startPos);
            _sharedOpToken.opCode = op;
            return _sharedOpToken;
        }
        return parserError("Invalid token", _line, _pos);
    }

    /**
     * Tokenizes the rest of the input.
     *
     * Calls nextToken() until it returns null or a token of type
     * TokenType.EOF; that terminating token is not included in the result.
     * Every token is cloned before it is stored, because nextToken() may hand
     * out shared, reused token instances (e.g. the keyword and operator tokens).
     *
     * Returns: the collected token copies, in source order.
     */
    Token[] allTokens() {
        Token[] res;
        for (;;) {
            Token tok = nextToken();
            if (!tok || tok.type == TokenType.EOF)
                break;
            res ~= tok.clone();
        }
        return res;
    }
}

/// Manual smoke test: compiled only when version DisableLexerTest is set.
/// Dumps every token of a developer-local source file to stdout.
unittest {
    version(DisableLexerTest) {
        import std.stdio;
        import std.conv;
        import std.utf;
        import dlangui.core.linestream;
        string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
        writeln("opening file");
        try {
            std.stream.File f = new std.stream.File(fname);
            scope(exit) { f.close(); }
            try {
                LineStream lines = LineStream.create(f, fname);
                Tokenizer tokenizer = new Tokenizer(lines);
                for (;;) {
                    Token token = tokenizer.nextToken();
                    if (token is null) {
                        writeln("Null token returned");
                        break;
                    }
                    if (token.type == TokenType.EOF) {
                        writeln("EOF token");
                        break;
                    }
                    writeln("", token.line, ":", token.pos, "\t", token.toString);
                }
            } catch (Exception e) {
                writeln("Exception " ~ e.toString);
            }
        } catch (Exception e) {
            writeln("Exception " ~ e.toString);
        }
    }
}

/// Converts a named character entity (e.g. "amp") to its character.
/// Returns: the character, or 0 if the name is not registered.
dchar entityToChar(string name) {
    if (auto ch = name in entityToCharMap) {
        return *ch;
    }
    return 0;
}

/// Finds the entity name for a character.
/// Returns: the entity name, or null if the character has no registered name.
string charToEntity(dchar ch) {
    if (auto name = ch in charToEntityMap) {
        return *name;
    }
    return null;
}

// Process-global lookup tables (one instance shared by all threads).
private __gshared dchar[string]entityToCharMap;
private __gshared string[dchar]charToEntityMap;

/// Registers one entity in both lookup directions.
private void addEntity(string name, dchar ch) {
    entityToCharMap[name] = ch;
    charToEntityMap[ch] = name;
}

// The tables are __gshared, so they must be filled exactly once per process.
// A plain (thread-local) `static this()` would run again on every thread start
// and mutate the shared maps without synchronization; `shared static this()`
// runs once, before any thread-local module constructor.
shared static this() {
    addEntity("quot", 34);
    addEntity("amp", 38);
    addEntity("lt", 60);
    addEntity("gt", 62);
    addEntity("OElig", 338);
    addEntity("oelig", 339);
    addEntity("Scaron", 352);
    addEntity("scaron", 353);
    addEntity("Yuml", 376);
    addEntity("circ", 710);
    addEntity("tilde", 732);
    addEntity("ensp", 8194);
    addEntity("emsp", 8195);
    addEntity("thinsp", 8201);
    addEntity("zwnj", 8204);
    addEntity("zwj", 8205);
    addEntity("lrm", 8206);
    addEntity("rlm", 8207);
    addEntity("ndash", 8211);
    addEntity("mdash", 8212);
    addEntity("lsquo", 8216);
    addEntity("rsquo", 8217);
    addEntity("sbquo", 8218);
    addEntity("ldquo", 8220);
    addEntity("rdquo", 8221);
    addEntity("bdquo", 8222);
    addEntity("dagger", 8224);
    addEntity("Dagger", 8225);
    addEntity("permil", 8240);
    addEntity("lsaquo", 8249);
    addEntity("rsaquo", 8250);
    addEntity("euro", 8364);
// Latin-1 supplement, the florin sign and Greek letters (capitals, then lowercase up to nu).
    // Entries are registered strictly in table order, one addEntity() call per row.
    static struct EntityDef {
        string name;
        dchar code;
    }
    static immutable EntityDef[] latinAndGreek = [
        // Latin-1 supplement: punctuation and symbols
        EntityDef("nbsp", 160), EntityDef("iexcl", 161), EntityDef("cent", 162),
        EntityDef("pound", 163), EntityDef("curren", 164), EntityDef("yen", 165),
        EntityDef("brvbar", 166), EntityDef("sect", 167), EntityDef("uml", 168),
        EntityDef("copy", 169), EntityDef("ordf", 170), EntityDef("laquo", 171),
        EntityDef("not", 172), EntityDef("shy", 173), EntityDef("reg", 174),
        EntityDef("macr", 175), EntityDef("deg", 176), EntityDef("plusmn", 177),
        EntityDef("sup2", 178), EntityDef("sup3", 179), EntityDef("acute", 180),
        EntityDef("micro", 181), EntityDef("para", 182), EntityDef("middot", 183),
        EntityDef("cedil", 184), EntityDef("sup1", 185), EntityDef("ordm", 186),
        EntityDef("raquo", 187), EntityDef("frac14", 188), EntityDef("frac12", 189),
        EntityDef("frac34", 190), EntityDef("iquest", 191),
        // Latin-1 supplement: uppercase letters
        EntityDef("Agrave", 192), EntityDef("Aacute", 193), EntityDef("Acirc", 194),
        EntityDef("Atilde", 195), EntityDef("Auml", 196), EntityDef("Aring", 197),
        EntityDef("AElig", 198), EntityDef("Ccedil", 199), EntityDef("Egrave", 200),
        EntityDef("Eacute", 201), EntityDef("Ecirc", 202), EntityDef("Euml", 203),
        EntityDef("Igrave", 204), EntityDef("Iacute", 205), EntityDef("Icirc", 206),
        EntityDef("Iuml", 207), EntityDef("ETH", 208), EntityDef("Ntilde", 209),
        EntityDef("Ograve", 210), EntityDef("Oacute", 211), EntityDef("Ocirc", 212),
        EntityDef("Otilde", 213), EntityDef("Ouml", 214), EntityDef("times", 215),
        EntityDef("Oslash", 216), EntityDef("Ugrave", 217), EntityDef("Uacute", 218),
        EntityDef("Ucirc", 219), EntityDef("Uuml", 220), EntityDef("Yacute", 221),
        EntityDef("THORN", 222), EntityDef("szlig", 223),
        // Latin-1 supplement: lowercase letters
        EntityDef("agrave", 224), EntityDef("aacute", 225), EntityDef("acirc", 226),
        EntityDef("atilde", 227), EntityDef("auml", 228), EntityDef("aring", 229),
        EntityDef("aelig", 230), EntityDef("ccedil", 231), EntityDef("egrave", 232),
        EntityDef("eacute", 233), EntityDef("ecirc", 234), EntityDef("euml", 235),
        EntityDef("igrave", 236), EntityDef("iacute", 237), EntityDef("icirc", 238),
        EntityDef("iuml", 239), EntityDef("eth", 240), EntityDef("ntilde", 241),
        EntityDef("ograve", 242), EntityDef("oacute", 243), EntityDef("ocirc", 244),
        EntityDef("otilde", 245), EntityDef("ouml", 246), EntityDef("divide", 247),
        EntityDef("oslash", 248), EntityDef("ugrave", 249), EntityDef("uacute", 250),
        EntityDef("ucirc", 251), EntityDef("uuml", 252), EntityDef("yacute", 253),
        EntityDef("thorn", 254), EntityDef("yuml", 255),
        // florin sign
        EntityDef("fnof", 402),
        // Greek capital letters
        EntityDef("Alpha", 913), EntityDef("Beta", 914), EntityDef("Gamma", 915),
        EntityDef("Delta", 916), EntityDef("Epsilon", 917), EntityDef("Zeta", 918),
        EntityDef("Eta", 919), EntityDef("Theta", 920), EntityDef("Iota", 921),
        EntityDef("Kappa", 922), EntityDef("Lambda", 923), EntityDef("Mu", 924),
        EntityDef("Nu", 925), EntityDef("Xi", 926), EntityDef("Omicron", 927),
        EntityDef("Pi", 928), EntityDef("Rho", 929), EntityDef("Sigma", 931),
        EntityDef("Tau", 932), EntityDef("Upsilon", 933), EntityDef("Phi", 934),
        EntityDef("Chi", 935), EntityDef("Psi", 936), EntityDef("Omega", 937),
        // Greek lowercase letters, alpha through nu
        EntityDef("alpha", 945), EntityDef("beta", 946), EntityDef("gamma", 947),
        EntityDef("delta", 948), EntityDef("epsilon", 949), EntityDef("zeta", 950),
        EntityDef("eta", 951), EntityDef("theta", 952), EntityDef("iota", 953),
        EntityDef("kappa", 954), EntityDef("lambda", 955), EntityDef("mu", 956),
        EntityDef("nu", 957),
    ];
    foreach (def; latinAndGreek)
        addEntity(def.name, def.code);
// Remaining Greek letters, punctuation, letterlike symbols, arrows, math operators and misc symbols.
    addEntity("xi", 958);
    addEntity("omicron", 959);
    addEntity("pi", 960);
    addEntity("rho", 961);
    addEntity("sigmaf", 962);
    addEntity("sigma", 963);
    addEntity("tau", 964);
    addEntity("upsilon", 965);
    addEntity("phi", 966);
    addEntity("chi", 967);
    addEntity("psi", 968);
    addEntity("omega", 969);
    addEntity("thetasym", 977);
    addEntity("upsih", 978);
    addEntity("piv", 982);
    addEntity("bull", 8226);
    addEntity("hellip", 8230);
    addEntity("prime", 8242);
    addEntity("Prime", 8243);
    addEntity("oline", 8254);
    addEntity("frasl", 8260);
    addEntity("weierp", 8472);
    addEntity("image", 8465);
    addEntity("real", 8476);
    addEntity("trade", 8482);
    addEntity("alefsym", 8501);
    addEntity("larr", 8592);
    addEntity("uarr", 8593);
    addEntity("rarr", 8594);
    addEntity("darr", 8595);
    addEntity("harr", 8596);
    addEntity("crarr", 8629);
    addEntity("lArr", 8656);
    addEntity("uArr", 8657);
    addEntity("rArr", 8658);
    addEntity("dArr", 8659);
    addEntity("hArr", 8660);
    addEntity("forall", 8704);
    addEntity("part", 8706);
    addEntity("exist", 8707);
    addEntity("empty", 8709);
    addEntity("nabla", 8711);
    addEntity("isin", 8712);
    addEntity("notin", 8713);
    addEntity("ni", 8715);
    addEntity("prod", 8719);
    addEntity("sum", 8721);
    addEntity("minus", 8722);
    addEntity("lowast", 8727);
    addEntity("radic", 8730);
    addEntity("prop", 8733);
    addEntity("infin", 8734);
    addEntity("ang", 8736);
    addEntity("and", 8743);
    addEntity("or", 8744);
    addEntity("cap", 8745);
    addEntity("cup", 8746);
    addEntity("int", 8747);
    addEntity("there4", 8756);
    addEntity("sim", 8764);
    addEntity("cong", 8773);
    addEntity("asymp", 8776);
    addEntity("ne", 8800);
    addEntity("equiv", 8801);
    addEntity("le", 8804);
    addEntity("ge", 8805);
    addEntity("sub", 8834);
    addEntity("sup", 8835);
    addEntity("nsub", 8836);
    addEntity("sube", 8838);
    addEntity("supe", 8839);
    addEntity("oplus", 8853);
    addEntity("otimes", 8855);
    addEntity("perp", 8869);
    addEntity("sdot", 8901);
    addEntity("lceil", 8968);
    addEntity("rceil", 8969);
    addEntity("lfloor", 8970);
    addEntity("rfloor", 8971);
    addEntity("loz", 9674);
    addEntity("spades", 9824);
    addEntity("clubs", 9827);
    addEntity("hearts", 9829);
    addEntity("diams", 9830);
    addEntity("lang", 10216);
    addEntity("rang", 10217);
}



//void runTokenizerTest()
/// Tokenizer regression tests: each test feeds a source snippet to a Tokenizer
/// and checks the produced tokens, one by one, against a list of expectations.
unittest
{
    import std.algorithm;

    /// Base expectation: accepts any token. Subclasses override doTest().
    /// The file/line pair records where the expectation was created so that a
    /// failure message can point at the offending line of the test.
    class TokenTest {
        uint _line;
        string _file;
        this(string file, uint line) {
            _file = file;
            _line = line;
        }
        /// Returns true when `token` satisfies this expectation.
        bool doTest(Token token) {
            return true;
        }
        /// Pulls the next token from `tokenizer` and asserts that it matches.
        void execute(Tokenizer tokenizer) {
            Token token = tokenizer.nextToken();
            // nextToken() is treated as nullable elsewhere in this file, so guard
            // before handing the token to doTest() or calling toString on it.
            if (token is null || !doTest(token)) {
                string found = token is null ? "null" : token.toString;
                assert(false, "token does not match at " ~ _file ~ ":" ~ to!string(_line)
                    ~ " foundToken: " ~ found ~ " expected: " ~ toString);
            }
        }
        public override @property string toString() {
            return "TokenTest";
        }
    }

    /// Runs `code` through a fresh Tokenizer and checks the tokens in order.
    /// Tokens produced after the last expectation are not inspected.
    void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
        Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
        foreach (expected; tokens)
            expected.execute(tokenizer);
    }

    /// Expects a KEYWORD token with the given keyword code.
    class KeywordTest : TokenTest {
        Keyword _code;
        this(Keyword code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.KEYWORD && token.keyword == _code;
        }
        public override @property string toString() {
            return "Keyword:" ~ to!string(_code);
        }
    }

    /// Expects an OP token with the given operator code.
    class OpTest : TokenTest {
        OpCode _code;
        this(OpCode code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            return token.type == TokenType.OP && token.opCode == _code;
        }
        public override @property string toString() {
            return "Op:" ~ to!string(_code);
        }
    }

    /// Expects a STRING token with the given text and literal-type suffix
    /// (0 when the literal has no suffix).
    class StringTest : TokenTest {
        dstring _value;
        dchar _literalType;
        this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _literalType = literalType;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.STRING)
                return false;
            if (!token.text.equal(_value))
                return false;
            return token.literalType == _literalType;
        }
        public override @property string toString() {
            return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
        }
    }

    /// Expects an INTEGER token with the given value and unsigned/long flags.
    class IntegerTest : TokenTest {
        ulong _value;
        bool _unsigned;
        bool _long;
        this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _unsigned = unsignedFlag;
            _long = longFlag;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.INTEGER)
                return false;
            if (token.intValue != _value)
                return false;
            if (token.isUnsigned != _unsigned)
                return false;
            return token.isLong == _long;
        }
        public override @property string toString() {
            return "Integer:" ~ to!string(_value);
        }
    }

    /// Expects a FLOAT token. The value is compared with a relative tolerance
    /// of 1e-6 of the expected value; precision and imaginary flag must match
    /// exactly. toString() renders precision 0 as "f" and 2 as "L".
    class RealTest : TokenTest {
        real _value;
        ubyte _precision;
        bool _imaginary;
        this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _precision = precision;
            _imaginary = imaginary;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.FLOAT)
                return false;
            real diff = token.realValue - _value;
            real maxerr = _value / 1000000;
            if (diff < 0) diff = -diff;
            if (maxerr < 0) maxerr = -maxerr;
            if (diff > maxerr)
                return false;
            if (token.precision != _precision)
                return false;
            return token.isImaginary == _imaginary;
        }
        public override @property string toString() {
            return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
        }
    }

    /// Expects an IDENTIFIER token with the given text.
    class IdentTest : TokenTest {
        string _value;
        this(string value, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.IDENTIFIER)
                return false;
            return to!string(token.text).equal(_value);
        }
        public override @property string toString() {
            return "Ident:" ~ _value;
        }
    }

    /// Expects any COMMENT token.
    class CommentTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            return token.type == TokenType.COMMENT;
        }
        public override @property string toString() {
            return "Comment";
        }
    }

    /// Expects the EOF token.
    class EOFTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            return token.type == TokenType.EOF;
        }
        public override @property string toString() {
            return "EOF";
        }
    }

    /// Expects any WHITESPACE token.
    class WhiteSpaceTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            return token.type == TokenType.WHITESPACE;
        }
        public override @property string toString() {
            return "whiteSpace";
        }
    }

    // Factory helpers. The defaulted file/line arguments are evaluated at the
    // call site, so each expectation remembers the test line that created it.
    TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
        return new StringTest(value, literalType, file, line);
    }
    TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
        return new IntegerTest(value, unsignedFlag, longFlag, file, line);
    }
    // precision is ubyte to match the RealTest constructor it forwards to
    TokenTest checkReal(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
        return new RealTest(value, precision, imaginary, file, line);
    }
    TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
        return new IdentTest(value, file, line);
    }
    TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
        return new KeywordTest(value, file, line);
    }
    TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
        return new OpTest(value, file, line);
    }
    TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
        return new WhiteSpaceTest(file, line);
    }
    TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
        return new CommentTest(file, line);
    }
    TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
        return new EOFTest(file, line);
    }

    // test wysiwyg (r"...") string: the backslash sequence must stay unescaped
    testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);

    // test strings
    // NOTE(review): the heredoc payload lines are assumed to start at column 0
    // with no trailing blanks - confirm against the pristine file.
    testTokenizer(q"TEST
"simple string"
"simple\nstring"
`simple string`
"simple string"d
"simple string"c
"simple string"w
"simple\"string"
"\r\n\f\t\\\"\'&"
TEST"
        , [
            checkString("simple string"),
            checkSpace(),
            checkString("simple\nstring"),
            checkSpace(),
            checkString("simple string"),
            checkSpace(),
            checkString("simple string", 'd'),
            checkSpace(),
            checkString("simple string", 'c'),
            checkSpace(),
            checkString("simple string", 'w'),
            checkSpace(),
            checkString("simple\"string"),
            checkSpace(),
            checkString("\r\n\f\t\\\"\'&"),
        ]);
    // basic test
    testTokenizer(q"TEST
int i;
TEST"
        , [
            checkKeyword(Keyword.INT),
            checkSpace(),
            checkIdent("i"),
            checkOp(OpCode.SEMICOLON),
            checkEOF()
        ]);
    // test numbers
    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
        , [
            checkInteger(13),
            checkSpace(),
            checkInteger(0x123abcd, true, false),
            checkSpace(),
            checkInteger(0xabc, false, true),
            checkSpace(),
            checkInteger(std.conv.octal!743),
            checkSpace(),
            checkInteger(192_837_465),
            checkSpace(),
            checkInteger(0),
            checkSpace(),
            checkInteger(192837465),
            checkSpace(),
            checkReal(5.25),
            checkSpace(),
            checkReal(12.3f, 0),
            checkSpace(),
            checkReal(54.1L, 2),
            checkSpace(),
            checkReal(67.1, 1, true),
            checkSpace(),
            checkReal(3e3),
            checkSpace(),
            checkReal(25.67e-5f, 0),
            checkEOF()
        ]);
    // strange keyword detection: `fork;` or `ind;` keyword in beginning of ident is highlighted
    testTokenizer("fork;", [checkIdent("fork"),checkOp(OpCode.SEMICOLON),checkEOF()]);

}