module ddc.lexer.tokenizer;

import ddc.lexer.textsource;
import ddc.lexer.exceptions;

import std.stdio;
import std.datetime;
import std.conv;
import std.utf;
import std.math;

/// Token categories. Stored as ubyte; members are numbered from 0 in declaration order.
enum TokenType : ubyte {
    EOF,
    //EOL,
    WHITESPACE,
    COMMENT,
    IDENTIFIER,
    STRING,
    CHARACTER,
    INTEGER,
    FLOAT,
    KEYWORD,
    OP,
    INVALID
}

// Bitmap for fast checking of UniversalAlpha (universal character names for identifiers,
// ISO/IEC 9899:1999 Annex D) OR a..z OR A..Z OR _
// One bit per code point: isUniversalAlpha reads bit (c & 31) of word (c >> 5).
// max code is 0xd7ff
// 1728 words = 0xd800 code points / 32 bits per word; each row below holds 8 words (256 code points)
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
    0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
    0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
    0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
    0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
    0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
    0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
    0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
    0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
    0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
    0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff 45 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff 46 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff 47 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff 48 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff 49 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff 50 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff 51 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff 52 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff 53 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff 54 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff 55 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff 56 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff 57 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff 58 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff 59 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff 60 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff 61 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff 62 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff 63 
0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff 64 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff 65 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff 66 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff 67 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff 68 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff 69 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff 70 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff 71 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff 72 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff 73 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff 74 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff 75 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff 76 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff 77 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff 78 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff 79 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff 80 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff 81 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff 82 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff 83 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff 84 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff 85 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff 86 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff 87 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff 88 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff 89 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff 90 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff 91 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff 92 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff 93 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff 94 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff 95 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff 96 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff 97 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff 98 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff 99 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff 100 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff 101 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff 102 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff 103 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff 104 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff 105 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff 106 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff 107 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff 108 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff 109 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff 110 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff 111 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff 112 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff 113 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff 114 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff 115 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff 116 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff 117 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff 118 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff 119 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff 120 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff 121 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff 122 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff 123 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff 124 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff 125 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff 126 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff 127 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff 128 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff 129 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff 130 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff 131 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff 132 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff 133 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff 134 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff 135 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff 136 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff 137 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff 138 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff 139 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff 140 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff 141 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff 142 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff 143 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff 144 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff 145 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff 146 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff 147 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff 148 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff 149 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff 150 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff 151 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff 152 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff 153 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff 154 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff 155 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff 156 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff 157 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff 158 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff 159 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff 160 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff 161 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff 162 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff 163 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff 164 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff 165 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff 166 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff 167 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff 168 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff 169 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff 170 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff 171 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff 172 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff 173 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff 174 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff 175 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff 176 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff 177 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff 178 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff 179 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff 180 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff 181 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff 182 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff 183 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff 184 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff 185 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff 186 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff 187 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff 188 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff 189 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff 190 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff 191 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff 192 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff 193 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff 194 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff 195 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff 196 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff 197 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff 198 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff 199 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff 200 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff 201 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff 202 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff 203 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff 204 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff 205 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff 206 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff 207 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff 208 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff 209 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff 210 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff 211 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff 212 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff 213 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff 214 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff 215 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff 216 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff 217 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff 218 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff 219 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff 220 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff 221 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff 222 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff 223 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff 224 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff 225 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff 226 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff 227 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff 228 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff 229 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff 230 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff 231 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff 232 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff 233 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff 234 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
];

/**
 * Looks ch up in the UNIVERSAL_ALPHA_FLAGS bitmap.
 *
 * Params:
 *     ch = code point to classify
 * Returns:
 *     true if ch is A..Z, a..z, _ or a universal alpha; false otherwise,
 *     including for every code point above 0xd7ff (the last one the table covers).
 */
bool isUniversalAlpha(dchar ch) pure nothrow {
    // the bitmap stops at 0xd7ff; anything beyond it is never accepted
    if (ch > 0xd7ff)
        return false;
    // 32 code points per table word: high bits pick the word, low 5 bits pick the bit
    immutable uint word = UNIVERSAL_ALPHA_FLAGS[ch >> 5];
    immutable uint mask = 1u << (ch & 31);
    return (word & mask) != 0;
}

/// Returns true if ch may appear as the first character of an identifier
/// (exactly the set accepted by isUniversalAlpha).
bool isIdentStartChar(dchar ch) pure nothrow {
    return isUniversalAlpha(ch);
}

/// Returns true if ch may appear after the first character of an identifier:
/// an ASCII digit 0..9 or anything accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    if (ch >= '0' && ch <= '9')
        return true;
    return isUniversalAlpha(ch);
}

// Set to true to compile the slow reference classifier and the table generator/self-check below.
immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
static if
(ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
    /// Returns true if ch equals the single code point v.
    bool r(dchar ch, wchar v) pure nothrow {
        return ch == v;
    }

    /// Returns true if ch lies in the inclusive range v1..v2.
    bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
        return ch >= v1 && ch <= v2;
    }

    /// Reference (range-list) classifier the bitmap table is generated from.
    /// Unlike isUniversalAlpha it does NOT include a..z, A..Z or _.
    bool isUniversalAlphaSlow(dchar c) pure nothrow {
        return
            // Latin: 00AA, 00BA, 00C0-00D6, 00D8-00F6, 00F8-01F5, 01FA-0217,
            // 0250-02A8, 1E00-1E9B, 1EA0-1EF9, 207F
            r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
            || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
            //Greek: 0386, 0388-038A, 038C, 038E-03A1, 03A3-03CE, 03D0-03D6,
            //03DA, 03DC, 03DE, 03E0, 03E2-03F3, 1F00-1F15, 1F18-1F1D,
            //1F20-1F45, 1F48-1F4D, 1F50-1F57, 1F59, 1F5B, 1F5D,
            //1F5F-1F7D, 1F80-1FB4, 1FB6-1FBC, 1FC2-1FC4, 1FC6-1FCC,
            //1FD0-1FD3, 1FD6-1FDB, 1FE0-1FEC, 1FF2-1FF4, 1FF6-1FFC
            || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
            || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
            || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
            || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
            || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
            //Cyrillic: 0401-040C, 040E-044F, 0451-045C, 045E-0481, 0490-04C4,
            //04C7-04C8, 04CB-04CC, 04D0-04EB, 04EE-04F5, 04F8-04F9
            || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
            || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
            //Armenian: 0531-0556, 0561-0587
            || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
            //Hebrew: 05B0-05B9, 05BB-05BD, 05BF, 05C1-05C2, 05D0-05EA,
            //05F0-05F2
            || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
            || r(c, 0x05F0,0x05F2)
            //Arabic: 0621-063A, 0640-0652, 0670-06B7, 06BA-06BE, 06C0-06CE,
            //06D0-06DC, 06E5-06E8, 06EA-06ED
            || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
            || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
            //Devanagari: 0901-0903, 0905-0939, 093E-094D, 0950-0952, 0958-0963
            || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
            //Bengali: 0981-0983, 0985-098C, 098F-0990, 0993-09A8, 09AA-09B0,
            //09B2, 09B6-09B9, 09BE-09C4, 09C7-09C8, 09CB-09CD,
            //09DC-09DD, 09DF-09E3, 09F0-09F1
            || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
            || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
            || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
            //Gurmukhi: 0A02, 0A05-0A0A, 0A0F-0A10, 0A13-0A28, 0A2A-0A30,
            //0A32-0A33, 0A35-0A36, 0A38-0A39, 0A3E-0A42, 0A47-0A48,
            //0A4B-0A4D, 0A59-0A5C, 0A5E, 0A74
            || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
            || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
            || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
            //Gujarati: 0A81-0A83, 0A85-0A8B, 0A8D, 0A8F-0A91, 0A93-0AA8,
            //0AAA-0AB0, 0AB2-0AB3, 0AB5-0AB9, 0ABD-0AC5,
            //0AC7-0AC9, 0ACB-0ACD, 0AD0, 0AE0
            || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
            || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
            || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
            // Oriya: 0B01-0B03, 0B05-0B0C, 0B0F-0B10, 0B13-0B28, 0B2A-0B30,
            //0B32-0B33, 0B36-0B39, 0B3E-0B43, 0B47-0B48, 0B4B-0B4D,
            //0B5C-0B5D, 0B5F-0B61
            || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
            || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
            || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
            //Tamil: 0B82-0B83, 0B85-0B8A, 0B8E-0B90, 0B92-0B95, 0B99-0B9A,
            //0B9C, 0B9E-0B9F, 0BA3-0BA4, 0BA8-0BAA, 0BAE-0BB5,
            //0BB7-0BB9, 0BBE-0BC2, 0BC6-0BC8, 0BCA-0BCD
            || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
            || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
            || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
            //Telugu: 0C01-0C03, 0C05-0C0C, 0C0E-0C10, 0C12-0C28, 0C2A-0C33,
            //0C35-0C39, 0C3E-0C44, 0C46-0C48, 0C4A-0C4D, 0C60-0C61
            || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
            || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
            //Kannada: 0C82-0C83, 0C85-0C8C, 0C8E-0C90, 0C92-0CA8, 0CAA-0CB3,
            //0CB5-0CB9, 0CBE-0CC4, 0CC6-0CC8, 0CCA-0CCD, 0CDE,
            //0CE0-0CE1
            || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
            || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
            || r(c, 0x0CE0,0x0CE1)
            //Malayalam: 0D02-0D03, 0D05-0D0C, 0D0E-0D10, 0D12-0D28, 0D2A-0D39,
            //0D3E-0D43, 0D46-0D48, 0D4A-0D4D, 0D60-0D61
            || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
            || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
            //Thai: 0E01-0E3A, 0E40-0E5B
            || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
            //Lao: 0E81-0E82, 0E84, 0E87-0E88, 0E8A, 0E8D, 0E94-0E97,
            //0E99-0E9F, 0EA1-0EA3, 0EA5, 0EA7, 0EAA-0EAB,
            //0EAD-0EAE, 0EB0-0EB9, 0EBB-0EBD, 0EC0-0EC4, 0EC6,
            //0EC8-0ECD, 0EDC-0EDD
            || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
            || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
            || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
            || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
            //Tibetan: 0F00, 0F18-0F19, 0F35, 0F37, 0F39, 0F3E-0F47, 0F49-0F69,
            //0F71-0F84, 0F86-0F8B, 0F90-0F95, 0F97, 0F99-0FAD,
            //0FB1-0FB7, 0FB9
            || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
            || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
            || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
            //Georgian: 10A0-10C5, 10D0-10F6
            || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
            //Hiragana: 3041-3093, 309B-309C
            || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
            //Katakana: 30A1-30F6, 30FB-30FC
            || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
            //Bopomofo: 3105-312C
            || r(c, 0x3105,0x312C)
            //CJK Unified Ideographs: 4E00-9FA5
            || r(c, 0x4E00,0x9FA5)
            //Hangul: AC00-D7A3
            || r(c, 0xAC00,0xD7A3)
            //Digits: 0660-0669, 06F0-06F9, 0966-096F, 09E6-09EF, 0A66-0A6F,
            //0AE6-0AEF, 0B66-0B6F, 0BE7-0BEF, 0C66-0C6F, 0CE6-0CEF,
            //0D66-0D6F, 0E50-0E59, 0ED0-0ED9, 0F20-0F33
            || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
            || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
            || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
            //Special characters: 00B5, 00B7, 02B0-02B8, 02BB, 02BD-02C1, 02D0-02D1,
            //02E0-02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F-2040, 2102,
            //2107, 210A-2113, 2115, 2118-211D, 2124, 2126, 2128, 212A-2131,
            //2133-2138, 2160-2182, 3005-3007, 3021-3029
            || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
            || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
            || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
            || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
            ;
    }

}

/// When ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true: prints a freshly generated
/// UNIVERSAL_ALPHA_FLAGS table to stdout, then checks isUniversalAlpha against the
/// slow reference classifier. Does nothing when the flag is false.
unittest {

    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        immutable uint itemsInRow = 8;

        // Reference predicate: slow classifier plus the ASCII letters and underscore
        // that the table also encodes. Takes uint so the loops can count in integers;
        // the explicit cast is required because uint does not implicitly convert to dchar.
        static bool expected(uint code) {
            immutable dchar c = cast(dchar) code;
            return isUniversalAlphaSlow(c) || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }

        // find the highest accepted code point in the BMP
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            if (expected(i))
                maxAlpha = i;
        }
        // round up so the table ends on a full row (itemsInRow words of 32 bits)
        maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1;
        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            if ((i / 32) % itemsInRow == 0)
                write(" ");
            // pack 32 consecutive code points into one word, lowest code point in bit 0
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                if (expected(i + j))
                    flags |= (1u << j);
            }
            writef("0x%08x", flags);
            // no comma after the very last word
            if (i != maxAlpha / 32 * 32)
                write(",");
            // end of row: append the code point range the row covers
            if ((i / 32) % itemsInRow == itemsInRow - 1)
                writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
        }
        writeln("];");

        // NOTE(review): the bound 0x100000 stops short of the last code point 0x10FFFF - confirm intent
        for (uint code = 0; code < 0x100000; code++) {
            immutable bool want = expected(code);
            immutable bool got = isUniversalAlpha(cast(dchar) code);
            if (got != want) {
                // NOTE(review): repeated call presumably kept as a breakpoint target - confirm;
                // cast(void) marks the discarded result of a pure call as intentional
                cast(void) isUniversalAlpha(cast(dchar) code);
                writefln("universalAlpha test failed for char %06x expected %d actual %d", code, want ? 1 : 0, got ? 1 : 0);
            }
            assert(got == want);
        }
    }
}

/// Operator / punctuation codes. The trailing comment on each member is its spelling.
enum OpCode : ubyte {
    NONE, // no op
    DIV, // /
    DIV_EQ, // /=
    DOT, // .
    DOT_DOT, // ..
    DOT_DOT_DOT,// ...
    AND, // &
    AND_EQ, // &=
    LOG_AND, // &&
    OR, // |
    OR_EQ, // |=
    LOG_OR, // ||
    MINUS, // -
    MINUS_EQ, // -=
    MINUS_MINUS,// --
    PLUS, // +
    PLUS_EQ, // +=
    PLUS_PLUS, // ++
    LT, // <
    LT_EQ, // <=
    SHL, // <<
    SHL_EQ, // <<=
    LT_GT, // <>
    NE_EQ, // <>=
    GT, // >
    GT_EQ, // >=
    SHR_EQ, // >>=
    ASR_EQ, // >>>=
    SHR, // >>
    ASR, // >>>
    NOT, // !
    NOT_EQ, // !=
    NOT_LT_GT, // !<>
    NOT_LT_GT_EQ, // !<>=
    NOT_LT, // !<
    NOT_LT_EQ, // !<=
    NOT_GT, // !>
    NOT_GT_EQ, // !>=
    PAR_OPEN, // (
    PAR_CLOSE, // )
    SQ_OPEN, // [
    SQ_CLOSE, // ]
    CURL_OPEN, // {
    CURL_CLOSE, // }
    QUEST, // ?
489 COMMA, // , 490 SEMICOLON, // ; 491 COLON, // : 492 DOLLAR, // $ 493 EQ, // = 494 QE_EQ, // == 495 MUL, // * 496 MUL_EQ, // *= 497 MOD, // % 498 MOD_EQ, // %= 499 XOR, // ^ 500 XOR_EQ, // ^= 501 LOG_XOR, // ^^ 502 LOG_XOR_EQ, // ^^= 503 INV, // ~ 504 INV_EQ, // ~= 505 AT, // @ 506 EQ_GT, // => 507 SHARP // # 508 }; 509 510 immutable dstring[] OP_CODE_STRINGS = [ 511 "", 512 "/", 513 "/=", 514 ".", 515 "..", 516 "...", 517 "&", 518 "&=", 519 "&&", 520 "|", 521 "|=", 522 "||", 523 "-", 524 "-=", 525 "--", 526 "+", 527 "+=", 528 "++", 529 "<", 530 "<=", 531 "<<", 532 "<<=", 533 "<>", 534 "<>=", 535 ">", 536 ">=", 537 ">>=", 538 ">>>=", 539 ">>", 540 ">>>", 541 "!", 542 "!=", 543 "!<>", 544 "!<>=", 545 "!<", 546 "!<=", 547 "!>", 548 "!>=", 549 "(", 550 ")", 551 "[", 552 "]", 553 "{", 554 "}", 555 "?", 556 ",", 557 ";", 558 ":", 559 "$", 560 "=", 561 "==", 562 "*", 563 "*=", 564 "%", 565 "%=", 566 "^", 567 "^=", 568 "^^", 569 "^^=", 570 "~", 571 "~=", 572 "@", 573 "=>", 574 "#" 575 ]; 576 577 dstring getOpNameD(OpCode op) pure nothrow { 578 return OP_CODE_STRINGS[op]; 579 }; 580 581 enum Keyword : ubyte { 582 NONE, 583 ABSTRACT, 584 ALIAS, 585 ALIGN, 586 ASM, 587 ASSERT, 588 AUTO, 589 590 BODY, 591 BOOL, 592 BREAK, 593 BYTE, 594 595 CASE, 596 CAST, 597 CATCH, 598 CDOUBLE, 599 CENT, 600 CFLOAT, 601 CHAR, 602 CLASS, 603 CONST, 604 CONTINUE, 605 CREAL, 606 607 DCHAR, 608 DEBUG, 609 DEFAULT, 610 DELEGATE, 611 DELETE, 612 DEPRECATED, 613 DO, 614 DOUBLE, 615 616 ELSE, 617 ENUM, 618 EXPORT, 619 EXTERN, 620 621 FALSE, 622 FINAL, 623 FINALLY, 624 FLOAT, 625 FOR, 626 FOREACH, 627 FOREACH_REVERSE, 628 FUNCTION, 629 630 GOTO, 631 632 IDOUBLE, 633 IF, 634 IFLOAT, 635 IMMUTABLE, 636 IMPORT, 637 IN, 638 INOUT, 639 INT, 640 INTERFACE, 641 INVARIANT, 642 IREAL, 643 IS, 644 645 LAZY, 646 LONG, 647 648 MACRO, 649 MIXIN, 650 MODULE, 651 652 NEW, 653 NOTHROW, 654 NULL, 655 656 OUT, 657 OVERRIDE, 658 659 PACKAGE, 660 PRAGMA, 661 PRIVATE, 662 PROTECTED, 663 PUBLIC, 664 PURE, 665 666 REAL, 
REF,
    RETURN,

    SCOPE,
    SHARED,
    SHORT,
    STATIC,
    STRUCT,
    SUPER,
    SWITCH,
    SYNCHRONIZED,

    TEMPLATE,
    THIS,
    THROW,
    TRUE,
    TRY,
    TYPEDEF,
    TYPEID,
    TYPEOF,

    UBYTE,
    UCENT,
    UINT,
    ULONG,
    UNION,
    UNITTEST,
    USHORT,

    VERSION,
    VOID,
    VOLATILE,

    WCHAR,
    WHILE,
    WITH,

    FILE,
    MODULE__,
    LINE,
    FUNCTION__,
    PRETTY_FUNCTION,

    //Special Token Replaced with
    DATE, // string literal of the date of compilation "mmm dd yyyy"
    EOF, // sets the scanner to the end of the file
    TIME, // string literal of the time of compilation "hh:mm:ss"
    TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    VENDOR, // Compiler vendor string, such as "Digital Mars D"
    VERSION_, // Compiler version as an integer, such as 2001

    GSHARED,
    TRAITS,
    VECTOR,
    PARAMETERS,

}

/// Source spelling of every Keyword member, indexed by the member's numeric value
/// (entry 0, for Keyword.NONE, is the empty string).
immutable dstring[] KEYWORD_STRINGS = [
    "",
    "abstract",
    "alias",
    "align",
    "asm",
    "assert",
    "auto",

    "body",
    "bool",
    "break",
    "byte",

    "case",
    "cast",
    "catch",
    "cdouble",
    "cent",
    "cfloat",
    "char",
    "class",
    "const",
    "continue",
    "creal",

    "dchar",
    "debug",
    "default",
    "delegate",
    "delete",
    "deprecated",
    "do",
    "double",

    "else",
    "enum",
    "export",
    "extern",

    "false",
    "final",
    "finally",
    "float",
    "for",
    "foreach",
    "foreach_reverse",
    "function",

    "goto",

    "idouble",
    "if",
    "ifloat",
    "immutable",
    "import",
    "in",
    "inout",
    "int",
    "interface",
    "invariant",
    "ireal",
    "is",

    "lazy",
    "long",

    "macro",
    "mixin",
    "module",

    "new",
    "nothrow",
    "null",

    "out",
    "override",

    "package",
    "pragma",
    "private",
    "protected",
    "public",
    "pure",

    "real",
    "ref",
    "return",

    "scope",
    "shared",
    "short",
    "static",
    "struct",
    "super",
    "switch",
    "synchronized",

    "template",
    "this",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typeof",

    "ubyte",
    "ucent",
    "uint",
    "ulong",
    "union",
    "unittest",
    "ushort",

    "version",
    "void",
    "volatile",

    "wchar",
    "while",
    "with",

    "__FILE__",
    "__MODULE__",
    "__LINE__",
    "__FUNCTION__",
    "__PRETTY_FUNCTION__",

    //Special Token Replaced with
    "__DATE__", // string literal of the date of compilation "mmm dd yyyy"
    "__EOF__", // sets the scanner to the end of the file
    "__TIME__", // string literal of the time of compilation "hh:mm:ss"
    "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    "__VENDOR__", // Compiler vendor string, such as "Digital Mars D"
    "__VERSION__", // Compiler version as an integer, such as 2001


    "__gshared",
    "__traits",
    "__vector",
    "__parameters"
];

/// Returns the source spelling of `keyword` (a direct lookup in KEYWORD_STRINGS).
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    return KEYWORD_STRINGS[keyword];
};

/**
 * Searches the keyword range [start .. end] for a keyword whose tail matches the text at `name`.
 *
 * The first character of a keyword is never compared here: s[j] is checked against
 * name[j - 1] starting at j = 1, so `name` must point at the character that follows the
 * already-recognized first character.
 *
 * Params:
 *   start = first keyword to try (inclusive)
 *   end   = last keyword to try (inclusive)
 *   name  = text following the first character of the candidate word
 *   len   = number of characters readable at `name` (a keyword needing more than
 *           `len` tail characters is skipped as too long)
 *   pos   = advanced by the number of tail characters consumed when a keyword is found
 *
 * Returns: the matching keyword, or Keyword.NONE when no keyword in the range matches
 *          as a whole word.
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    for (Keyword i = start; i <= end; i++) {
        dstring s = KEYWORD_STRINGS[i];
        if (s.length == 0)
            continue; // Keyword.NONE has no spelling; it would otherwise index name[-1] below
        if (s.length > len + 1)
            continue; // too long
        bool found = true;
        for (uint j = 1; j < s.length; j++) {
            if (s[j] != name[j - 1]) {
                found = false;
                break;
            }
        }
        if (found) {
            // The keyword tail occupies name[0 .. s.length - 1]. When that is exactly `len`
            // characters the keyword ends at the end of the available text, and name[len]
            // must not be read. Otherwise the next character decides whether this is a whole
            // word or just the prefix of a longer identifier.
            if (s.length == len + 1 || !isIdentMiddleChar(name[s.length - 1])) {
                pos += s.length - 1;
                return i;
            }
        }
    }
    return Keyword.NONE;
}

/**
 * Token.
897 */ 898 class Token { 899 protected SourceFile _file; 900 protected int _line; 901 protected int _pos; 902 protected TokenType _type; 903 /// returns token type 904 @property TokenType type() { return _type; } 905 /// returns file info for source 906 @property SourceFile filename() { return _file; } 907 /// returns 1-based source line number of token start 908 @property int line() { return _line; } 909 /// returns 1-based source line position of token start 910 @property int pos() { return _pos; } 911 /// returns token text 912 @property dchar[] text() { return null; } 913 914 // number token properties 915 @property dchar literalType() { return 0; } 916 @property ulong intValue() { return 0; } 917 @property bool isUnsigned() { return false; } 918 @property ulong isLong() { return false; } 919 @property real realValue() { return 0; } 920 @property double doubleValue() { return 0; } 921 @property float floatValue() { return 0; } 922 @property byte precision() { return 0; } 923 @property bool isImaginary() { return false; } 924 925 /// returns opcode ID - for opcode tokens 926 @property OpCode opCode() { return OpCode.NONE; } 927 /// returns keyword ID - for keyword tokens 928 @property Keyword keyword() { return Keyword.NONE; } 929 /// returns true if this is documentation comment token 930 @property bool isDocumentationComment() { return false; } 931 /// returns true if this is multiline 932 @property bool isMultilineComment() { return false; } 933 934 // error handling 935 936 /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer) 937 @property bool isError() { return type == TokenType.INVALID; } 938 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 939 @property string errorMessage() { return null; } 940 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 941 @property int errorCode() { return 0; } 942 /// returns type of 
token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 943 @property TokenType invalidTokenType() { return TokenType.INVALID; } 944 945 946 this(TokenType type) { 947 _type = type; 948 } 949 950 this(TokenType type, SourceFile file, int line, int pos) { 951 _type = type; 952 _file = file; 953 _line = line; 954 _pos = pos; 955 } 956 /// set start position for token (line is 1-based, pos is 0-based) 957 void setPos(SourceFile file, int line, int pos) { 958 _file = file; 959 _line = line; 960 _pos = pos + 1; 961 } 962 /// set source file information for token 963 void setFile(SourceFile file) { 964 _file = file; 965 } 966 /// set start position for token (line is 1-based, pos is 0-based) 967 void setPos(int line, int pos) { 968 _line = line; 969 _pos = pos + 1; 970 } 971 972 public abstract Token clone(); 973 public override @property string toString() { 974 return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword) 975 ~" \"" ~ toUTF8(text()) ~ "\""; 976 } 977 } 978 979 class EofToken : Token { 980 this() { 981 super(TokenType.EOF); 982 } 983 this(SourceFile file, uint line, uint pos) { 984 super(TokenType.EOF, file, line, pos); 985 } 986 override public Token clone() { 987 return new EofToken(_file, _line, _pos); 988 } 989 public override @property string toString() { 990 return "EOF"; 991 } 992 } 993 994 // treat as white space 995 //class EolToken : Token { 996 // this(string file, uint line, uint pos) { 997 // super(TokenType.EOL, file, line, pos); 998 // } 999 //} 1000 1001 /// white space token 1002 class WhiteSpaceToken : Token { 1003 this() { 1004 super(TokenType.WHITESPACE); 1005 } 1006 this(SourceFile file, uint line, uint pos) { 1007 super(TokenType.WHITESPACE, file, line, pos); 1008 } 1009 override public Token clone() { 1010 return new WhiteSpaceToken(_file, _line, _pos); 1011 } 1012 public override @property string 
toString() { 1013 return "WhiteSpace"; 1014 } 1015 } 1016 1017 class OpToken : Token { 1018 OpCode _op; 1019 public @property override OpCode opCode() { return _op; } 1020 public @property void opCode(OpCode op) { _op = op; } 1021 public @property override dchar[] text() { return cast(dchar[])getOpNameD(_op); } 1022 this() { 1023 super(TokenType.OP); 1024 } 1025 this(SourceFile file, uint line, uint pos) { 1026 super(TokenType.OP, file, line, pos); 1027 } 1028 override public Token clone() { 1029 OpToken res = new OpToken(_file, _line, _pos); 1030 res._op = _op; 1031 return res; 1032 } 1033 public override @property string toString() { 1034 return "Op:" ~ to!string(_op); 1035 } 1036 } 1037 1038 class KeywordToken : Token { 1039 Keyword _keyword; 1040 public @property override Keyword keyword() { return _keyword; } 1041 public @property void keyword(Keyword keyword) { _keyword = keyword; } 1042 public @property override dchar[] text() { return cast(dchar[])getKeywordNameD(_keyword); } 1043 this() { 1044 super(TokenType.KEYWORD); 1045 } 1046 this(SourceFile file, uint line, uint pos) { 1047 super(TokenType.KEYWORD, file, line, pos); 1048 } 1049 override public Token clone() { 1050 KeywordToken res = new KeywordToken(_file, _line, _pos); 1051 res._keyword = _keyword; 1052 return res; 1053 } 1054 public override @property string toString() { 1055 return "Keyword:" ~ to!string(_keyword); 1056 } 1057 } 1058 1059 /// comment token 1060 class CommentToken : Token { 1061 protected dchar[] _text; 1062 protected bool _isDocumentationComment; 1063 protected bool _isMultilineComment; 1064 1065 1066 override @property bool isDocumentationComment() { 1067 return _isDocumentationComment; 1068 } 1069 1070 @property void isDocumentationComment(bool f) { 1071 _isDocumentationComment = f; 1072 } 1073 1074 /// returns true if this is multiline 1075 override @property bool isMultilineComment() { 1076 return _isMultilineComment; 1077 } 1078 1079 @property void isMultilineComment(bool f) 
{ 1080 _isMultilineComment = f; 1081 } 1082 1083 @property override dchar[] text() { return _text; } 1084 @property void text(dchar[] text) { _text = text; } 1085 this() { 1086 super(TokenType.COMMENT); 1087 } 1088 this(SourceFile file, uint line, uint pos, dchar[] text) { 1089 super(TokenType.COMMENT, file, line, pos); 1090 _text = text; 1091 } 1092 override public Token clone() { 1093 CommentToken res = new CommentToken(_file, _line, _pos, _text.dup); 1094 res._isDocumentationComment = _isDocumentationComment; 1095 res._isMultilineComment = _isMultilineComment; 1096 return res; 1097 } 1098 public override @property string toString() { 1099 return "Comment:" ~ to!string(_text); 1100 } 1101 } 1102 1103 /// Invalid token holder - for error tolerant parsing 1104 class InvalidToken : Token { 1105 protected dchar[] _text; 1106 protected TokenType _invalidTokenType; 1107 protected int _errorCode; 1108 protected string _errorMessage; 1109 1110 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 1111 override @property string errorMessage() { return _errorMessage; } 1112 /// sets error message 1113 @property void errorMessage(string s) { _errorMessage = s; } 1114 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 1115 override @property int errorCode() { return _errorCode; } 1116 /// sets error code 1117 @property void errorCode(int c) { _errorCode = c; } 1118 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 1119 override @property TokenType invalidTokenType() { return _invalidTokenType; } 1120 /// sets type of token parsing of which has been failed 1121 @property void invalidTokenType(TokenType t) { _invalidTokenType = t; } 1122 1123 /// text of invalid token 1124 @property override dchar[] text() { return _text; } 1125 /// text of invalid token 1126 @property void text(dchar[] text) { 
_text = text; }

    this() {
        super(TokenType.INVALID);
    }
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.INVALID, file, line, pos);
        _text = text;
    }
    override Token clone() {
        InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup);
        res._errorMessage = _errorMessage.dup;
        res._errorCode = _errorCode;
        res._invalidTokenType = _invalidTokenType;
        return res;
    }
    override @property string toString() {
        return "Invalid:" ~ to!string(_text);
    }
}

alias tokenizer_ident_t = uint;
alias tokenizer_ident_name_t = dchar[];

enum : tokenizer_ident_t {
    NO_IDENT = 0
}

/**
 * Global storage for identifier strings.
 *
 * Maps identifier text to a numeric id and back. Ids are handed out sequentially,
 * starting at NO_IDENT + 1, the first time a name is passed to idByName().
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
     * Search for id by name, return NO_IDENT if not found.
     */
    uint findByName(tokenizer_ident_name_t name) {
        tokenizer_ident_t * found = (name in _nameToId);
        if (found)
            return *found;
        return NO_IDENT;
    }

    /**
     * Search for name by id, return null if not found.
     */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        auto found = (id in _idToName);
        if (found)
            return *found;
        return null;
    }

    /**
     * Search for ident id by name, create new entry if not found.
     *
     * The name is copied before it is stored: callers pass slices of reusable buffers
     * (IdentToken.setText forwards whatever it is given), and keeping such a slice as a
     * map key would let later writes to the buffer silently change the stored key.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        uint * found = (name in _nameToId);
        if (found)
            return *found;
        uint newid = _nextId++;
        // private copy, never written to again, so the cast to an immutable key is sound
        tokenizer_ident_name_t stored = name.dup;
        _nameToId[cast(dstring)stored] = newid;
        _idToName[newid] = stored;
        return newid;
    }
}

/**
 * Thread local storage for IDs.
1202 */ 1203 IdentHolder identMap; 1204 1205 static this() { 1206 // init ID storage 1207 identMap = new IdentHolder(); 1208 } 1209 1210 class StringLiteralToken : Token { 1211 dchar[] _text; 1212 dchar _literalType; 1213 public @property override dchar literalType() { return _literalType; } 1214 public @property override dchar[] text() { return _text; } 1215 public void setText(dchar[] text, dchar type) { _text = text; _literalType = type; } 1216 this() { 1217 super(TokenType.STRING); 1218 } 1219 this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) { 1220 super(TokenType.STRING, file, line, pos); 1221 _text = text; 1222 _literalType = type; 1223 } 1224 override public Token clone() { 1225 return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType); 1226 } 1227 public override @property string toString() { 1228 return toUTF8("String:\"" ~ _text ~ "\"" ~ (_literalType ? _literalType : ' ')); 1229 } 1230 } 1231 1232 class CharacterLiteralToken : Token { 1233 dchar _character; 1234 dchar _literalType; 1235 @property override dchar literalType() { return _literalType; } 1236 @property dchar character() { return _character; } 1237 @property override dchar[] text() { return [_character]; } 1238 void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; } 1239 this() { 1240 super(TokenType.CHARACTER); 1241 } 1242 this(SourceFile file, uint line, uint pos, dchar character, dchar type) { 1243 super(TokenType.CHARACTER, file, line, pos); 1244 _character = character; 1245 _literalType = type; 1246 } 1247 override public Token clone() { 1248 return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType); 1249 } 1250 public override @property string toString() { 1251 return "Char:" ~ toUTF8([_character]); 1252 } 1253 } 1254 1255 class IntegerLiteralToken : Token { 1256 ulong _value; 1257 bool _unsigned; 1258 bool _long; 1259 public @property override ulong intValue() { return _value; } 1260 public @property 
override bool isUnsigned() { return _unsigned; } 1261 public @property override ulong isLong() { return _long; } 1262 public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } 1263 public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) { 1264 _value = value; 1265 _unsigned = unsignedFlag; 1266 _long = longFlag; 1267 } 1268 public void setFlags(bool unsignedFlag = false, bool longFlag = false) { 1269 _unsigned = unsignedFlag; 1270 _long = longFlag; 1271 } 1272 this() { 1273 super(TokenType.INTEGER); 1274 } 1275 this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) { 1276 super(TokenType.INTEGER, file, line, pos); 1277 _value = value; 1278 _unsigned = unsignedFlag; 1279 _long = longFlag; 1280 } 1281 override public Token clone() { 1282 return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long); 1283 } 1284 public override @property string toString() { 1285 return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? 
"U" : ""); 1286 } 1287 } 1288 1289 class RealLiteralToken : Token { 1290 real _value; 1291 byte _precision; 1292 bool _imaginary; 1293 public @property override ulong intValue() { return to!long(_value); } 1294 public @property override real realValue() { return _value; } 1295 public @property override double doubleValue() { return cast(double)_value; } 1296 public @property override float floatValue() { return cast(float)_value; } 1297 public @property override byte precision() { return _precision; } 1298 public @property override bool isImaginary() { return _imaginary; } 1299 public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); } 1300 public void setValue(real value, byte precision = 1, bool imaginary = false) { 1301 _value = value; 1302 _precision = precision; 1303 _imaginary = imaginary; 1304 } 1305 public void setFlags(byte precision = 1, bool imaginary = false) { 1306 _precision = precision; 1307 _imaginary = imaginary; 1308 } 1309 this() { 1310 super(TokenType.FLOAT); 1311 } 1312 this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) { 1313 super(TokenType.FLOAT, file, line, pos); 1314 _value = value; 1315 _precision = precision; 1316 _imaginary = imaginary; 1317 } 1318 override public Token clone() { 1319 return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary); 1320 } 1321 public override @property string toString() { 1322 return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? 
"i" : ""); 1323 } 1324 } 1325 1326 class IdentToken : Token { 1327 tokenizer_ident_t _id; 1328 public @property override dchar[] text() { return identMap.nameById(_id); } 1329 public void setText(dchar[] text) { _id = identMap.idByName(text); } 1330 this() { 1331 super(TokenType.IDENTIFIER); 1332 } 1333 this(SourceFile file, uint line, uint pos, dchar[] text) { 1334 super(TokenType.IDENTIFIER, file, line, pos); 1335 _id = identMap.idByName(text); 1336 } 1337 this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) { 1338 super(TokenType.IDENTIFIER, file, line, pos); 1339 _id = id; 1340 } 1341 override public Token clone() { 1342 return new IdentToken(_file, _line, _pos, _id); 1343 } 1344 public override @property string toString() { 1345 return "Ident:" ~ to!string(text); 1346 } 1347 } 1348 1349 // shared appender buffer, to avoid extra heap allocations 1350 struct StringAppender { 1351 dchar[] buf; 1352 uint len; 1353 dchar[] get() { 1354 return buf[0 .. len]; 1355 } 1356 void appendEol() { 1357 if (len + 1 > buf.length) { 1358 uint newsize = cast(uint)((len + 1 + buf.length) * 2); 1359 if (newsize < 128) 1360 newsize = 128; 1361 buf.length = newsize; 1362 } 1363 buf[len] = '\n'; 1364 len++; 1365 } 1366 void append(dchar[] s) { 1367 if (s.length == 0) 1368 return; 1369 if (len + s.length > buf.length) { 1370 uint newsize = cast(uint)((len + s.length + buf.length) * 2); 1371 if (newsize < 128) 1372 newsize = 128; 1373 buf.length = newsize; 1374 } 1375 buf[len .. 
len + s.length] = s; 1376 len += s.length; 1377 } 1378 void append(dchar ch) { 1379 if (len + 1 > buf.length) { 1380 uint newsize = cast(uint)(buf.length * 2); 1381 if (newsize < 128) 1382 newsize = 128; 1383 buf.length = newsize; 1384 } 1385 buf[len++] = ch; 1386 } 1387 void reset() { 1388 len = 0; 1389 } 1390 static int parseHexDigit(dchar ch) { 1391 if (ch >= '0' && ch <='9') 1392 return ch - '0'; 1393 if (ch >= 'a' && ch <='f') 1394 return ch - 'a' + 10; 1395 if (ch >= 'A' && ch <='F') 1396 return ch - 'A' + 10; 1397 return -1; 1398 } 1399 bool errorFlag = false; 1400 dchar decodeHex(ref int pos, int count) { 1401 dchar res = 0; 1402 for (int i = 0; i < count; i++) { 1403 if (pos >= len - 1) { 1404 errorFlag = true; 1405 return res; 1406 } 1407 dchar ch = buf[++pos]; 1408 int digit = parseHexDigit(ch); 1409 if (digit < 0) { 1410 errorFlag = true; 1411 digit = 0; 1412 } 1413 res = (res << 4) | digit; 1414 } 1415 return res; 1416 } 1417 dchar decodeOct(dchar firstChar, ref int pos) { 1418 dchar res = 0; 1419 res = firstChar - '0'; 1420 if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') { 1421 res = (res << 3) | (buf[++pos] - '0'); 1422 } 1423 if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') { 1424 res = (res << 3) | (buf[++pos] - '0'); 1425 } 1426 return res; 1427 } 1428 1429 char[] entityNameBuf; 1430 int entityNameLen; 1431 1432 dchar decodeCharacterEntity(ref int pos) { 1433 entityNameLen = 0; 1434 pos++; 1435 for(; pos < len && buf[pos] != ';'; pos++) { 1436 dchar ch = buf[pos]; 1437 if (ch >= 0x80) 1438 errorFlag = true; 1439 if (entityNameBuf.length < entityNameLen + 4) 1440 entityNameBuf.length += 32; 1441 entityNameBuf[entityNameLen++] = cast(char)ch; 1442 } 1443 if (pos < len && buf[pos] == ';') { 1444 dchar ch = entityToChar(cast(string)entityNameBuf[0 .. 
entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /**
     * Replaces escape sequences in buf[0 .. len] with the characters they denote, in place,
     * and shrinks len to the decoded length.
     *
     * Handles the single-character escapes (\' \" \? \\ \a \b \f \n \r \t \v), hex escapes
     * (\x, \u, \U via decodeHex), octal escapes of one to three digits (via decodeOct) and
     * named character entities (\&name; via decodeCharacterEntity). An unknown escape is
     * replaced by the escaped character itself and flagged as an error.
     *
     * Returns: the value of errorFlag, i.e. true when at least one invalid escape was seen.
     */
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0;
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch != '\\') {
                buf[dst++] = ch;
                continue;
            }
            if (src == len - 1) {
                // lone backslash at the very end: nothing to escape, report it instead of
                // silently dropping it
                errorFlag = true;
                break; // INVALID
            }
            ch = buf[++src];
            switch (ch) {
                case '\'':
                case '\"':
                case '?':
                case '\\':
                    buf[dst++] = ch;
                    break;
                case 'a':
                    buf[dst++] = '\a';
                    break;
                case 'b':
                    buf[dst++] = '\b';
                    break;
                case 'f':
                    buf[dst++] = '\f';
                    break;
                case 'n':
                    buf[dst++] = '\n';
                    break;
                case 'r':
                    buf[dst++] = '\r';
                    break;
                case 't':
                    buf[dst++] = '\t';
                    break;
                case 'v':
                    buf[dst++] = '\v';
                    break;
                case 'x':
                    buf[dst++] = decodeHex(src, 2);
                    break;
                case 'u':
                    buf[dst++] = decodeHex(src, 4);
                    break;
                case 'U':
                    buf[dst++] = decodeHex(src, 8);
                    break;
                default:
                    if (ch >= '0' && ch <= '7') {
                        // octal X, XX or XXX. '0' is deliberately handled here and not by a
                        // dedicated case, so that "\012" decodes as one character; a plain
                        // "\0" still yields NUL because decodeOct stops at the first
                        // non-octal character.
                        buf[dst++] = decodeOct(ch, src);
                    } else if (ch == '&') {
                        // named character entity
                        buf[dst++] = decodeCharacterEntity(src);
                    } else {
                        // unknown escape: keep the character as is and flag the error
                        buf[dst++] = ch;
                        errorFlag = true;
                    }
                    break;
            }
        }
        len = dst;
        return errorFlag;
    }
}

class Tokenizer
{
    protected SourceLines _lineStream;
    protected dchar[] _lineText;
    protected int _line; // current line number
    protected int _len; // current line length
    protected int _pos; // current line read position
    protected int _prevLineLength; // previous line length
    protected uint _state; // tokenizer state

    enum : int {
        EOF_CHAR = 0x001A,
        EOL_CHAR = 0x000A
    };

    protected
WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken(); 1540 protected CommentToken _sharedCommentToken = new CommentToken(); 1541 protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken(); 1542 protected IdentToken _sharedIdentToken = new IdentToken(); 1543 protected OpToken _sharedOpToken = new OpToken(); 1544 protected KeywordToken _sharedKeywordToken = new KeywordToken(); 1545 protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken(); 1546 protected RealLiteralToken _sharedRealToken = new RealLiteralToken(); 1547 protected InvalidToken _sharedInvalidToken = new InvalidToken(); 1548 protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken(); 1549 protected StringAppender _stringLiteralAppender; 1550 protected StringAppender _commentAppender; 1551 protected StringAppender _identAppender; 1552 1553 protected bool _enableCommentText = true; 1554 /// when false, does not put comment text into comment token - for less allocations 1555 @property void enableCommentText(bool enabled) { 1556 _enableCommentText = enabled; 1557 } 1558 /// when false, does not put comment text into comment token - for less allocations 1559 @property bool enableCommentText() { 1560 return _enableCommentText; 1561 } 1562 1563 protected bool _errorTolerant = false; 1564 /// when true, returns BadToken instead of throwing exception 1565 @property void errorTolerant(bool enabled) { 1566 _errorTolerant = enabled; 1567 } 1568 /// when true, returns BadToken instead of throwing exception 1569 @property bool errorTolerant() { 1570 return _errorTolerant; 1571 } 1572 1573 this(SourceLines lineStream) { 1574 init(lineStream); 1575 } 1576 1577 void init(SourceLines lineStream, int pos = 0) { 1578 _lineStream = lineStream; 1579 SourceFile file = _lineStream.file; 1580 _sharedWhiteSpaceToken.setFile(file); 1581 _sharedCommentToken.setFile(file); 1582 _sharedStringLiteralToken.setFile(file); 1583 
_sharedIdentToken.setFile(file);
        _sharedOpToken.setFile(file);
        _sharedKeywordToken.setFile(file);
        _sharedIntegerToken.setFile(file);
        _sharedRealToken.setFile(file);
        _sharedInvalidToken.setFile(file);
        _sharedCharacterLiteralToken.setFile(file);
        buildTime = Clock.currTime();
        _line = lineStream.line;
        _pos = 0;
        _prevLineLength = 0;
        _lineText = null;
        nextLine();
        _pos = pos;
    }

    /// Creates a tokenizer over in-memory source text.
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }

    /**
     * Fetches the next line from the source stream into _lineText and resets _pos to 0.
     *
     * Returns: false at end of file (with _pos and _len set to 0), true otherwise.
     * Throws: SourceEncodingException when the stream reports a non-zero error code.
     */
    protected bool nextLine() {
        _prevLineLength = cast(int)_lineText.length;
        _lineText = _lineStream.readLine();
        if (!_lineText) {
            if (_lineStream.errorCode != 0)
                throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
            if (_lineStream.eof) {
                // end of file
                _pos = 0;
                _len = 0;
                return false;
            }
            // just an empty line
        }
        _line = _lineStream.line;
        _pos = 0;
        _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
        return true;
    }

    /**
     * Consumes and returns the next character.
     *
     * When the current line is exhausted the next line is fetched and EOL_CHAR is returned,
     * or EOF_CHAR if there is no next line. After consuming the last character of a line the
     * following line is fetched eagerly.
     */
    protected dchar nextChar() {
        if (_pos >= _len) {
            if (!nextLine()) {
                _pos = _prevLineLength + 1;
                return EOF_CHAR;
            }
            return EOL_CHAR;
        }
        dchar res = _lineText[_pos++];
        if (_pos >= _len)
            nextLine();
        return res;
    }

    /**
     * Returns the character at the current read position without consuming it.
     *
     * Yields EOF_CHAR when no line can be loaded and EOL_CHAR when the read position is at
     * or past the end of the current line. The read position is left untouched; use
     * nextChar() to advance.
     */
    protected dchar peekChar() {
        if (_lineText is null) {
            if (!nextLine()) {
                return EOF_CHAR;
            }
        }
        if (_pos >= _len)
            return EOL_CHAR;
        // a peek must not move _pos
        return _lineText[_pos];
    }

    /// Creates the end-of-file token, positioned relative to the start of the current token.
    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _startLine, _startPos + 2);
    }

    protected Token processWhiteSpace(dchar firstChar) {
        // reuse the same token instance, to avoid extra heap spamming
_sharedWhiteSpaceToken.setPos(_startLine, _startPos);
        for (;;) {
            int i = _pos;
            for (; i < _len; i++) {
                dchar ch = _lineText[i];
                if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
                    break;
            }
            _pos = i;
            if (_pos < _len)
                break;
            // go to next line
            if (!nextLine())
                break;
        }
        return _sharedWhiteSpaceToken;
    }

    /// Single-line comment: takes the rest of the current line as the comment text and
    /// reports it through the shared comment token.
    protected Token processOneLineComment() {
        _sharedCommentToken.setPos(_startLine, _startPos);
        _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
        _sharedCommentToken.isMultilineComment = false;
        if (_enableCommentText) {
            _sharedCommentToken.text = _lineText[_pos + 1 .. $];
        }
        _pos = _len;
        nextChar();
        return _sharedCommentToken;
    }

    /// Single-line comment introduced by '#': takes the rest of the current line as the
    /// comment text.
    protected Token processOneLineSharpComment() {
        _sharedCommentToken.setPos(_startLine, _startPos);
        // the token instance is shared, so clear the flags a previous comment may have left
        _sharedCommentToken.isDocumentationComment = false;
        _sharedCommentToken.isMultilineComment = false;
        if (_enableCommentText) {
            _sharedCommentToken.text = _lineText[_pos .. $];
        }
        _pos = _len;
        return _sharedCommentToken;
    }

    // Comment /* */
    /// Scans forward, across lines if necessary, to the closing "*/". When comment text is
    /// enabled, the text of every line of the comment is collected (lines joined with EOL)
    /// and stored in the shared comment token.
    protected Token processMultilineComment() {
        _sharedCommentToken.setPos(_startLine, _startPos);
        _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
        _sharedCommentToken.isMultilineComment = true;
        _commentAppender.reset();
        int textStart = _pos + 1;
        for (;;) {
            int textEnd = int.max;
            int i = textStart;
            for (; i < _len - 1; i++) {
                if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
                    textEnd = i;
                    break;
                }
            }
            if (textEnd != int.max) {
                if (_enableCommentText)
                    _commentAppender.append(_lineText[textStart .. textEnd]);
                _pos = textEnd + 2;
                break;
            }
            if (_enableCommentText) {
                // no terminator on this line: keep its text before moving on, otherwise
                // only the last line of the comment would survive
                if (textStart < _len)
                    _commentAppender.append(_lineText[textStart .. $]);
                _commentAppender.appendEol();
            }
            if (!nextLine()) {
                // TODO: do we need throw exception if comment not closed by end of file?
1719 _pos = _len; 1720 break; 1721 } 1722 textStart = 0; 1723 } 1724 if (_enableCommentText) { 1725 _sharedCommentToken.text = _commentAppender.get(); 1726 } 1727 return _sharedCommentToken; 1728 } 1729 1730 // Comment /+ +/ 1731 protected Token processNestedComment() { 1732 _sharedCommentToken.setPos(_startLine, _startPos); 1733 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+'; 1734 _sharedCommentToken.isMultilineComment = true; 1735 _commentAppender.reset(); 1736 dchar[] text; 1737 int textStart = _pos + 1; 1738 int level = 1; 1739 for (;;) { 1740 int textEnd = int.max; 1741 int i = textStart; 1742 for (; i < _len - 1; i++) { 1743 if (_lineText[i] == '/' && _lineText[i + 1] == '+') { 1744 level++; 1745 i++; 1746 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') { 1747 if (--level == 0) { 1748 textEnd = i; 1749 break; 1750 } 1751 } 1752 } 1753 if (textEnd != int.max) { 1754 if (_enableCommentText) 1755 _commentAppender.append(_lineText[textStart .. textEnd]); 1756 _pos = textEnd + 2; 1757 break; 1758 } 1759 if (!nextLine()) { 1760 // TODO: do we need throw exception if comment not closed by end of file? 
1761 _pos = _len; 1762 break; 1763 } 1764 if (_enableCommentText) 1765 _commentAppender.appendEol(); 1766 textStart = 0; 1767 } 1768 if (_enableCommentText) { 1769 _sharedCommentToken.text = _commentAppender.get(); 1770 } 1771 return _sharedCommentToken; 1772 } 1773 1774 protected Token processHexString() { 1775 _pos++; 1776 // TODO: 1777 return null; 1778 } 1779 1780 protected Token processDelimitedString() { 1781 _pos++; 1782 // TODO: 1783 return null; 1784 } 1785 1786 // r"string" or `string` 1787 protected Token processWysiwygString(dchar ch) { 1788 _pos++; 1789 // TODO: 1790 return null; 1791 } 1792 1793 protected Token processIdent(dchar firstChar) { 1794 _sharedIdentToken.setPos(_startLine, _startPos); 1795 _identAppender.reset(); 1796 _identAppender.append(firstChar); 1797 for (; _pos < _len; ) { 1798 dchar ch = _lineText[_pos]; 1799 if (!isIdentMiddleChar(ch)) { 1800 break; 1801 } 1802 _identAppender.append(ch); 1803 _pos++; 1804 } 1805 _sharedIdentToken.setText(_identAppender.get); 1806 return _sharedIdentToken; 1807 } 1808 1809 protected Token processIntegerSuffix() { 1810 if (_pos >= _len) 1811 return _sharedIntegerToken; 1812 bool longFlag = false; 1813 bool unsignedFlag = false; 1814 dchar ch = _lineText[_pos]; 1815 dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 1816 if (ch == 'l' || ch == 'L') { 1817 longFlag = true; 1818 _pos++; 1819 if (ch2 == 'u' || ch2 == 'U') { 1820 unsignedFlag = true; 1821 _pos++; 1822 } 1823 } else if (ch == 'u' || ch == 'U') { 1824 unsignedFlag = true; 1825 _pos++; 1826 if (ch2 == 'l' || ch2 == 'L') { 1827 longFlag = true; 1828 _pos++; 1829 } 1830 } 1831 _sharedIntegerToken.setFlags(unsignedFlag, longFlag); 1832 ch = _pos < _len ? 
_lineText[_pos] : 0; 1833 if (isIdentMiddleChar(ch)) 1834 return parserError("Unexpected character after number", _sharedIntegerToken); 1835 return _sharedIntegerToken; 1836 } 1837 1838 protected Token processBinaryNumber() { 1839 _sharedIntegerToken.setPos(_startLine, _startPos); 1840 _pos++; 1841 if (_pos >= _len) 1842 return parserError("Unexpected end of line in binary number", _sharedIntegerToken); 1843 int digits = 0; 1844 ulong number = 0; 1845 int i = _pos; 1846 for (;i < _len; i++) { 1847 dchar ch = _lineText[i]; 1848 if (ch != '0' && ch != '1') 1849 break; 1850 number = (number << 1) | (ch == '1' ? 1 : 0); 1851 digits++; 1852 } 1853 _pos = i; 1854 if (digits > 64) 1855 return parserError("number is too big", _sharedIntegerToken); 1856 _sharedIntegerToken.setValue(number); 1857 return processIntegerSuffix(); 1858 } 1859 1860 protected Token processHexNumber() { 1861 _sharedIntegerToken.setPos(_startLine, _startPos); 1862 _sharedRealToken.setPos(_startLine, _startPos); 1863 _pos++; 1864 if (_pos >= _len) 1865 return parserError("Unexpected end of line in hex number", _sharedIntegerToken); 1866 int digits = 0; 1867 ulong number = 0; 1868 int i = _pos; 1869 for (;i < _len; i++) { 1870 dchar ch = _lineText[i]; 1871 uint digit = 0; 1872 if (ch >= '0' && ch <= '9') 1873 digit = ch - '0'; 1874 else if (ch >= 'a' && ch <= 'f') 1875 digit = ch - 'a' + 10; 1876 else if (ch >= 'A' && ch <= 'F') 1877 digit = ch - 'A' + 10; 1878 else if (ch == '_') 1879 continue; 1880 else 1881 break; 1882 number = (number << 4) | digit; 1883 digits++; 1884 } 1885 _pos = i; 1886 if (digits > 16) 1887 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1888 _sharedIntegerToken.setValue(number); 1889 return processIntegerSuffix(); 1890 } 1891 1892 protected Token processOctNumber() { 1893 _sharedIntegerToken.setPos(_startLine, _startPos); 1894 if (_pos >= _len) 1895 return parserError("Unexpected end of line in octal number", _sharedIntegerToken); 1896 int digits 
= 0; 1897 ulong number = 0; 1898 int i = _pos; 1899 bool overflow = false; 1900 for (;i < _len; i++) { 1901 dchar ch = _lineText[i]; 1902 int digit = 0; 1903 if (ch >= '0' && ch <= '7') 1904 digit = ch - '0'; 1905 else if (ch == '_') 1906 continue; 1907 else 1908 break; 1909 number <<= 3; 1910 if (digits >= 20) { 1911 if ((number >> 3) << 3 != number) { 1912 overflow = true; 1913 break; 1914 } 1915 } 1916 number |= digit; 1917 digits++; 1918 } 1919 _pos = i; 1920 if (overflow) 1921 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1922 _sharedIntegerToken.setValue(number); 1923 return processIntegerSuffix(); 1924 } 1925 1926 // 1927 protected Token processDecFloatSuffix(real value) { 1928 ubyte precision = 1; 1929 bool imaginary = false; 1930 dchar next = _pos < _len ? _lineText[_pos] : 0; 1931 if (next == 'f') { 1932 _pos++; 1933 precision = 0; 1934 } else if (next == 'L') { 1935 _pos++; 1936 precision = 2; 1937 } 1938 next = _pos < _len ? _lineText[_pos] : 0; 1939 if (next == 'i') { 1940 _pos++; 1941 imaginary = true; 1942 } 1943 next = _pos < _len ? _lineText[_pos] : 0; 1944 if (isIdentMiddleChar(next)) 1945 return parserError("invalid suffix for floating point literal", _sharedRealToken); 1946 _sharedRealToken.setValue(value, precision, imaginary); 1947 return _sharedRealToken; 1948 } 1949 1950 // after E char 1951 protected Token processDecFloatExponent(real value) { 1952 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 1953 int sign = 1; 1954 if (next == '+') { 1955 _pos++; 1956 } else if (next == '-') { 1957 _pos++; 1958 sign = -1; 1959 } 1960 if (_pos >= _len) 1961 return parserError("Invalid exponent", _sharedRealToken); 1962 ulong digits = 0; 1963 ulong number = 0; 1964 int i = _pos; 1965 bool overflow = false; 1966 for (;i < _len; i++) { 1967 dchar ch = _lineText[i]; 1968 uint digit = 0; 1969 if (ch >= '0' && ch <= '9') 1970 digit = ch - '0'; 1971 else if (ch == '_') 1972 continue; 1973 else 1974 break; 1975 number *= 10; 1976 if (digits >= 18) { 1977 if ((number * 10) / 10 != number) { 1978 overflow = true; 1979 break; 1980 } 1981 } 1982 number += digit; 1983 digits++; 1984 } 1985 if (digits == 0) 1986 return parserError("Invalid exponent", _sharedRealToken); 1987 _pos = i; 1988 value *= pow(10., cast(long)number * sign); 1989 return processDecFloatSuffix(value); 1990 } 1991 1992 protected Token processDecFloatSecondPart(ulong firstPart) { 1993 if (_pos >= _len) { 1994 _sharedRealToken.setValue(cast(real)firstPart); 1995 return _sharedRealToken; 1996 } 1997 ulong divider = 1; 1998 ulong number = 0; 1999 int i = _pos; 2000 bool overflow = false; 2001 for (;i < _len; i++) { 2002 dchar ch = _lineText[i]; 2003 uint digit = 0; 2004 if (ch >= '0' && ch <= '9') 2005 digit = ch - '0'; 2006 else if (ch == '_') 2007 continue; 2008 else 2009 break; 2010 if (divider * 10 < divider) 2011 continue; // ignore extra digits 2012 number *= 10; 2013 number += digit; 2014 divider *= 10; 2015 } 2016 _pos = i; 2017 real value = cast(real)firstPart + (cast(real)number / divider); 2018 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2019 if (next == 0) { 2020 // neither exponent nor suffix 2021 _sharedRealToken.setValue(value); 2022 return _sharedRealToken; 2023 } 2024 if (next == 'e' || next == 'E') { 2025 _pos++; 2026 return processDecFloatExponent(value); 2027 } 2028 return processDecFloatSuffix(value); 2029 } 2030 2031 protected Token processDecNumber(dchar c) { 2032 _sharedIntegerToken.setPos(_startLine, _startPos); 2033 _sharedRealToken.setPos(_startLine, _startPos); 2034 //if (_pos >= _len) 2035 // return parserError("Unexpected end of line in number", _sharedIntegerToken); 2036 int digits = 1; 2037 ulong number = c - '0'; 2038 int i = _pos; 2039 bool overflow = false; 2040 if (_line == _startLine) { 2041 for (;i < _len; i++) { 2042 dchar ch = _lineText[i]; 2043 uint digit = 0; 2044 if (ch >= '0' && ch <= '9') 2045 digit = ch - '0'; 2046 else if (ch == '_') 2047 continue; 2048 else 2049 break; 2050 number *= 10; 2051 if (digits >= 18) { 2052 if ((number * 10) / 10 != number) { 2053 overflow = true; 2054 break; 2055 } 2056 } 2057 number += digit; 2058 digits++; 2059 } 2060 _pos = i; 2061 } 2062 if (overflow) 2063 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 2064 _sharedIntegerToken.setValue(number); 2065 dchar next = _line == _startLine && _pos < _len ? 
_lineText[_pos] : 0; 2066 if (next == 0) 2067 return _sharedIntegerToken; 2068 if (next == 'e' || next == 'E') { 2069 _pos++; 2070 return processDecFloatExponent(number); 2071 } else if (next == '.') { 2072 _pos++; 2073 return processDecFloatSecondPart(number); 2074 } 2075 return processIntegerSuffix(); 2076 } 2077 2078 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2079 protected Token parserError(string msg, Token incompleteToken) { 2080 return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type); 2081 } 2082 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2083 protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) { 2084 if (_errorTolerant) { 2085 startPos--; 2086 _sharedInvalidToken.setPos(startLine, startPos); 2087 _sharedInvalidToken.errorMessage = msg; 2088 _sharedInvalidToken.errorCode = 1; // for future extension 2089 _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension 2090 // make invalid source text 2091 dchar[] invalidText; 2092 int p = startLine == _line ? 
startPos : 0; 2093 for (int i = p; i < _pos && i < _lineText.length; i++) 2094 invalidText ~= _lineText[i]; 2095 2096 // recover after error 2097 for (; _pos < _lineText.length; _pos++) { 2098 dchar ch = _lineText[_pos]; 2099 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}') 2100 break; 2101 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) { 2102 if (ch == '*' || ch == '/') 2103 break; 2104 } 2105 invalidText ~= ch; 2106 } 2107 _sharedInvalidToken.text = invalidText; 2108 return _sharedInvalidToken; 2109 } 2110 throw new ParserException(msg, _lineStream.file, _line, _pos); 2111 } 2112 2113 protected Keyword detectKeyword(dchar ch) { 2114 if (ch > 'z') 2115 return Keyword.NONE; 2116 int len = _len - _pos; 2117 switch (cast(ubyte)ch) { 2118 // ABSTRACT, 2119 // ALIAS, 2120 // ALIGN, 2121 // ASM, 2122 // ASSERT, 2123 // AUTO, 2124 case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos); 2125 2126 // BODY, 2127 // BOOL, 2128 // BREAK, 2129 // BYTE, 2130 case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos); 2131 2132 // CASE, 2133 // CAST, 2134 // CATCH, 2135 // CDOUBLE, 2136 // CENT, 2137 // CFLOAT, 2138 // CHAR, 2139 // CLASS, 2140 // CONST, 2141 // CONTINUE, 2142 // CREAL, 2143 case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos); 2144 2145 // DCHAR, 2146 // DEBUG, 2147 // DEFAULT, 2148 // DELEGATE, 2149 // DELETE, 2150 // DEPRECATED, 2151 // DO, 2152 // DOUBLE, 2153 case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos); 2154 2155 // ELSE, 2156 // ENUM, 2157 // EXPORT, 2158 // EXTERN, 2159 case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos); 2160 2161 // FALSE, 2162 // FINAL, 2163 // FINALLY, 2164 // FLOAT, 2165 // FOR, 2166 // FOREACH, 2167 // FOREACH_REVERSE, 2168 // FUNCTION, 2169 case 
'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos); 2170 2171 // GOTO, 2172 case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos); 2173 2174 // IDOUBLE, 2175 // IF, 2176 // IFLOAT, 2177 // IMMUTABLE, 2178 // IMPORT, 2179 // IN, 2180 // INOUT, 2181 // INT, 2182 // INTERFACE, 2183 // INVARIANT, 2184 // IREAL, 2185 // IS, 2186 case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos); 2187 2188 // LAZY, 2189 // LONG, 2190 case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos); 2191 2192 // MACRO, 2193 // MIXIN, 2194 // MODULE, 2195 case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos); 2196 2197 // NEW, 2198 // NOTHROW, 2199 // NULL, 2200 case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos); 2201 2202 // OUT, 2203 // OVERRIDE, 2204 case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos); 2205 2206 // PACKAGE, 2207 // PRAGMA, 2208 // PRIVATE, 2209 // PROTECTED, 2210 // PUBLIC, 2211 // PURE, 2212 case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos); 2213 2214 // REAL, 2215 // REF, 2216 // RETURN, 2217 case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos); 2218 2219 // SCOPE, 2220 // SHARED, 2221 // SHORT, 2222 // STATIC, 2223 // STRUCT, 2224 // SUPER, 2225 // SWITCH, 2226 // SYNCHRONIZED, 2227 case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos); 2228 2229 // TEMPLATE, 2230 // THIS, 2231 // THROW, 2232 // TRUE, 2233 // TRY, 2234 // TYPEDEF, 2235 // TYPEID, 2236 // TYPEOF, 2237 case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos); 2238 2239 // UBYTE, 2240 // UCENT, 2241 // UINT, 2242 // ULONG, 2243 // UNION, 2244 // UNITTEST, 2245 // USHORT, 2246 case 'u': 
return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos); 2247 2248 // VERSION, 2249 // VOID, 2250 // VOLATILE, 2251 case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos); 2252 2253 // WCHAR, 2254 // WHILE, 2255 // WITH, 2256 case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos); 2257 2258 // FILE, 2259 // MODULE, 2260 // LINE, 2261 // FUNCTION, 2262 // PRETTY_FUNCTION, 2263 // 2264 // GSHARED, 2265 // TRAITS, 2266 // VECTOR, 2267 // PARAMETERS, 2268 case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos); 2269 default: return Keyword.NONE; 2270 } 2271 } 2272 protected OpCode detectOp(dchar ch) nothrow { 2273 if (ch >= 128) 2274 return OpCode.NONE; 2275 dchar ch2 = _pos < _len ? _lineText[_pos] : 0; 2276 dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 2277 switch(cast(ubyte)ch) { 2278 // DIV, // / 2279 // DIV_EQ, // /= 2280 case '/': 2281 if (ch2 == '=') { 2282 _pos++; 2283 return OpCode.DIV_EQ; 2284 } 2285 return OpCode.DIV; 2286 // DOT, // . 2287 // DOT_DOT, // .. 2288 // DOT_DOT_DOT,// ... 
2289 case '.': 2290 if (ch2 == '.') { 2291 if (ch3 == '.') { 2292 _pos += 2; 2293 return OpCode.DOT_DOT_DOT; 2294 } 2295 _pos++; 2296 return OpCode.DOT_DOT; 2297 } 2298 return OpCode.DOT; 2299 // AND, // & 2300 // AND_EQ, // &= 2301 // LOG_AND, // && 2302 case '&': 2303 if (ch2 == '=') { 2304 _pos++; 2305 return OpCode.AND_EQ; 2306 } 2307 if (ch2 == '&') { 2308 _pos++; 2309 return OpCode.LOG_AND; 2310 } 2311 return OpCode.AND; 2312 // OR, // | 2313 // OR_EQ, // |= 2314 // LOG_OR, // || 2315 case '|': 2316 if (ch2 == '=') { 2317 _pos++; 2318 return OpCode.OR_EQ; 2319 } 2320 if (ch2 == '|') { 2321 _pos++; 2322 return OpCode.LOG_OR; 2323 } 2324 return OpCode.OR; 2325 // MINUS, // - 2326 // MINUS_EQ, // -= 2327 // MINUS_MINUS,// -- 2328 case '-': 2329 if (ch2 == '=') { 2330 _pos++; 2331 return OpCode.MINUS_EQ; 2332 } 2333 if (ch2 == '-') { 2334 _pos++; 2335 return OpCode.MINUS_MINUS; 2336 } 2337 return OpCode.MINUS; 2338 // PLUS, // + 2339 // PLUS_EQ, // += 2340 // PLUS_PLUS, // ++ 2341 case '+': 2342 if (ch2 == '=') { 2343 _pos++; 2344 return OpCode.PLUS_EQ; 2345 } 2346 if (ch2 == '+') { 2347 _pos++; 2348 return OpCode.PLUS_PLUS; 2349 } 2350 return OpCode.PLUS; 2351 // LT, // < 2352 // LT_EQ, // <= 2353 // SHL, // << 2354 // SHL_EQ, // <<= 2355 // LT_GT, // <> 2356 // NE_EQ, // <>= 2357 case '<': 2358 if (ch2 == '<') { 2359 if (ch3 == '=') { 2360 _pos += 2; 2361 return OpCode.SHL_EQ; 2362 } 2363 _pos++; 2364 return OpCode.SHL; 2365 } 2366 if (ch2 == '>') { 2367 if (ch3 == '=') { 2368 _pos += 2; 2369 return OpCode.NE_EQ; 2370 } 2371 _pos++; 2372 return OpCode.LT_GT; 2373 } 2374 if (ch2 == '=') { 2375 _pos++; 2376 return OpCode.LT_EQ; 2377 } 2378 return OpCode.LT; 2379 // GT, // > 2380 // GT_EQ, // >= 2381 // SHR_EQ // >>= 2382 // ASR_EQ, // >>>= 2383 // SHR, // >> 2384 // ASR, // >>> 2385 case '>': 2386 if (ch2 == '>') { 2387 if (ch3 == '>') { 2388 dchar ch4 = _pos < _len - 2 ? 
_lineText[_pos + 2] : 0; 2389 if (ch4 == '=') { // >>>= 2390 _pos += 3; 2391 return OpCode.ASR_EQ; 2392 } 2393 _pos += 2; 2394 return OpCode.ASR; // >>> 2395 } 2396 if (ch3 == '=') { // >>= 2397 _pos += 2; 2398 return OpCode.SHR_EQ; 2399 } 2400 _pos++; 2401 return OpCode.SHR; 2402 } 2403 if (ch2 == '=') { // >= 2404 _pos++; 2405 return OpCode.GT_EQ; 2406 } 2407 // > 2408 return OpCode.GT; 2409 // NOT, // ! 2410 // NOT_EQ // != 2411 // NOT_LT_GT, // !<> 2412 // NOT_LT_GT_EQ, // !<>= 2413 // NOT_LT, // !< 2414 // NOT_LT_EQ, // !<= 2415 // NOT_GT, // !> 2416 // NOT_GT_EQ, // !>= 2417 case '!': 2418 if (ch2 == '<') { // !< 2419 if (ch3 == '>') { // !<> 2420 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2421 if (ch4 == '=') { // !<>= 2422 _pos += 3; 2423 return OpCode.NOT_LT_GT_EQ; 2424 } 2425 _pos += 2; 2426 return OpCode.NOT_LT_GT; // !<> 2427 } 2428 if (ch3 == '=') { // !<= 2429 _pos += 2; 2430 return OpCode.NOT_LT_EQ; 2431 } 2432 _pos++; 2433 return OpCode.NOT_LT; // !< 2434 } 2435 if (ch2 == '=') { // != 2436 _pos++; 2437 return OpCode.NOT_EQ; 2438 } 2439 return OpCode.NOT; 2440 // PAR_OPEN, // ( 2441 case '(': 2442 return OpCode.PAR_OPEN; 2443 // PAR_CLOSE, // ) 2444 case ')': 2445 return OpCode.PAR_CLOSE; 2446 // SQ_OPEN, // [ 2447 case '[': 2448 return OpCode.SQ_OPEN; 2449 // SQ_CLOSE, // ] 2450 case ']': 2451 return OpCode.SQ_CLOSE; 2452 // CURL_OPEN, // { 2453 case '{': 2454 return OpCode.CURL_OPEN; 2455 // CURL_CLOSE, // } 2456 case '}': 2457 return OpCode.CURL_CLOSE; 2458 // QUEST, // ? 
2459 case '?': 2460 return OpCode.QUEST; 2461 // COMMA, // , 2462 case ',': 2463 return OpCode.COMMA; 2464 // SEMICOLON, // ; 2465 case ';': 2466 return OpCode.SEMICOLON; 2467 // COLON, // : 2468 case ':': 2469 return OpCode.COLON; 2470 // DOLLAR, // $ 2471 case '$': 2472 return OpCode.DOLLAR; 2473 // EQ, // = 2474 // QE_EQ, // == 2475 // EQ_GT, // => 2476 case '=': 2477 if (ch2 == '=') { // == 2478 _pos++; 2479 return OpCode.QE_EQ; 2480 } 2481 if (ch2 == '>') { // => 2482 _pos++; 2483 return OpCode.EQ_GT; 2484 } 2485 return OpCode.EQ; 2486 // MUL, // * 2487 // MUL_EQ, // *= 2488 case '*': 2489 if (ch2 == '=') { 2490 _pos++; 2491 return OpCode.MUL_EQ; 2492 } 2493 return OpCode.MUL; 2494 // MOD, // % 2495 // MOD_EQ, // %= 2496 case '%': 2497 if (ch2 == '=') { 2498 _pos++; 2499 return OpCode.MOD_EQ; 2500 } 2501 return OpCode.MOD; 2502 // XOR, // ^ 2503 // XOR_EQ, // ^= 2504 // LOG_XOR, // ^^ 2505 // LOG_XOR_EQ, // ^^= 2506 case '^': 2507 if (ch2 == '^') { 2508 if (ch3 == '=') { 2509 _pos += 2; 2510 return OpCode.LOG_XOR_EQ; 2511 } 2512 _pos++; 2513 return OpCode.LOG_XOR; 2514 } 2515 if (ch2 == '=') { 2516 _pos++; 2517 return OpCode.XOR_EQ; 2518 } 2519 return OpCode.XOR; 2520 // INV, // ~ 2521 // INV_EQ, // ~= 2522 case '~': 2523 if (ch2 == '=') { 2524 _pos++; 2525 return OpCode.INV_EQ; 2526 } 2527 return OpCode.INV; 2528 // AT, // @ 2529 case '@': 2530 return OpCode.AT; 2531 // SHARP // # 2532 case '#': 2533 return OpCode.SHARP; 2534 default: 2535 return OpCode.NONE; 2536 } 2537 } 2538 2539 protected Token processCharacterLiteral() { 2540 _sharedCharacterLiteralToken.setPos(_startLine, _startPos); 2541 if (_pos + 2 > _len) 2542 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2543 dchar ch = _lineText[_pos++]; 2544 dchar ch2 = _lineText[_pos++]; 2545 dchar type = 0; 2546 if (ch == '\\') { 2547 // process escaped character - store it in ch 2548 // TODO: support all escape sequences 2549 switch(ch2) { 2550 case 'r': 2551 ch = '\r'; 2552 
break; 2553 case 'n': 2554 ch = '\n'; 2555 break; 2556 case 't': 2557 ch = '\t'; 2558 break; 2559 case '\\': 2560 ch = '\\'; 2561 break; 2562 default: 2563 ch = ch2; 2564 break; 2565 } 2566 // here must be closing ' 2567 if (_pos + 1 > _len) 2568 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2569 ch2 = _lineText[_pos++]; 2570 } 2571 if (ch2 != '\'') 2572 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2573 if (_pos < _len) { 2574 dchar t = _lineText[_pos]; 2575 if (t == 'd' || t == 'w' || t == 'c') { 2576 type = t; 2577 _pos++; 2578 } else if (isIdentMiddleChar(ch)) { 2579 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken); 2580 } 2581 } 2582 _sharedCharacterLiteralToken.setCharacter(ch, type); 2583 return _sharedCharacterLiteralToken; 2584 } 2585 2586 protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) { 2587 bool wysiwyg = (delimiter == 'r' || delimiter == '`'); 2588 //writeln("processDoubleQuotedString()"); 2589 _sharedStringLiteralToken.setPos(_startLine, _startPos); 2590 _stringLiteralAppender.reset(); 2591 if (delimiter == 'r') { 2592 _pos++; 2593 delimiter = '\"'; 2594 } 2595 dchar type = 0; 2596 for (;;) { 2597 int i = _pos; 2598 int endPos = int.max; 2599 bool lastBackSlash = false; 2600 for(; i < _len; i++) { 2601 dchar ch = _lineText[i]; 2602 if (ch == '\\') { 2603 if (lastBackSlash) 2604 lastBackSlash = false; 2605 else 2606 lastBackSlash = true; 2607 } 2608 else if (ch == delimiter && !lastBackSlash) { 2609 endPos = i; 2610 break; 2611 } 2612 else if(lastBackSlash) 2613 lastBackSlash = false; 2614 } 2615 if (endPos != int.max) { 2616 // found end quote 2617 _stringLiteralAppender.append(_lineText[_pos .. endPos]); 2618 _pos = endPos + 1; 2619 break; 2620 } 2621 // no quote by end of line 2622 _stringLiteralAppender.append(_lineText[_pos .. 
$]); 2623 _stringLiteralAppender.appendEol(); 2624 if (!nextLine()) { 2625 // do we need to throw exception if eof comes before end of string? 2626 break; 2627 } 2628 } 2629 dchar t = 0; 2630 if (_pos < _len) { 2631 dchar ch = _lineText[_pos]; 2632 if (ch == 'c' || ch == 'w' || ch == 'd') { 2633 t = ch; 2634 _pos++; 2635 if (_pos < _len) { 2636 ch = _lineText[_pos]; 2637 if (isIdentMiddleChar(ch)) 2638 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2639 } 2640 } else if (isIdentMiddleChar(ch)) 2641 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2642 } 2643 if (t != 0) { 2644 if (type != 0 && t != type) 2645 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); 2646 type = t; 2647 } 2648 if (wysiwyg) { 2649 // no escape processing 2650 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2651 return _sharedStringLiteralToken; 2652 } 2653 _stringLiteralAppender.processEscapeSequences(); 2654 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2655 return _sharedStringLiteralToken; 2656 } 2657 2658 protected SysTime buildTime; 2659 2660 // string literal of the date of compilation "mmm dd yyyy" 2661 protected dstring formatBuildDate() { 2662 // TODO: provide proper format 2663 return to!dstring(buildTime); 2664 } 2665 2666 // string literal of the time of compilation "hh:mm:ss" 2667 protected dstring formatBuildTime() { 2668 // TODO: provide proper format 2669 return to!dstring(buildTime); 2670 } 2671 2672 // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2673 protected dstring formatBuildTimestamp() { 2674 // TODO: provide proper format 2675 return to!dstring(buildTime); 2676 } 2677 2678 static immutable dstring VERSION = "0.1"; 2679 static immutable dstring VENDOR = "coolreader.org"; 2680 2681 protected Token makeSpecialTokenString(dstring str, int pos) { 2682 
_sharedStringLiteralToken.setPos(_startLine, _startPos); 2683 _sharedStringLiteralToken.setText(cast(dchar[])str, 0); 2684 return _sharedStringLiteralToken; 2685 } 2686 2687 protected Token processSpecialToken(Keyword keyword, int pos) { 2688 switch (keyword) { 2689 //Special Token Replaced with 2690 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2691 return makeSpecialTokenString(formatBuildDate(), pos); 2692 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2693 return makeSpecialTokenString(formatBuildTime(), pos); 2694 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2695 return makeSpecialTokenString(formatBuildTimestamp(), pos); 2696 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2697 return makeSpecialTokenString(VENDOR, pos); 2698 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2699 return makeSpecialTokenString(VERSION, pos); 2700 default: 2701 parserError("Unknown special token", _line, pos); 2702 } 2703 return null; 2704 } 2705 2706 protected int _startLine; 2707 protected int _startPos; 2708 2709 // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls). 2710 Token nextToken() { 2711 _startLine = _line; 2712 _startPos = _pos; 2713 dchar ch = nextChar(); 2714 if (ch == EOF_CHAR) { 2715 return emitEof(); 2716 } 2717 if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { 2718 // white space (treat EOL as whitespace, too) 2719 return processWhiteSpace(ch); 2720 } 2721 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2722 if (ch == '/') { 2723 if (next == '/') 2724 return processOneLineComment(); 2725 else if (next == '*') 2726 return processMultilineComment(); 2727 else if (next == '+') 2728 return processNestedComment(); 2729 } 2730 if (ch == '#' && _line == 1) 2731 return processOneLineSharpComment(); 2732 if (ch == '\"') 2733 return processDoubleQuotedOrWysiwygString(ch); 2734 if (ch == '\'') 2735 return processCharacterLiteral(); 2736 if (ch == 'x' && next == '\"') 2737 return processHexString(); 2738 if (ch == 'q' && next == '\"') 2739 return processDelimitedString(); 2740 if ((ch == 'r' && next == '\"') || (ch == '`')) 2741 return processDoubleQuotedOrWysiwygString(ch); 2742 int oldPos = _pos - 1; 2743 2744 if (ch == '0') { 2745 if (next == 'b' || next == 'B') 2746 return processBinaryNumber(); 2747 if (next == 'x' || next == 'X') 2748 return processHexNumber(); 2749 if (next >= '0' && next <= '9') 2750 return processOctNumber(); 2751 if (next >= '0' && next <= '9') 2752 return processDecNumber(ch); 2753 } 2754 if (ch >= '0' && ch <= '9') 2755 return processDecNumber(ch); 2756 if (ch == '.' && next >= '0' && next <= '9') // .123 2757 return processDecFloatSecondPart(0); 2758 2759 if (ch == '_' || isUniversalAlpha(ch)) { 2760 // start of identifier or keyword? 
2761 Keyword keyword = detectKeyword(ch); 2762 if (keyword != Keyword.NONE) { 2763 switch (keyword) { 2764 //Special Token Replaced with 2765 case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file 2766 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2767 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2768 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2769 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2770 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2771 return processSpecialToken(keyword, oldPos); 2772 default: 2773 _sharedKeywordToken.setPos(_startLine, _startPos); 2774 _sharedKeywordToken.keyword = keyword; 2775 return _sharedKeywordToken; 2776 } 2777 } 2778 return processIdent(ch); 2779 } 2780 OpCode op = detectOp(ch); 2781 if (op != OpCode.NONE) { 2782 _sharedOpToken.setPos(_startLine, _startPos); 2783 _sharedOpToken.opCode = op; 2784 return _sharedOpToken; 2785 } 2786 return parserError("Invalid token", _line, _pos); 2787 } 2788 2789 2790 } 2791 2792 unittest { 2793 version(DisableLexerTest) { 2794 import std.stdio; 2795 import std.conv; 2796 import std.utf; 2797 import dlangui.core.linestream; 2798 string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d"; 2799 writeln("opening file"); 2800 try { 2801 std.stream.File f = new std.stream.File(fname); 2802 scope(exit) { f.close(); } 2803 try { 2804 LineStream lines = LineStream.create(f, fname); 2805 Tokenizer tokenizer = new Tokenizer(lines); 2806 for (;;) { 2807 Token token = tokenizer.nextToken(); 2808 if (token is null) { 2809 writeln("Null token returned"); 2810 break; 2811 } 2812 if (token.type == TokenType.EOF) { 2813 writeln("EOF token"); 2814 break; 2815 } 2816 writeln("", token.line, ":", token.pos, "\t", token.toString); 2817 } 2818 } catch (Exception e) { 2819 writeln("Exception " ~ e.toString); 
// (closing lines of the preceding unittest block; code carried over unchanged)
        }
    } catch (Exception e) {
        writeln("Exception " ~ e.toString);
    }
    }
}

/// Converts a named character entity (e.g. "amp", "nbsp") to its character.
/// Returns 0 if the name is not present in the entity table.
dchar entityToChar(string name) {
    if (auto ch = name in entityToCharMap) {
        return *ch;
    }
    return 0;
}

/// Finds the entity name for a character.
/// Returns null if the character has no entry in the entity table.
string charToEntity(dchar ch) {
    if (auto name = ch in charToEntityMap) {
        return *name;
    }
    return null;
}

// Process-wide lookup tables, filled once by the module constructor below.
private __gshared dchar[string]entityToCharMap;
private __gshared string[dchar]charToEntityMap;

/// Registers one entity in both lookup directions (name -> char, char -> name).
private void addEntity(string name, dchar ch) {
    entityToCharMap[name] = ch;
    charToEntityMap[ch] = name;
}

// The tables are __gshared, so they must be populated exactly once per process.
// A plain (thread-local) `static this()` would re-run in every new thread and
// rewrite the shared maps while other threads may be reading them; a
// `shared static this()` runs once, before any thread-local constructors.
shared static this() {
    addEntity("quot", 34);
    addEntity("amp", 38);
    addEntity("lt", 60);
    addEntity("gt", 62);
    addEntity("OElig", 338);
    addEntity("oelig", 339);
    addEntity("Scaron", 352);
    addEntity("scaron", 353);
    addEntity("Yuml", 376);
    addEntity("circ", 710);
    addEntity("tilde", 732);
    addEntity("ensp", 8194);
    addEntity("emsp", 8195);
    addEntity("thinsp", 8201);
    addEntity("zwnj", 8204);
    addEntity("zwj", 8205);
    addEntity("lrm", 8206);
    addEntity("rlm", 8207);
    addEntity("ndash", 8211);
    addEntity("mdash", 8212);
    addEntity("lsquo", 8216);
    addEntity("rsquo", 8217);
    addEntity("sbquo", 8218);
    addEntity("ldquo", 8220);
    addEntity("rdquo", 8221);
    addEntity("bdquo", 8222);
    addEntity("dagger", 8224);
    addEntity("Dagger", 8225);
    addEntity("permil", 8240);
    addEntity("lsaquo", 8249);
    addEntity("rsaquo", 8250);
    addEntity("euro", 8364);
    addEntity("nbsp", 160);
    addEntity("iexcl", 161);
    addEntity("cent", 162);
    addEntity("pound", 163);
    addEntity("curren", 164);
    addEntity("yen", 165);
    addEntity("brvbar", 166);
    addEntity("sect", 167);
    addEntity("uml", 168);
    addEntity("copy", 169);
    addEntity("ordf", 170);
    addEntity("laquo", 171);
    addEntity("not", 172);
    addEntity("shy", 173);
    addEntity("reg", 174);
    addEntity("macr", 175);
    addEntity("deg", 176);
    addEntity("plusmn", 177);
    addEntity("sup2", 178);
    addEntity("sup3", 179);
    addEntity("acute", 180);
    addEntity("micro", 181);
    addEntity("para", 182);
    addEntity("middot", 183);
    addEntity("cedil", 184);
    addEntity("sup1", 185);
    addEntity("ordm", 186);
    addEntity("raquo", 187);
    addEntity("frac14", 188);
    addEntity("frac12", 189);
    addEntity("frac34", 190);
    addEntity("iquest", 191);
    addEntity("Agrave", 192);
    addEntity("Aacute", 193);
    addEntity("Acirc", 194);
    addEntity("Atilde", 195);
    addEntity("Auml", 196);
    addEntity("Aring", 197);
    addEntity("AElig", 198);
    addEntity("Ccedil", 199);
    addEntity("Egrave", 200);
    addEntity("Eacute", 201);
    addEntity("Ecirc", 202);
    addEntity("Euml", 203);
    addEntity("Igrave", 204);
    addEntity("Iacute", 205);
    addEntity("Icirc", 206);
    addEntity("Iuml", 207);
    addEntity("ETH", 208);
    addEntity("Ntilde", 209);
    addEntity("Ograve", 210);
    addEntity("Oacute", 211);
    addEntity("Ocirc", 212);
    addEntity("Otilde", 213);
    addEntity("Ouml", 214);
    addEntity("times", 215);
    addEntity("Oslash", 216);
    addEntity("Ugrave", 217);
    addEntity("Uacute", 218);
    addEntity("Ucirc", 219);
    addEntity("Uuml", 220);
    addEntity("Yacute", 221);
    addEntity("THORN", 222);
    addEntity("szlig", 223);
    addEntity("agrave", 224);
    addEntity("aacute", 225);
    addEntity("acirc", 226);
    addEntity("atilde", 227);
    addEntity("auml", 228);
    addEntity("aring", 229);
    addEntity("aelig", 230);
    addEntity("ccedil", 231);
    addEntity("egrave", 232);
    addEntity("eacute", 233);
    addEntity("ecirc", 234);
    addEntity("euml", 235);
    addEntity("igrave", 236);
    addEntity("iacute", 237);
    addEntity("icirc", 238);
    addEntity("iuml", 239);
    addEntity("eth", 240);
    addEntity("ntilde", 241);
    addEntity("ograve", 242);
    addEntity("oacute", 243);
    addEntity("ocirc", 244);
    addEntity("otilde", 245);
    addEntity("ouml", 246);
    addEntity("divide", 247);
    addEntity("oslash", 248);
    addEntity("ugrave", 249);
    addEntity("uacute", 250);
    addEntity("ucirc", 251);
    addEntity("uuml", 252);
    addEntity("yacute", 253);
    addEntity("thorn", 254);
    addEntity("yuml", 255);
    addEntity("fnof", 402);
    addEntity("Alpha", 913);
    addEntity("Beta", 914);
    addEntity("Gamma", 915);
    addEntity("Delta", 916);
    addEntity("Epsilon", 917);
    addEntity("Zeta", 918);
    addEntity("Eta", 919);
    addEntity("Theta", 920);
    addEntity("Iota", 921);
    addEntity("Kappa", 922);
    addEntity("Lambda", 923);
    addEntity("Mu", 924);
    addEntity("Nu", 925);
    addEntity("Xi", 926);
    addEntity("Omicron", 927);
    addEntity("Pi", 928);
    addEntity("Rho", 929);
    addEntity("Sigma", 931);
    addEntity("Tau", 932);
    addEntity("Upsilon", 933);
    addEntity("Phi", 934);
    addEntity("Chi", 935);
    addEntity("Psi", 936);
    addEntity("Omega", 937);
    addEntity("alpha", 945);
    addEntity("beta", 946);
    addEntity("gamma", 947);
    addEntity("delta", 948);
    addEntity("epsilon", 949);
    addEntity("zeta", 950);
    addEntity("eta", 951);
    addEntity("theta", 952);
    addEntity("iota", 953);
    addEntity("kappa", 954);
    addEntity("lambda", 955);
    addEntity("mu", 956);
    addEntity("nu", 957);
    addEntity("xi", 958);
    addEntity("omicron", 959);
    addEntity("pi", 960);
    addEntity("rho", 961);
    addEntity("sigmaf", 962);
    addEntity("sigma", 963);
    addEntity("tau", 964);
    addEntity("upsilon", 965);
    addEntity("phi", 966);
    addEntity("chi", 967);
    addEntity("psi", 968);
    addEntity("omega", 969);
    addEntity("thetasym", 977);
    addEntity("upsih", 978);
    addEntity("piv", 982);
    addEntity("bull", 8226);
    addEntity("hellip", 8230);
    addEntity("prime", 8242);
    addEntity("Prime", 8243);
    addEntity("oline", 8254);
    addEntity("frasl", 8260);
    addEntity("weierp", 8472);
    addEntity("image", 8465);
    addEntity("real", 8476);
    addEntity("trade", 8482);
    addEntity("alefsym", 8501);
    addEntity("larr", 8592);
    addEntity("uarr", 8593);
    addEntity("rarr", 8594);
    addEntity("darr", 8595);
    addEntity("harr", 8596);
    addEntity("crarr", 8629);
    addEntity("lArr", 8656);
    addEntity("uArr", 8657);
    addEntity("rArr", 8658);
    addEntity("dArr", 8659);
    addEntity("hArr", 8660);
    addEntity("forall", 8704);
    addEntity("part", 8706);
    addEntity("exist", 8707);
    addEntity("empty", 8709);
    addEntity("nabla", 8711);
    addEntity("isin", 8712);
    addEntity("notin", 8713);
    addEntity("ni", 8715);
    addEntity("prod", 8719);
    addEntity("sum", 8721);
    addEntity("minus", 8722);
    addEntity("lowast", 8727);
    addEntity("radic", 8730);
    addEntity("prop", 8733);
    addEntity("infin", 8734);
    addEntity("ang", 8736);
    addEntity("and", 8743);
    addEntity("or", 8744);
    addEntity("cap", 8745);
    addEntity("cup", 8746);
    addEntity("int", 8747);
    addEntity("there4", 8756);
    addEntity("sim", 8764);
    addEntity("cong", 8773);
    addEntity("asymp", 8776);
    addEntity("ne", 8800);
    addEntity("equiv", 8801);
    addEntity("le", 8804);
    addEntity("ge", 8805);
    addEntity("sub", 8834);
    addEntity("sup", 8835);
    addEntity("nsub", 8836);
    addEntity("sube", 8838);
    addEntity("supe", 8839);
    addEntity("oplus", 8853);
    addEntity("otimes", 8855);
    addEntity("perp", 8869);
    addEntity("sdot", 8901);
    addEntity("lceil", 8968);
    addEntity("rceil", 8969);
    addEntity("lfloor", 8970);
    addEntity("rfloor", 8971);
    addEntity("loz", 9674);
    addEntity("spades", 9824);
    addEntity("clubs", 9827);
    addEntity("hearts", 9829);
    addEntity("diams", 9830);
    addEntity("lang", 10216);
    addEntity("rang", 10217);
}



//void runTokenizerTest()
unittest
{
    import std.algorithm;

    // Base expectation for a single token. Remembers the source location at
    // which the expectation was declared so a failure can point back to it.
    class TokenTest {
        int _line;
        string _file;
        this(string file, int line) {
            _file = file;
            _line = line;
        }
        // Returns true when the token satisfies this expectation.
        // The base implementation accepts any token.
        bool doTest(Token token) {
            return true;
        }
        // Pulls the next token from the tokenizer and asserts that it matches.
        void execute(Tokenizer tokenizer) {
            Token token = tokenizer.nextToken();
            if (!doTest(token)) {
                assert(false, " token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString);
            }
        }
        public override @property string toString() {
            return "TokenTest";
        }
    }

    // Tokenizes `code` and checks the produced tokens, in order, against `tokens`.
    // Tokens remaining after the last expectation are not inspected.
    void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
        Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
        foreach (expected; tokens) {
            expected.execute(tokenizer);
        }
    }

    // Expects a keyword token with the given keyword code.
    class KeywordTest : TokenTest {
        Keyword _code;
        this(Keyword code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.KEYWORD)
                return false;
            if (token.keyword != _code)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Keyword:" ~ to!string(_code);
        }
    }

    // Expects an operator token with the given op code.
    class OpTest : TokenTest {
        OpCode _code;
        this(OpCode code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.OP)
                return false;
            if (token.opCode != _code)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Op:" ~ to!string(_code);
        }
    }

    // Expects a string literal with the given text and literal-type suffix
    // (0 when the literal has no suffix).
    class StringTest : TokenTest {
        dstring _value;
        dchar _literalType;
        this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _literalType = literalType;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.STRING)
                return false;
            if (!token.text.equal(_value))
                return false;
            if (token.literalType != _literalType)
                return false;
            return true;
        }
        public override @property string toString() {
            return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
        }
    }

    // Expects an integer literal with the given value and unsigned/long flags.
    class IntegerTest : TokenTest {
        ulong _value;
        bool _unsigned;
        bool _long;
        this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _unsigned = unsignedFlag;
            _long = longFlag;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.INTEGER)
                return false;
            if (token.intValue != _value)
                return false;
            if (token.isUnsigned != _unsigned)
                return false;
            if (token.isLong != _long)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Integer:" ~ to!string(_value);
        }
    }

    // Expects a floating point literal. The value is compared with a relative
    // tolerance of 1e-6; precision and imaginary flag must match exactly.
    class RealTest : TokenTest {
        real _value;
        ubyte _precision;
        bool _imaginary;
        this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _precision = precision;
            _imaginary = imaginary;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.FLOAT)
                return false;
            real actual = token.realValue;
            // Exact match (including equal infinities) needs no tolerance check.
            if (actual != _value) {
                real diff = actual - _value;
                real maxerr = _value / 1000000;
                if (diff < 0) diff = -diff;
                if (maxerr < 0) maxerr = -maxerr;
                // Written as !(a <= b) rather than (a > b) so that a NaN
                // difference is rejected instead of silently accepted.
                if (!(diff <= maxerr))
                    return false;
            }
            if (token.precision != _precision)
                return false;
            if (token.isImaginary != _imaginary)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
        }
    }

    // Expects an identifier token with the given text.
    class IdentTest : TokenTest {
        string _value;
        this(string value, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.IDENTIFIER)
                return false;
            if (! to!string(token.text).equal(_value))
                return false;
            return true;
        }
        public override @property string toString() {
            return "Ident:" ~ _value;
        }
    }

    // Expects a comment token (content is not checked).
    class CommentTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.COMMENT)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Comment";
        }
    }

    // Expects the end-of-file token.
    class EOFTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.EOF)
                return false;
            return true;
        }
        public override @property string toString() {
            return "EOF";
        }
    }

    // Expects a whitespace token (content is not checked).
    class WhiteSpaceTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.WHITESPACE)
                return false;
            return true;
        }
        public override @property string toString() {
            return "whiteSpace";
        }
    }

    // Factory helpers: the defaulted file/line arguments capture the call
    // site, so a failing expectation reports where it was written.
    TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
        return new StringTest(value, literalType, file, line);
    }
    TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
        return new IntegerTest(value, unsignedFlag, longFlag, file, line);
    }
    // precision is ubyte to match the RealTest constructor it forwards to
    TokenTest checkReal(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
        return new RealTest(value, precision, imaginary, file, line);
    }
    TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
        return new IdentTest(value, file, line);
    }
    TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
        return new KeywordTest(value, file, line);
    }
    TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
        return new OpTest(value, file, line);
    }
    TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
        return new WhiteSpaceTest(file, line);
    }
    TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
        return new CommentTest(file, line);
    }
    TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
        return new EOFTest(file, line);
    }

    // test WYSIWYG (r"...") string: the backslash sequence must stay literal
    testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);

    // test strings: quoting styles, type suffixes and escape sequences
    testTokenizer(q"TEST
"simple string"
"simple\nstring"
`simple string`
"simple string"d
"simple string"c
"simple string"w
"simple\"string"
"\r\n\f\t\\\"\'&"
TEST"
        , [
            checkString("simple string"),
            checkSpace(),
            checkString("simple\nstring"),
            checkSpace(),
            checkString("simple string"),
            checkSpace(),
            checkString("simple string", 'd'),
            checkSpace(),
            checkString("simple string", 'c'),
            checkSpace(),
            checkString("simple string", 'w'),
            checkSpace(),
            checkString("simple\"string"),
            checkSpace(),
            checkString("\r\n\f\t\\\"\'&"),
        ]);
    // basic test
    testTokenizer(q"TEST
int i;
TEST"
        , [
            checkKeyword(Keyword.INT),
            checkSpace(),
            checkIdent("i"),
            checkOp(OpCode.SEMICOLON),
            checkEOF()
        ]);
    // test numbers
    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
        , [
            checkInteger(13),
            checkSpace(),
            checkInteger(0x123abcd, true, false),
            checkSpace(),
            checkInteger(0xabc, false, true),
            checkSpace(),
            checkInteger(std.conv.octal!743),
            checkSpace(),
            checkInteger(192_837_465),
            checkSpace(),
            checkInteger(0),
            checkSpace(),
            checkInteger(192837465),
            checkSpace(),
            checkReal(5.25),
            checkSpace(),
            checkReal(12.3f, 0),
            checkSpace(),
            checkReal(54.1L, 2),
            checkSpace(),
            checkReal(67.1, 1, true),
            checkSpace(),
            checkReal(3e3),
            checkSpace(),
            checkReal(25.67e-5f, 0),
            checkEOF()
        ]);
}