1 module ddc.lexer.tokenizer; 2 3 import ddc.lexer.textsource; 4 import ddc.lexer.exceptions; 5 6 import std.stdio; 7 import std.datetime; 8 import std.conv; 9 import std.utf; 10 import std.math; 11 12 enum TokenType : ubyte { 13 EOF, 14 //EOL, 15 WHITESPACE, 16 COMMENT, 17 IDENTIFIER, 18 STRING, 19 CHARACTER, 20 INTEGER, 21 FLOAT, 22 KEYWORD, 23 OP, 24 INVALID 25 } 26 27 // table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _ 28 // max code is 0xd7ff 29 //1728 30 const uint[1728] UNIVERSAL_ALPHA_FLAGS = [ 31 0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff 32 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff 33 0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff 34 0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff 35 0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff 36 0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff 37 0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff 38 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff 39 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff 40 0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff 41 0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff 42 0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff 43 0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff 44 
0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff 45 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff 46 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff 47 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff 48 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff 49 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff 50 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff 51 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff 52 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff 53 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff 54 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff 55 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff 56 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff 57 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff 58 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff 59 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff 60 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff 61 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff 62 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff 63 
0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff 64 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff 65 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff 66 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff 67 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff 68 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff 69 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff 70 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff 71 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff 72 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff 73 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff 74 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff 75 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff 76 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff 77 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff 78 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff 79 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff 80 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff 81 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff 82 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff 83 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff 84 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff 85 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff 86 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff 87 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff 88 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff 89 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff 90 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff 91 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff 92 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff 93 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff 94 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff 95 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff 96 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff 97 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff 98 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff 99 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff 100 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff 101 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff 102 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff 103 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff 104 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff 105 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff 106 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff 107 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff 108 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff 109 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff 110 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff 111 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff 112 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff 113 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff 114 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff 115 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff 116 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff 117 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff 118 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff 119 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff 120 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff 121 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff 122 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff 123 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff 124 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff 125 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff 126 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff 127 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff 128 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff 129 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff 130 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff 131 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff 132 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff 133 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff 134 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff 135 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff 136 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff 137 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff 138 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff 139 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff 140 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff 141 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff 142 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff 143 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff 144 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff 145 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff 146 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff 147 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff 148 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff 149 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff 150 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff 151 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff 152 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff 153 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff 154 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff 155 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff 156 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff 157 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff 158 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff 159 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff 160 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff 161 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff 162 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff 163 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff 164 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff 165 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff 166 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff 167 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff 168 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff 169 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff 170 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff 171 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff 172 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff 173 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff 174 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff 175 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff 176 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff 177 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff 178 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff 179 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff 180 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff 181 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff 182 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff 183 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff 184 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff 185 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff 186 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff 187 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff 188 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff 189 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff 190 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff 191 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff 192 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff 193 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff 194 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff 195 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff 196 
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff 197 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff 198 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff 199 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff 200 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff 201 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff 202 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff 203 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff 204 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff 205 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff 206 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff 207 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff 208 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff 209 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff 210 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff 211 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff 212 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff 213 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff 214 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff 215 
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff 216 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff 217 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff 218 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff 219 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff 220 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff 221 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff 222 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff 223 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff 224 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff 225 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff 226 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff 227 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff 228 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff 229 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff 230 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff 231 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff 232 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff 233 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff 234 
/// Bit-table lookup: true when ch has its bit set in UNIVERSAL_ALPHA_FLAGS
/// (per the table's header comment: A..Z, a..z, '_' or a universal alpha).
/// Code points above 0xd7ff are outside the table and always yield false.
bool isUniversalAlpha(dchar ch) pure nothrow {
    if (ch > 0xd7ff)
        return false;
    // each table word carries the flags of 32 consecutive code points
    immutable uint word = UNIVERSAL_ALPHA_FLAGS[ch >> 5];
    immutable uint mask = 1u << (ch & 31);
    return (word & mask) != 0;
}

/// True when ch may begin an identifier (same set as isUniversalAlpha).
bool isIdentStartChar(dchar ch) pure nothrow {
    return isUniversalAlpha(ch);
}

/// True when ch may appear after the first character of an identifier:
/// an ASCII digit or anything accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    if ('0' <= ch && ch <= '9')
        return true;
    return isUniversalAlpha(ch);
}

/// Compile-time switch: when true, the slow reference predicate and the
/// table-dumping unittest guarded by `static if` on this flag are compiled in.
immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
// Reference implementation used only to (re)generate and verify UNIVERSAL_ALPHA_FLAGS.
// Compiled only when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true.
static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
    /// Single-value test: true when ch equals v.
    bool r(dchar ch, wchar v) pure nothrow {
        return ch == v;
    }

    /// Range test: true when ch lies in the inclusive range v1..v2.
    bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
        return ch >= v1 && ch <= v2;
    }

    /// Slow reference predicate: true when c falls in any of the explicit
    /// universal-alpha values/ranges listed below, grouped by script.
    /// Each group's comment repeats the ranges that the following r(...) calls encode.
    /// Note: ASCII letters and '_' are NOT covered here; the unittest that uses
    /// this function ORs them in separately.
    bool isUniversalAlphaSlow(dchar c) pure nothrow {
        return
            // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
            // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
            r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
            || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
            //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
            //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
            //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
            //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
            //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
            || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
            || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
            || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
            || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
            || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
            //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
            //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
            || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
            || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
            //Armenian: 0531−0556, 0561−0587
            || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
            //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
            //05F0−05F2
            || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
            || r(c, 0x05F0,0x05F2)
            //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
            //06D0−06DC, 06E5−06E8, 06EA−06ED
            || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
            || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
            //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
            || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
            //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
            //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
            //09DC−09DD, 09DF−09E3, 09F0−09F1
            || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
            || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
            || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
            //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
            //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
            //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
            || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
            || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
            || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
            //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
            //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
            //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
            || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
            || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
            || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
            // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
            //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
            //0B5C−0B5D, 0B5F−0B61
            || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
            || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
            || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
            //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
            //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
            //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
            || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
            || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
            || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
            //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
            //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
            || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
            || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
            //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
            //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
            //0CE0−0CE1
            || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
            || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
            || r(c, 0x0CE0,0x0CE1)
            //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
            //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
            || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
            || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
            //Thai: 0E01−0E3A, 0E40−0E5B
            || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
            //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
            //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
            //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
            //0EC8−0ECD, 0EDC−0EDD
            || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
            || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
            || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
            || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
            //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
            //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
            //0FB1−0FB7, 0FB9
            || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
            || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
            || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
            //Georgian: 10A0−10C5, 10D0−10F6
            || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
            //Hiragana: 3041−3093, 309B−309C
            || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
            //Katakana: 30A1−30F6, 30FB−30FC
            || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
            //Bopomofo: 3105−312C
            || r(c, 0x3105,0x312C)
            //CJK Unified Ideographs: 4E00−9FA5
            || r(c, 0x4E00,0x9FA5)
            //Hangul: AC00−D7A3
            || r(c, 0xAC00,0xD7A3)
            //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
            //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
            //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
            || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
            || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
            || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
            //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
            //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
            //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
            //2133−2138, 2160−2182, 3005−3007, 3021−3029
            || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
            || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
            || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
            || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
            ;
    }

}
|| r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF) 383 || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33) 384 //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1, 385 //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102, 386 //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131, 387 //2133−2138, 2160−2182, 3005−3007, 3021−3029 388 || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1) 389 || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102) 390 || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131) 391 || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029) 392 ; 393 } 394 395 } 396 397 unittest { 398 399 400 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) { 401 immutable uint itemsInRow = 8; 402 403 uint maxAlpha = 0; 404 for (uint i = 0; i < 0x10000; i++) { 405 uint ch = i; 406 if (isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) 407 maxAlpha = i; 408 } 409 maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1; 410 writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _"); 411 writefln("// max code is 0x%04x", maxAlpha); 412 writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = ["); 413 for (uint i = 0; i <= maxAlpha; i += 32) { 414 if ((i / 32) % itemsInRow == 0) 415 write(" "); 416 uint flags = 0; 417 for (uint j = 0; j < 32; j++) { 418 uint ch = i + j; 419 bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); 420 if (flag) 421 flags |= (1 << j); 
/**
 * Operator codes recognised by the tokenizer.
 *
 * The numeric value of a member is used directly as an index into
 * OP_CODE_STRINGS, so members must never be reordered without reordering
 * the table in the same way.
 */
enum OpCode : ubyte {
    NONE,           //    no op
    DIV,            //    /
    DIV_EQ,         //    /=
    DOT,            //    .
    DOT_DOT,        //    ..
    DOT_DOT_DOT,    //    ...
    AND,            //    &
    AND_EQ,         //    &=
    LOG_AND,        //    &&
    OR,             //    |
    OR_EQ,          //    |=
    LOG_OR,         //    ||
    MINUS,          //    -
    MINUS_EQ,       //    -=
    MINUS_MINUS,    //    --
    PLUS,           //    +
    PLUS_EQ,        //    +=
    PLUS_PLUS,      //    ++
    LT,             //    <
    LT_EQ,          //    <=
    SHL,            //    <<
    SHL_EQ,         //    <<=
    LT_GT,          //    <>
    NE_EQ,          //    <>=
    GT,             //    >
    GT_EQ,          //    >=
    SHR_EQ,         //    >>=
    ASR_EQ,         //    >>>=
    SHR,            //    >>
    ASR,            //    >>>
    NOT,            //    !
    NOT_EQ,         //    !=
    NOT_LT_GT,      //    !<>
    NOT_LT_GT_EQ,   //    !<>=
    NOT_LT,         //    !<
    NOT_LT_EQ,      //    !<=
    NOT_GT,         //    !>
    NOT_GT_EQ,      //    !>=
    PAR_OPEN,       //    (
    PAR_CLOSE,      //    )
    SQ_OPEN,        //    [
    SQ_CLOSE,       //    ]
    CURL_OPEN,      //    {
    CURL_CLOSE,     //    }
    QUEST,          //    ?
    COMMA,          //    ,
    SEMICOLON,      //    ;
    COLON,          //    :
    DOLLAR,         //    $
    EQ,             //    =
    QE_EQ,          //    ==
    MUL,            //    *
    MUL_EQ,         //    *=
    MOD,            //    %
    MOD_EQ,         //    %=
    XOR,            //    ^
    XOR_EQ,         //    ^=
    LOG_XOR,        //    ^^
    LOG_XOR_EQ,     //    ^^=
    INV,            //    ~
    INV_EQ,         //    ~=
    AT,             //    @
    EQ_GT,          //    =>
    SHARP           //    #
}

/// Source spelling of every OpCode member, indexed by the member's numeric value.
immutable dstring[] OP_CODE_STRINGS = [
    "",                                             // NONE
    "/", "/=",                                      // DIV ..
    ".", "..", "...",                               // DOT ..
    "&", "&=", "&&",                                // AND ..
    "|", "|=", "||",                                // OR ..
    "-", "-=", "--",                                // MINUS ..
    "+", "+=", "++",                                // PLUS ..
    "<", "<=", "<<", "<<=", "<>", "<>=",            // LT ..
    ">", ">=", ">>=", ">>>=", ">>", ">>>",          // GT .. (note: the *_EQ forms come first)
    "!", "!=", "!<>", "!<>=", "!<", "!<=", "!>", "!>=", // NOT ..
    "(", ")", "[", "]", "{", "}",                   // brackets
    "?", ",", ";", ":", "$",                        // QUEST .. DOLLAR
    "=", "==",                                      // EQ, QE_EQ
    "*", "*=",                                      // MUL ..
    "%", "%=",                                      // MOD ..
    "^", "^=", "^^", "^^=",                         // XOR ..
    "~", "~=",                                      // INV ..
    "@", "=>", "#"                                  // AT, EQ_GT, SHARP
];

// Fail the build instead of mis-indexing at run time if the enum and the
// spelling table ever get out of step.
static assert(OP_CODE_STRINGS.length == OpCode.max + 1,
              "OP_CODE_STRINGS must have exactly one entry per OpCode member");

/**
 * Returns the source spelling of an operator.
 *
 * Params:
 *     op = operator code
 * Returns: the spelling as a dstring; an empty string for OpCode.NONE
 */
dstring getOpNameD(OpCode op) pure nothrow {
    return OP_CODE_STRINGS[op];
}
667 REF, 668 RETURN, 669 670 SCOPE, 671 SHARED, 672 SHORT, 673 STATIC, 674 STRUCT, 675 SUPER, 676 SWITCH, 677 SYNCHRONIZED, 678 679 TEMPLATE, 680 THIS, 681 THROW, 682 TRUE, 683 TRY, 684 TYPEDEF, 685 TYPEID, 686 TYPEOF, 687 688 UBYTE, 689 UCENT, 690 UINT, 691 ULONG, 692 UNION, 693 UNITTEST, 694 USHORT, 695 696 VERSION, 697 VOID, 698 VOLATILE, 699 700 WCHAR, 701 WHILE, 702 WITH, 703 704 FILE, 705 MODULE__, 706 LINE, 707 FUNCTION__, 708 PRETTY_FUNCTION, 709 710 //Special Token Replaced with 711 DATE, // string literal of the date of compilation "mmm dd yyyy" 712 EOF, // sets the scanner to the end of the file 713 TIME, // string literal of the time of compilation "hh:mm:ss" 714 TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 715 VENDOR, // Compiler vendor string, such as "Digital Mars D" 716 VERSION_, // Compiler version as an integer, such as 2001 717 718 GSHARED, 719 TRAITS, 720 VECTOR, 721 PARAMETERS, 722 723 } 724 725 immutable dstring[] KEYWORD_STRINGS = [ 726 "", 727 "abstract", 728 "alias", 729 "align", 730 "asm", 731 "assert", 732 "auto", 733 734 "body", 735 "bool", 736 "break", 737 "byte", 738 739 "case", 740 "cast", 741 "catch", 742 "cdouble", 743 "cent", 744 "cfloat", 745 "char", 746 "class", 747 "const", 748 "continue", 749 "creal", 750 751 "dchar", 752 "debug", 753 "default", 754 "delegate", 755 "delete", 756 "deprecated", 757 "do", 758 "double", 759 760 "else", 761 "enum", 762 "export", 763 "extern", 764 765 "false", 766 "final", 767 "finally", 768 "float", 769 "for", 770 "foreach", 771 "foreach_reverse", 772 "function", 773 774 "goto", 775 776 "idouble", 777 "if", 778 "ifloat", 779 "immutable", 780 "import", 781 "in", 782 "inout", 783 "int", 784 "interface", 785 "invariant", 786 "ireal", 787 "is", 788 789 "lazy", 790 "long", 791 792 "macro", 793 "mixin", 794 "module", 795 796 "new", 797 "nothrow", 798 "null", 799 800 "out", 801 "override", 802 803 "package", 804 "pragma", 805 "private", 806 "protected", 807 
/**
 * Returns the source spelling of a keyword.
 *
 * Params:
 *     keyword = keyword code, used as an index into KEYWORD_STRINGS
 * Returns: the spelling as a dstring; an empty string for Keyword.NONE
 */
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
    return KEYWORD_STRINGS[keyword];
}

/**
 * Tries to match one of the keywords in the range [start, end] against input
 * whose FIRST character has already been consumed by the caller.
 *
 * Only the characters after the first one of each keyword spelling are
 * compared (spelling[1 .. $] against name[0 .. spelling.length - 1]); the
 * caller is expected to have chosen `start`/`end` according to the first
 * character.
 *
 * A keyword is accepted only when it is not a prefix of a longer identifier:
 * either it uses up all `len` available characters, or the character that
 * follows it is not an identifier character.
 *
 * Params:
 *     start = first keyword to try (inclusive)
 *     end   = last keyword to try (inclusive)
 *     name  = pointer to the input right after the already consumed first character
 *     len   = number of valid characters available at `name`
 *     pos   = advanced by the number of characters consumed from `name` on success
 * Returns: the matched keyword, or Keyword.NONE when nothing matches
 */
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
    for (Keyword i = start; i <= end; i++) {
        dstring s = KEYWORD_STRINGS[i];
        // characters still to be matched after the pre-consumed first one
        immutable int tailLen = cast(int)s.length - 1;
        if (tailLen < 0)
            continue; // empty spelling (Keyword.NONE) can never match
        if (tailLen > len)
            continue; // too long
        bool found = true;
        for (int j = 0; j < tailLen; j++) {
            if (s[j + 1] != name[j]) {
                found = false;
                break;
            }
        }
        if (!found)
            continue;
        // name[tailLen] may only be inspected when it is inside the valid
        // range; a keyword that ends exactly at the end of input is a match.
        if (tailLen == len || !isIdentMiddleChar(name[tailLen])) {
            pos += tailLen;
            return i;
        }
    }
    return Keyword.NONE;
}
/**
 * Base class of every token produced by the tokenizer.
 *
 * Holds the token type and its source location. All value accessors are
 * declared here with neutral defaults (null, 0, false, NONE); concrete token
 * classes override only the ones that make sense for them.
 */
class Token {
    protected SourceFile _file;
    protected int _line;
    protected int _pos;
    protected TokenType _type;

    /// creates a token of the given type without source location
    this(TokenType type) {
        _type = type;
    }

    /// creates a token of the given type; line and pos are stored exactly as passed
    this(TokenType type, SourceFile file, int line, int pos) {
        _pos = pos;
        _line = line;
        _file = file;
        _type = type;
    }

    // location

    /// returns token type
    @property TokenType type() { return _type; }
    /// returns file info for source
    @property SourceFile filename() { return _file; }
    /// returns 1-based source line number of token start
    @property int line() { return _line; }
    /// returns 1-based source line position of token start
    @property int pos() { return _pos; }

    /// set source file information for token
    void setFile(SourceFile file) {
        _file = file;
    }
    /// set start position for token (line is 1-based, pos is 0-based and stored as pos + 1)
    void setPos(int line, int pos) {
        _pos = pos + 1;
        _line = line;
    }
    /// set source file and start position for token (line is 1-based, pos is 0-based and stored as pos + 1)
    void setPos(SourceFile file, int line, int pos) {
        _pos = pos + 1;
        _line = line;
        _file = file;
    }

    // generic value accessors, overridden by concrete tokens

    /// returns token text
    @property dchar[] text() { return null; }
    /// returns opcode ID - for opcode tokens
    @property OpCode opCode() { return OpCode.NONE; }
    /// returns keyword ID - for keyword tokens
    @property Keyword keyword() { return Keyword.NONE; }
    /// returns true if this is documentation comment token
    @property bool isDocumentationComment() { return false; }
    /// returns true if this is multiline
    @property bool isMultilineComment() { return false; }

    // number and literal token properties

    @property dchar literalType() { return 0; }
    @property ulong intValue() { return 0; }
    @property bool isUnsigned() { return false; }
    // NOTE(review): declared as ulong although it is used as a flag; overrides must keep the same type
    @property ulong isLong() { return false; }
    @property real realValue() { return 0; }
    @property double doubleValue() { return 0; }
    @property float floatValue() { return 0; }
    @property byte precision() { return 0; }
    @property bool isImaginary() { return false; }

    // error handling

    /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property bool isError() { return type == TokenType.INVALID; }
    /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property string errorMessage() { return null; }
    /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property int errorCode() { return 0; }
    /// returns type of token whose parsing failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property TokenType invalidTokenType() { return TokenType.INVALID; }

    /// returns an independent copy of this token
    public abstract Token clone();

    /// debug representation: "line:pos type opcode keyword "text""
    public override @property string toString() {
        string location = to!string(_line) ~ ":" ~ to!string(_pos);
        string kind = to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword);
        return location ~ " " ~ kind ~ " \"" ~ toUTF8(text()) ~ "\"";
    }
}

/// end of file token
class EofToken : Token {
    /// creates an EOF token with an explicit location (stored as passed)
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.EOF, file, line, pos);
    }
    /// creates an EOF token without location
    this() {
        super(TokenType.EOF);
    }
    public override @property string toString() {
        return "EOF";
    }
    override public Token clone() {
        Token copy = new EofToken(_file, _line, _pos);
        return copy;
    }
}
/// operator token; its text is the spelling looked up from the opcode
class OpToken : Token {
    OpCode _op;

    this(SourceFile file, uint line, uint pos) {
        super(TokenType.OP, file, line, pos);
    }
    this() {
        super(TokenType.OP);
    }

    /// returns opcode ID
    public @property override OpCode opCode() { return _op; }
    /// sets opcode ID
    public @property void opCode(OpCode op) { _op = op; }
    /// returns operator spelling (a view of the shared immutable name table - callers must not modify it)
    public @property override dchar[] text() {
        dstring spelling = getOpNameD(_op);
        return cast(dchar[])spelling;
    }

    public override @property string toString() {
        return "Op:" ~ to!string(_op);
    }
    override public Token clone() {
        OpToken copy = new OpToken(_file, _line, _pos);
        copy._op = _op;
        return copy;
    }
}

/// keyword token; its text is the spelling looked up from the keyword code
class KeywordToken : Token {
    Keyword _keyword;

    this(SourceFile file, uint line, uint pos) {
        super(TokenType.KEYWORD, file, line, pos);
    }
    this() {
        super(TokenType.KEYWORD);
    }

    /// returns keyword ID
    public @property override Keyword keyword() { return _keyword; }
    /// sets keyword ID
    public @property void keyword(Keyword keyword) { _keyword = keyword; }
    /// returns keyword spelling (a view of the shared immutable name table - callers must not modify it)
    public @property override dchar[] text() {
        dstring spelling = getKeywordNameD(_keyword);
        return cast(dchar[])spelling;
    }

    public override @property string toString() {
        return "Keyword:" ~ to!string(_keyword);
    }
    override public Token clone() {
        KeywordToken copy = new KeywordToken(_file, _line, _pos);
        copy._keyword = _keyword;
        return copy;
    }
}

/// comment token
class CommentToken : Token {
    protected dchar[] _text;
    protected bool _isDocumentationComment;
    protected bool _isMultilineComment;

    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.COMMENT, file, line, pos);
        _text = text;
    }
    this() {
        super(TokenType.COMMENT);
    }

    /// returns comment text
    @property override dchar[] text() { return _text; }
    /// sets comment text (the array is stored as is, not copied)
    @property void text(dchar[] text) { _text = text; }

    /// returns true if this is documentation comment token
    override @property bool isDocumentationComment() {
        return _isDocumentationComment;
    }
    /// marks the comment as documentation / regular comment
    @property void isDocumentationComment(bool f) {
        _isDocumentationComment = f;
    }

    /// returns true if this is multiline
    override @property bool isMultilineComment() {
        return _isMultilineComment;
    }
    /// marks the comment as multiline / single line
    @property void isMultilineComment(bool f) {
        _isMultilineComment = f;
    }

    public override @property string toString() {
        return "Comment:" ~ to!string(_text);
    }
    /// returns a copy that owns its own duplicate of the text
    override public Token clone() {
        CommentToken copy = new CommentToken(_file, _line, _pos, _text.dup);
        copy._isMultilineComment = _isMultilineComment;
        copy._isDocumentationComment = _isDocumentationComment;
        return copy;
    }
}
/// numeric identifier handle
alias tokenizer_ident_t = uint;
/// identifier text
alias tokenizer_ident_name_t = dchar[];

enum : tokenizer_ident_t {
    /// reserved handle meaning "no such identifier"
    NO_IDENT = 0
}

/**
 * Global storage for identifier strings.
 *
 * Maps identifier text to a numeric handle and back. Handles are handed out
 * sequentially, starting at NO_IDENT + 1.
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    public this() {
        _nextId = NO_IDENT + 1;
    }

    /**
     * Search for id by name, return NO_IDENT if not found.
     */
    uint findByName(tokenizer_ident_name_t name) {
        if (auto found = name in _nameToId)
            return *found;
        return NO_IDENT;
    }

    /**
     * Search for name by id, return null if not found.
     */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        if (auto found = id in _idToName)
            return *found;
        return null;
    }

    /**
     * Search for ident id by name, create new entry if not found.
     *
     * The name is copied before it is stored: callers pass slices of reusable
     * scratch buffers, and keeping such a slice as a map key or value would
     * let the next identifier overwrite the stored one.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        if (auto found = name in _nameToId)
            return *found;
        immutable tokenizer_ident_t newid = _nextId++;
        // separate copies: the key must stay immutable even if a caller
        // modifies the array later returned by nameById()
        _nameToId[name.idup] = newid;
        _idToName[newid] = name.dup;
        return newid;
    }
}
/// string literal token; literalType is the suffix character stored with the text (0 when none)
class StringLiteralToken : Token {
    dchar[] _text;
    dchar _literalType;

    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
        super(TokenType.STRING, file, line, pos);
        _literalType = type;
        _text = text;
    }
    this() {
        super(TokenType.STRING);
    }

    /// returns string content
    public @property override dchar[] text() { return _text; }
    /// returns literal type character
    public @property override dchar literalType() { return _literalType; }
    /// sets string content (stored as is, not copied) and literal type character
    public void setText(dchar[] text, dchar type) {
        _literalType = type;
        _text = text;
    }

    public override @property string toString() {
        // a missing literal type is shown as a single space
        return toUTF8("String:\"" ~ _text ~ "\"" ~ (_literalType ? _literalType : ' '));
    }
    /// returns a copy that owns its own duplicate of the text
    override public Token clone() {
        Token copy = new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
        return copy;
    }
}

/// character literal token
class CharacterLiteralToken : Token {
    dchar _character;
    dchar _literalType;

    this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
        super(TokenType.CHARACTER, file, line, pos);
        _literalType = type;
        _character = character;
    }
    this() {
        super(TokenType.CHARACTER);
    }

    /// returns the character value
    @property dchar character() { return _character; }
    /// returns literal type character
    @property override dchar literalType() { return _literalType; }
    /// returns the character as a newly built one-element array
    @property override dchar[] text() { return [_character]; }
    /// sets character value and literal type character
    void setCharacter(dchar ch, dchar type) {
        _literalType = type;
        _character = ch;
    }

    public override @property string toString() {
        return "Char:" ~ toUTF8([_character]);
    }
    override public Token clone() {
        Token copy = new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
        return copy;
    }
}

/// integer literal token: value plus unsigned / long flags
class IntegerLiteralToken : Token {
    ulong _value;
    bool _unsigned;
    bool _long;

    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
        super(TokenType.INTEGER, file, line, pos);
        _long = longFlag;
        _unsigned = unsignedFlag;
        _value = value;
    }
    this() {
        super(TokenType.INTEGER);
    }

    /// returns the literal value
    public @property override ulong intValue() { return _value; }
    /// returns the unsigned flag
    public @property override bool isUnsigned() { return _unsigned; }
    /// returns the long flag (typed ulong to match the base class declaration)
    public @property override ulong isLong() { return _long; }
    /// returns the value formatted as decimal text (flags are not included)
    public @property override dchar[] text() {
        dstring digits = to!dstring(_value);
        return cast(dchar[])digits;
    }

    /// replaces both flags; a flag that is not passed is reset to false
    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
        _long = longFlag;
        _unsigned = unsignedFlag;
    }
    /// replaces the value and both flags; a flag that is not passed is reset to false
    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
        _long = longFlag;
        _unsigned = unsignedFlag;
        _value = value;
    }

    public override @property string toString() {
        string longMark = _long ? "L" : "";
        string unsignedMark = _unsigned ? "U" : "";
        return "Integer:" ~ to!string(_value) ~ longMark ~ unsignedMark;
    }
    override public Token clone() {
        Token copy = new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
        return copy;
    }
}
/**
 * Shared appender buffer, to avoid extra heap allocations.
 *
 * Characters are accumulated in `buf`; `reset()` only rewinds the length so
 * the storage is reused. `get()` returns a slice of that storage, so its
 * content is overwritten by later appends after a reset.
 */
struct StringAppender {
    /// backing storage; only the first `len` elements are meaningful
    dchar[] buf;
    /// number of characters currently stored
    uint len;

    /// returns the accumulated characters as a slice of the internal buffer (no copy)
    dchar[] get() {
        return buf[0 .. len];
    }

    /// grows the buffer when `extra` more characters would not fit (minimum size 128)
    private void ensureCapacity(size_t extra) {
        if (len + extra <= buf.length)
            return;
        uint newsize = cast(uint)((len + extra + buf.length) * 2);
        if (newsize < 128)
            newsize = 128;
        buf.length = newsize;
    }

    /// appends a line feed character
    void appendEol() {
        ensureCapacity(1);
        buf[len] = '\n';
        len++;
    }

    /// appends all characters of s; an empty s is a no-op
    void append(dchar[] s) {
        if (s.length == 0)
            return;
        ensureCapacity(s.length);
        buf[len .. len + s.length] = s;
        len += cast(uint)s.length;
    }

    /// appends a single character
    void append(dchar ch) {
        ensureCapacity(1);
        buf[len++] = ch;
    }

    /// forgets the accumulated characters but keeps the allocated storage
    void reset() {
        len = 0;
    }

    /// returns the value 0..15 of a hexadecimal digit, or -1 if ch is not one
    static int parseHexDigit(dchar ch) {
        if (ch >= '0' && ch <='9')
            return ch - '0';
        if (ch >= 'a' && ch <='f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <='F')
            return ch - 'A' + 10;
        return -1;
    }

    /// set by the decode helpers and by processEscapeSequences() when malformed input is seen
    bool errorFlag = false;

    /**
     * Decodes `count` hexadecimal digits that follow buf[pos].
     *
     * pos is advanced onto the last digit read. When the buffer ends early the
     * value decoded so far is returned and errorFlag is set; a non-hex
     * character counts as digit 0 and sets errorFlag.
     */
    dchar decodeHex(ref int pos, int count) {
        dchar res = 0;
        for (int i = 0; i < count; i++) {
            if (pos + 1 >= len) {
                errorFlag = true;
                return res;
            }
            dchar ch = buf[++pos];
            int digit = parseHexDigit(ch);
            if (digit < 0) {
                errorFlag = true;
                digit = 0;
            }
            res = (res << 4) | digit;
        }
        return res;
    }

    /**
     * Decodes an octal escape of one to three digits.
     *
     * firstChar is the digit at buf[pos]; up to two more octal digits are
     * taken from the buffer and pos is advanced onto the last one used.
     */
    dchar decodeOct(dchar firstChar, ref int pos) {
        dchar res = 0;
        res = firstChar - '0';
        for (int extra = 0; extra < 2; extra++) {
            if (pos + 1 >= len || buf[pos + 1] < '0' || buf[pos + 1] > '7')
                break;
            res = (res << 3) | (buf[++pos] - '0');
        }
        return res;
    }

    /// scratch buffer for the name of a character entity
    char[] entityNameBuf;
    /// number of characters used in entityNameBuf
    int entityNameLen;

    /**
     * Decodes a named character entity. buf[pos] is the '&'; the name runs up
     * to the next ';'. On success pos is left on the ';'.
     *
     * Returns '?' and sets errorFlag when the ';' is missing or entityToChar()
     * does not know the name. Non-ASCII characters in the name set errorFlag.
     */
    dchar decodeCharacterEntity(ref int pos) {
        entityNameLen = 0;
        pos++;
        for(; pos < len && buf[pos] != ';'; pos++) {
            dchar ch = buf[pos];
            if (ch >= 0x80)
                errorFlag = true;
            if (entityNameBuf.length < entityNameLen + 4)
                entityNameBuf.length += 32;
            entityNameBuf[entityNameLen++] = cast(char)ch;
        }
        if (pos < len && buf[pos] == ';') {
            dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /**
     * Replaces backslash escape sequences in the buffer, in place.
     *
     * Handles the single-character escapes, \xHH, \uHHHH, \UHHHHHHHH, octal
     * escapes of one to three digits (including those that start with 0) and
     * named entities (\&name;). An unknown escape keeps the escaped character
     * and sets the error flag; a lone backslash at the very end is dropped and
     * sets the error flag.
     *
     * Returns: true when an error was detected (the value of errorFlag),
     *          false when every sequence was valid.
     */
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0;
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch != '\\') {
                buf[dst++] = ch;
                continue;
            }
            if (src + 1 >= len) {
                // backslash with nothing after it
                errorFlag = true;
                break;
            }
            ch = buf[++src];
            switch (ch) {
                case '\'':
                case '\"':
                case '?':
                case '\\':
                    buf[dst++] = ch;
                    break;
                case 'a':
                    buf[dst++] = '\a';
                    break;
                case 'b':
                    buf[dst++] = '\b';
                    break;
                case 'f':
                    buf[dst++] = '\f';
                    break;
                case 'n':
                    buf[dst++] = '\n';
                    break;
                case 'r':
                    buf[dst++] = '\r';
                    break;
                case 't':
                    buf[dst++] = '\t';
                    break;
                case 'v':
                    buf[dst++] = '\v';
                    break;
                case 'x':
                    buf[dst++] = decodeHex(src, 2);
                    break;
                case 'u':
                    buf[dst++] = decodeHex(src, 4);
                    break;
                case 'U':
                    buf[dst++] = decodeHex(src, 8);
                    break;
                case '0': .. case '7':
                    // octal X, XX or XXX; a plain \0 still decodes to NUL
                    buf[dst++] = decodeOct(ch, src);
                    break;
                case '&':
                    // named character entity
                    buf[dst++] = decodeCharacterEntity(src);
                    break;
                default:
                    // unknown escape: keep the character, report the problem
                    buf[dst++] = ch;
                    errorFlag = true;
                    break;
            }
        }
        len = dst;
        return errorFlag;
    }
}
WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken(); 1540 protected CommentToken _sharedCommentToken = new CommentToken(); 1541 protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken(); 1542 protected IdentToken _sharedIdentToken = new IdentToken(); 1543 protected OpToken _sharedOpToken = new OpToken(); 1544 protected KeywordToken _sharedKeywordToken = new KeywordToken(); 1545 protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken(); 1546 protected RealLiteralToken _sharedRealToken = new RealLiteralToken(); 1547 protected InvalidToken _sharedInvalidToken = new InvalidToken(); 1548 protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken(); 1549 protected StringAppender _stringLiteralAppender; 1550 protected StringAppender _commentAppender; 1551 protected StringAppender _identAppender; 1552 1553 protected bool _enableCommentText = true; 1554 /// when false, does not put comment text into comment token - for less allocations 1555 @property void enableCommentText(bool enabled) { 1556 _enableCommentText = enabled; 1557 } 1558 /// when false, does not put comment text into comment token - for less allocations 1559 @property bool enableCommentText() { 1560 return _enableCommentText; 1561 } 1562 1563 protected bool _errorTolerant = false; 1564 /// when true, returns BadToken instead of throwing exception 1565 @property void errorTolerant(bool enabled) { 1566 _errorTolerant = enabled; 1567 } 1568 /// when true, returns BadToken instead of throwing exception 1569 @property bool errorTolerant() { 1570 return _errorTolerant; 1571 } 1572 1573 this(SourceLines lineStream) { 1574 init(lineStream); 1575 } 1576 1577 void init(SourceLines lineStream, int pos = 0) { 1578 _lineStream = lineStream; 1579 SourceFile file = _lineStream.file; 1580 _sharedWhiteSpaceToken.setFile(file); 1581 _sharedCommentToken.setFile(file); 1582 _sharedStringLiteralToken.setFile(file); 1583 
/**
 * Fetches the next line from the source stream into _lineText.
 *
 * Remembers the length of the line being left in _prevLineLength and resets
 * _pos to 0.
 *
 * Returns: false when the stream reports end of file (then _pos and _len are 0),
 *          true otherwise - including for an empty line.
 * Throws: SourceEncodingException when the stream reports a non-zero error code.
 */
protected bool nextLine() {
    _prevLineLength = cast(int)_lineText.length;
    _lineText = _lineStream.readLine();
    if (!_lineText) {
        if (_lineStream.errorCode != 0)
            throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
        if (_lineStream.eof) {
            // end of file
            _pos = 0;
            _len = 0;
            return false;
        }
        // just an empty line
    }
    _line = _lineStream.line;
    _pos = 0;
    _len = cast(int)_lineText.length; // lines longer than int.max characters are not supported
    return true;
}

/**
 * Consumes and returns the next character.
 *
 * When the current line is already exhausted, the next line is loaded and
 * EOL_CHAR is returned, or EOF_CHAR when there are no more lines (in that
 * case _pos is set to _prevLineLength + 1). After the last character of a
 * line has been consumed the following line is loaded immediately.
 */
protected dchar nextChar() {
    if (_pos >= _len) {
        if (!nextLine()) {
            _pos = _prevLineLength + 1;
            return EOF_CHAR;
        }
        return EOL_CHAR;
    }
    dchar res = _lineText[_pos++];
    if (_pos >= _len)
        nextLine();
    return res;
}

/**
 * Returns the character at the current read position WITHOUT consuming it.
 *
 * Loads a line first when none is loaded yet. Returns EOF_CHAR when no line
 * can be loaded and EOL_CHAR when the read position is at the end of the
 * current line.
 */
protected dchar peekChar() {
    if (_lineText is null) {
        if (!nextLine()) {
            return EOF_CHAR;
        }
    }
    if (_pos >= _len)
        return EOL_CHAR;
    // a peek must leave _pos untouched so the next read sees the same character
    return _lineText[_pos];
}

/// creates the end-of-file token, positioned at (_startLine, _startPos + 2)
protected Token emitEof() {
    // TODO: check for current state
    return new EofToken(_lineStream.file, _startLine, _startPos + 2);
}
_sharedWhiteSpaceToken.setPos(_startLine, _startPos); 1657 for (;;) { 1658 int i = _pos; 1659 for (; i < _len; i++) { 1660 dchar ch = _lineText[i]; 1661 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR)) 1662 break; 1663 } 1664 _pos = i; 1665 if (_pos < _len) 1666 break; 1667 // go to next line 1668 if (!nextLine()) 1669 break; 1670 } 1671 return _sharedWhiteSpaceToken; 1672 } 1673 1674 protected Token processOneLineComment() { 1675 _sharedCommentToken.setPos(_startLine, _startPos); 1676 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/'; 1677 _sharedCommentToken.isMultilineComment = false; 1678 if (_enableCommentText) { 1679 _sharedCommentToken.text = _lineText[_pos + 1 .. $]; 1680 } 1681 _pos = _len; 1682 nextChar(); 1683 return _sharedCommentToken; 1684 } 1685 1686 protected Token processOneLineSharpComment() { 1687 _sharedCommentToken.setPos(_startLine, _startPos); 1688 if (_enableCommentText) { 1689 _sharedCommentToken.text = _lineText[_pos .. $]; 1690 } 1691 _pos = _len; 1692 return _sharedCommentToken; 1693 } 1694 1695 // Comment /* */ 1696 protected Token processMultilineComment() { 1697 _sharedCommentToken.setPos(_startLine, _startPos); 1698 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*'; 1699 _sharedCommentToken.isMultilineComment = true; 1700 _commentAppender.reset(); 1701 int textStart = _pos + 1; 1702 for (;;) { 1703 int textEnd = int.max; 1704 int i = textStart; 1705 for (; i < _len - 1; i++) { 1706 if (_lineText[i] == '*' && _lineText[i + 1] == '/') { 1707 textEnd = i; 1708 break; 1709 } 1710 } 1711 if (textEnd != int.max) { 1712 if (_enableCommentText) 1713 _commentAppender.append(_lineText[textStart .. textEnd]); 1714 _pos = textEnd + 2; 1715 break; 1716 } 1717 if (!nextLine()) { 1718 // TODO: do we need throw exception if comment not closed by end of file? 
_pos = _len;
                break;
            }
            textStart = 0;
        }
        if (_enableCommentText) {
            _sharedCommentToken.text = _commentAppender.get();
        }
        return _sharedCommentToken;
    }

    // Comment /+   +/
    // Nested comments may contain further /+ +/ pairs; the comment ends when
    // the nesting level drops back to zero.
    protected Token processNestedComment() {
        _sharedCommentToken.setPos(_startLine, _startPos);
        _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
        _sharedCommentToken.isMultilineComment = true;
        _commentAppender.reset();
        int textStart = _pos + 1;
        int level = 1;
        for (;;) {
            int textEnd = int.max;
            int i = textStart;
            for (; i < _len - 1; i++) {
                if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
                    level++;
                    i++;
                } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
                    if (--level == 0) {
                        textEnd = i;
                        break;
                    }
                    // step over the '/' so that "+/+" is not read again as an opening "/+"
                    i++;
                }
            }
            if (textEnd != int.max) {
                if (_enableCommentText)
                    _commentAppender.append(_lineText[textStart .. textEnd]);
                _pos = textEnd + 2;
                break;
            }
            // The comment continues on the next line: keep the text of this
            // line. Previously only the closing line was stored.
            if (_enableCommentText)
                _commentAppender.append(_lineText[textStart .. $]);
            if (!nextLine()) {
                // TODO: do we need throw exception if comment not closed by end of file?
_pos = _len;
                break;
            }
            if (_enableCommentText)
                _commentAppender.appendEol();
            textStart = 0;
        }
        if (_enableCommentText) {
            _sharedCommentToken.text = _commentAppender.get();
        }
        return _sharedCommentToken;
    }

    /// Hex string x"..." : not implemented yet, returns null.
    protected Token processHexString() {
        _pos++;
        // TODO:
        return null;
    }

    /// Delimited string q"..." : not implemented yet, returns null.
    protected Token processDelimitedString() {
        _pos++;
        // TODO:
        return null;
    }

    // r"string" or `string`
    // Not implemented yet, returns null.
    protected Token processWysiwygString(dchar ch) {
        _pos++;
        // TODO:
        return null;
    }

    /// Reads an identifier that starts with firstChar and extends to the last
    /// identifier character on the current line.
    protected Token processIdent(dchar firstChar) {
        _sharedIdentToken.setPos(_startLine, _startPos);
        _identAppender.reset();
        _identAppender.append(firstChar);
        while (_pos < _len) {
            dchar ch = _lineText[_pos];
            if (!isIdentMiddleChar(ch))
                break;
            _identAppender.append(ch);
            _pos++;
        }
        _sharedIdentToken.setText(_identAppender.get);
        return _sharedIdentToken;
    }

    /// Reads the optional L / U / LU / UL suffix of an integer literal and
    /// stores the resulting flags in the shared integer token.
    protected Token processIntegerSuffix() {
        if (_pos >= _len) {
            // No suffix at all. The token instance is reused, so clear the
            // flags instead of leaving those of the previous literal in place.
            _sharedIntegerToken.setFlags(false, false);
            return _sharedIntegerToken;
        }
        bool longFlag = false;
        bool unsignedFlag = false;
        dchar ch = _lineText[_pos];
        dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
        if (ch == 'l' || ch == 'L') {
            longFlag = true;
            _pos++;
            if (ch2 == 'u' || ch2 == 'U') {
                unsignedFlag = true;
                _pos++;
            }
        } else if (ch == 'u' || ch == 'U') {
            unsignedFlag = true;
            _pos++;
            if (ch2 == 'l' || ch2 == 'L') {
                longFlag = true;
                _pos++;
            }
        }
        _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
        ch = _pos < _len ?
_lineText[_pos] : 0;
        if (isIdentMiddleChar(ch))
            return parserError("Unexpected character after number", _sharedIntegerToken);
        return _sharedIntegerToken;
    }

    /// Reads a binary literal; the cursor is on the 'b' / 'B' of the prefix.
    /// Underscores between digits are accepted as separators.
    protected Token processBinaryNumber() {
        _sharedIntegerToken.setPos(_startLine, _startPos);
        _pos++; // skip 'b' / 'B'
        if (_pos >= _len)
            return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
        int digits = 0;
        ulong number = 0;
        int i = _pos;
        for (; i < _len; i++) {
            dchar ch = _lineText[i];
            if (ch == '_')
                continue; // digit separator, same as in the hex / octal / decimal scanners
            if (ch != '0' && ch != '1')
                break;
            number = (number << 1) | (ch == '1' ? 1 : 0);
            digits++;
        }
        _pos = i;
        if (digits == 0)
            return parserError("Binary number must contain at least one digit", _sharedIntegerToken);
        if (digits > 64)
            return parserError("number is too big", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        return processIntegerSuffix();
    }

    /// Reads a hexadecimal literal; the cursor is on the 'x' / 'X' of the prefix.
    /// Underscores between digits are accepted as separators.
    protected Token processHexNumber() {
        _sharedIntegerToken.setPos(_startLine, _startPos);
        _sharedRealToken.setPos(_startLine, _startPos);
        _pos++; // skip 'x' / 'X'
        if (_pos >= _len)
            return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
        int digits = 0;
        ulong number = 0;
        int i = _pos;
        for (; i < _len; i++) {
            dchar ch = _lineText[i];
            uint digit = 0;
            if (ch >= '0' && ch <= '9')
                digit = ch - '0';
            else if (ch >= 'a' && ch <= 'f')
                digit = ch - 'a' + 10;
            else if (ch >= 'A' && ch <= 'F')
                digit = ch - 'A' + 10;
            else if (ch == '_')
                continue;
            else
                break;
            number = (number << 4) | digit;
            digits++;
        }
        _pos = i;
        if (digits > 16)
            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        return processIntegerSuffix();
    }

    /// Reads an octal literal (a number with a leading 0).
    protected Token processOctNumber() {
        _sharedIntegerToken.setPos(_startLine, _startPos);
        if (_pos >= _len)
            return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
        int digits
= 0;
        ulong number = 0;
        int i = _pos;
        bool overflow = false;
        for (; i < _len; i++) {
            dchar ch = _lineText[i];
            int digit = 0;
            if (ch >= '0' && ch <= '7')
                digit = ch - '0';
            else if (ch == '_')
                continue;
            else
                break;
            // Overflow must be detected BEFORE shifting: once the top three
            // bits are shifted out they cannot be recovered, which is why the
            // old "shift, then shift back and compare" test never fired.
            if (number >> 61) {
                overflow = true;
                break;
            }
            number = (number << 3) | digit;
            digits++;
        }
        _pos = i;
        if (overflow)
            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        return processIntegerSuffix();
    }

    /// Reads the optional suffix of a floating point literal:
    /// f / F (precision 0) or L (precision 2), optionally followed by i (imaginary).
    /// Without a precision suffix the precision is 1.
    protected Token processDecFloatSuffix(real value) {
        ubyte precision = 1;
        bool imaginary = false;
        dchar next = _pos < _len ? _lineText[_pos] : 0;
        if (next == 'f' || next == 'F') { // both spellings are valid float suffixes in D
            _pos++;
            precision = 0;
        } else if (next == 'L') {
            _pos++;
            precision = 2;
        }
        next = _pos < _len ? _lineText[_pos] : 0;
        if (next == 'i') {
            _pos++;
            imaginary = true;
        }
        next = _pos < _len ? _lineText[_pos] : 0;
        if (isIdentMiddleChar(next))
            return parserError("invalid suffix for floating point literal", _sharedRealToken);
        _sharedRealToken.setValue(value, precision, imaginary);
        return _sharedRealToken;
    }

    // after E char
    protected Token processDecFloatExponent(real value) {
        dchar next = _pos < _len ?
_lineText[_pos] : 0;
        int sign = 1;
        if (next == '+') {
            _pos++;
        } else if (next == '-') {
            _pos++;
            sign = -1;
        }
        if (_pos >= _len)
            return parserError("Invalid exponent", _sharedRealToken);
        ulong digits = 0;
        ulong number = 0;
        int i = _pos;
        bool overflow = false;
        // the exponent is converted to long below, so it must not exceed long.max
        const ulong limit = long.max;
        for (; i < _len; i++) {
            dchar ch = _lineText[i];
            uint digit = 0;
            if (ch >= '0' && ch <= '9')
                digit = ch - '0';
            else if (ch == '_')
                continue;
            else
                break;
            // test before multiplying; a test made after the multiplication
            // cannot see an overflow that has already happened
            if (number > (limit - digit) / 10) {
                overflow = true;
                break;
            }
            number = number * 10 + digit;
            digits++;
        }
        if (digits == 0)
            return parserError("Invalid exponent", _sharedRealToken);
        _pos = i;
        // previously the overflow flag was computed but never looked at
        if (overflow)
            return parserError("Exponent is too big", _sharedRealToken);
        value *= pow(10., cast(long)number * sign);
        return processDecFloatSuffix(value);
    }

    /// Reads the digits after the decimal point; firstPart is the integer part
    /// that was read before the point.
    protected Token processDecFloatSecondPart(ulong firstPart) {
        if (_pos >= _len) {
            _sharedRealToken.setValue(cast(real)firstPart);
            return _sharedRealToken;
        }
        ulong divider = 1;
        ulong number = 0;
        int i = _pos;
        for (; i < _len; i++) {
            dchar ch = _lineText[i];
            uint digit = 0;
            if (ch >= '0' && ch <= '9')
                digit = ch - '0';
            else if (ch == '_')
                continue;
            else
                break;
            // one more digit would overflow the divider: ignore extra digits
            // (explicit bound instead of relying on how the product wraps)
            if (divider > ulong.max / 10)
                continue;
            number *= 10;
            number += digit;
            divider *= 10;
        }
        _pos = i;
        real value = cast(real)firstPart + (cast(real)number / divider);
        dchar next = _pos < _len ?
_lineText[_pos] : 0;
        if (next == 0) {
            // neither exponent nor suffix
            _sharedRealToken.setValue(value);
            return _sharedRealToken;
        }
        if (next == 'e' || next == 'E') {
            _pos++;
            return processDecFloatExponent(value);
        }
        return processDecFloatSuffix(value);
    }

    /// Reads a decimal literal. The first digit has already been consumed by
    /// the caller, so the cursor is moved back onto it first.
    protected Token processDecNumber(dchar c) {
        _pos--;
        _sharedIntegerToken.setPos(_startLine, _startPos);
        _sharedRealToken.setPos(_startLine, _startPos);
        if (_pos >= _len)
            return parserError("Unexpected end of line in number", _sharedIntegerToken);
        int digits = 0;
        ulong number = 0;
        int i = _pos;
        bool overflow = false;
        for (; i < _len; i++) {
            dchar ch = _lineText[i];
            uint digit = 0;
            if (ch >= '0' && ch <= '9')
                digit = ch - '0';
            else if (ch == '_')
                continue;
            else
                break;
            // Test before multiplying. The old test ran after "number *= 10"
            // and asked whether yet another multiplication would overflow, so
            // valid 19-digit values such as 9223372036854775807 were rejected.
            if (number > (ulong.max - digit) / 10) {
                overflow = true;
                break;
            }
            number = number * 10 + digit;
            digits++;
        }
        _pos = i;
        if (overflow)
            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
        _sharedIntegerToken.setValue(number);
        dchar next = _pos < _len ?
_lineText[_pos] : 0;
        if (next == 0)
            return _sharedIntegerToken;
        // ".." after the digits is the range / slice operator ("1..2"), not a
        // decimal point: in that case the literal stays an integer.
        bool rangeFollows = next == '.' && _pos + 1 < _len && _lineText[_pos + 1] == '.';
        if (next == 'e' || next == 'E') {
            _pos++;
            return processDecFloatExponent(number);
        } else if (next == '.' && !rangeFollows) {
            _pos++;
            return processDecFloatSecondPart(number);
        }
        return processIntegerSuffix();
    }

    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
    protected Token parserError(string msg, Token incompleteToken) {
        return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
    }
    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
    protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
        if (_errorTolerant) {
            startPos--;
            _sharedInvalidToken.setPos(startLine, startPos);
            _sharedInvalidToken.errorMessage = msg;
            _sharedInvalidToken.errorCode = 1; // for future extension
            _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
            // make invalid source text
            dchar[] invalidText;
            int p = startLine == _line ?
startPos : 0;
            for (int i = p; i < _pos && i < _lineText.length; i++)
                invalidText ~= _lineText[i];

            // recover after error
            for (; _pos < _lineText.length; _pos++) {
                dchar ch = _lineText[_pos];
                if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
                    break;
                if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
                    if (ch == '*' || ch == '/')
                        break;
                }
                invalidText ~= ch;
            }
            _sharedInvalidToken.text = invalidText;
            return _sharedInvalidToken;
        }
        throw new ParserException(msg, _lineStream.file, _line, _pos);
    }

    /// Looks for a keyword that starts with ch and continues at the cursor.
    /// The first character selects the range of keyword codes handed to
    /// findKeyword; Keyword.NONE is returned when no range applies.
    protected Keyword detectKeyword(dchar ch) {
        if (ch > 'z')
            return Keyword.NONE;
        int len = _len - _pos;

        // every case differs only in the keyword range, the text arguments are always the same
        Keyword inRange(Keyword first, Keyword last) {
            return findKeyword(first, last, _lineText.ptr + _pos, len, _pos);
        }

        switch (cast(ubyte)ch) {
            // ABSTRACT, ALIAS, ALIGN, ASM, ASSERT, AUTO
            case 'a': return inRange(Keyword.ABSTRACT, Keyword.AUTO);
            // BODY, BOOL, BREAK, BYTE
            case 'b': return inRange(Keyword.BODY, Keyword.BYTE);
            // CASE, CAST, CATCH, CDOUBLE, CENT, CFLOAT, CHAR, CLASS, CONST, CONTINUE, CREAL
            case 'c': return inRange(Keyword.CASE, Keyword.CREAL);
            // DCHAR, DEBUG, DEFAULT, DELEGATE, DELETE, DEPRECATED, DO, DOUBLE
            case 'd': return inRange(Keyword.DCHAR, Keyword.DOUBLE);
            // ELSE, ENUM, EXPORT, EXTERN
            case 'e': return inRange(Keyword.ELSE, Keyword.EXTERN);
            // FALSE, FINAL, FINALLY, FLOAT, FOR, FOREACH, FOREACH_REVERSE, FUNCTION
            case 'f': return inRange(Keyword.FALSE, Keyword.FUNCTION);
            // GOTO
            case 'g': return inRange(Keyword.GOTO, Keyword.GOTO);
            // IDOUBLE, IF, IFLOAT, IMMUTABLE, IMPORT, IN, INOUT, INT, INTERFACE, INVARIANT, IREAL, IS
            case 'i': return inRange(Keyword.IDOUBLE, Keyword.IS);
            // LAZY, LONG
            case 'l': return inRange(Keyword.LAZY, Keyword.LONG);
            // MACRO, MIXIN, MODULE
            case 'm': return inRange(Keyword.MACRO, Keyword.MODULE);
            // NEW, NOTHROW, NULL
            case 'n': return inRange(Keyword.NEW, Keyword.NULL);
            // OUT, OVERRIDE
            case 'o': return inRange(Keyword.OUT, Keyword.OVERRIDE);
            // PACKAGE, PRAGMA, PRIVATE, PROTECTED, PUBLIC, PURE
            case 'p': return inRange(Keyword.PACKAGE, Keyword.PURE);
            // REAL, REF, RETURN
            case 'r': return inRange(Keyword.REAL, Keyword.RETURN);
            // SCOPE, SHARED, SHORT, STATIC, STRUCT, SUPER, SWITCH, SYNCHRONIZED
            case 's': return inRange(Keyword.SCOPE, Keyword.SYNCHRONIZED);
            // TEMPLATE, THIS, THROW, TRUE, TRY, TYPEDEF, TYPEID, TYPEOF
            case 't': return inRange(Keyword.TEMPLATE, Keyword.TYPEOF);
            // UBYTE, UCENT, UINT, ULONG, UNION, UNITTEST, USHORT
            case 'u': return inRange(Keyword.UBYTE, Keyword.USHORT);
            // VERSION, VOID, VOLATILE
            case 'v': return inRange(Keyword.VERSION, Keyword.VOLATILE);
            // WCHAR, WHILE, WITH
            case 'w': return inRange(Keyword.WCHAR, Keyword.WITH);
            // FILE, MODULE, LINE, FUNCTION, PRETTY_FUNCTION,
            // GSHARED, TRAITS, VECTOR, PARAMETERS
            case '_': return inRange(Keyword.FILE, Keyword.PARAMETERS);
            default: return Keyword.NONE;
        }
    }
    protected OpCode detectOp(dchar ch) nothrow {
        if (ch >= 128)
            return OpCode.NONE;
        dchar ch2 = _pos < _len ? _lineText[_pos] : 0;
        dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
        switch(cast(ubyte)ch) {
            // DIV,        // /
            // DIV_EQ,     // /=
            case '/':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.DIV_EQ;
                }
                return OpCode.DIV;
            // DOT,        // .
            // DOT_DOT,    // ..
            // DOT_DOT_DOT,// ...
case '.':
                if (ch2 == '.') {
                    if (ch3 == '.') {
                        _pos += 2;
                        return OpCode.DOT_DOT_DOT;
                    }
                    _pos++;
                    return OpCode.DOT_DOT;
                }
                return OpCode.DOT;
            // AND,        // &
            // AND_EQ,     // &=
            // LOG_AND,    // &&
            case '&':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.AND_EQ;
                }
                if (ch2 == '&') {
                    _pos++;
                    return OpCode.LOG_AND;
                }
                return OpCode.AND;
            // OR,         // |
            // OR_EQ,      // |=
            // LOG_OR,     // ||
            case '|':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.OR_EQ;
                }
                if (ch2 == '|') {
                    _pos++;
                    return OpCode.LOG_OR;
                }
                return OpCode.OR;
            // MINUS,      // -
            // MINUS_EQ,   // -=
            // MINUS_MINUS,// --
            case '-':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.MINUS_EQ;
                }
                if (ch2 == '-') {
                    _pos++;
                    return OpCode.MINUS_MINUS;
                }
                return OpCode.MINUS;
            // PLUS,       // +
            // PLUS_EQ,    // +=
            // PLUS_PLUS,  // ++
            case '+':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.PLUS_EQ;
                }
                if (ch2 == '+') {
                    _pos++;
                    return OpCode.PLUS_PLUS;
                }
                return OpCode.PLUS;
            // LT,         // <
            // LT_EQ,      // <=
            // SHL,        // <<
            // SHL_EQ,     // <<=
            // LT_GT,      // <>
            // NE_EQ,      // <>=
            case '<':
                if (ch2 == '<') {
                    if (ch3 == '=') {
                        _pos += 2;
                        return OpCode.SHL_EQ;
                    }
                    _pos++;
                    return OpCode.SHL;
                }
                if (ch2 == '>') {
                    if (ch3 == '=') {
                        _pos += 2;
                        return OpCode.NE_EQ;
                    }
                    _pos++;
                    return OpCode.LT_GT;
                }
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.LT_EQ;
                }
                return OpCode.LT;
            // GT,         // >
            // GT_EQ,      // >=
            // SHR_EQ      // >>=
            // ASR_EQ,     // >>>=
            // SHR,        // >>
            // ASR,        // >>>
            case '>':
                if (ch2 == '>') {
                    if (ch3 == '>') {
                        dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
                        if (ch4 == '=') { // >>>=
                            _pos += 3;
                            return OpCode.ASR_EQ;
                        }
                        _pos += 2;
                        return OpCode.ASR; // >>>
                    }
                    if (ch3 == '=') { // >>=
                        _pos += 2;
                        return OpCode.SHR_EQ;
                    }
                    _pos++;
                    return OpCode.SHR;
                }
                if (ch2 == '=') { // >=
                    _pos++;
                    return OpCode.GT_EQ;
                }
                // >
                return OpCode.GT;
            // NOT,          // !
            // NOT_EQ        // !=
            // NOT_LT_GT,    // !<>
            // NOT_LT_GT_EQ, // !<>=
            // NOT_LT,       // !<
            // NOT_LT_EQ,    // !<=
            // NOT_GT,       // !>
            // NOT_GT_EQ,    // !>=
            case '!':
                if (ch2 == '<') { // !<
                    if (ch3 == '>') { // !<>
                        dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
                        if (ch4 == '=') { // !<>=
                            _pos += 3;
                            return OpCode.NOT_LT_GT_EQ;
                        }
                        _pos += 2;
                        return OpCode.NOT_LT_GT; // !<>
                    }
                    if (ch3 == '=') { // !<=
                        _pos += 2;
                        return OpCode.NOT_LT_EQ;
                    }
                    _pos++;
                    return OpCode.NOT_LT; // !<
                }
                // "!>" and "!>=" are listed above but were never recognised.
                // NOTE(review): assumes OpCode declares NOT_GT and NOT_GT_EQ as the list suggests - confirm.
                if (ch2 == '>') { // !>
                    if (ch3 == '=') { // !>=
                        _pos += 2;
                        return OpCode.NOT_GT_EQ;
                    }
                    _pos++;
                    return OpCode.NOT_GT;
                }
                if (ch2 == '=') { // !=
                    _pos++;
                    return OpCode.NOT_EQ;
                }
                return OpCode.NOT;
            // PAR_OPEN,   // (
            case '(':
                return OpCode.PAR_OPEN;
            // PAR_CLOSE,  // )
            case ')':
                return OpCode.PAR_CLOSE;
            // SQ_OPEN,    // [
            case '[':
                return OpCode.SQ_OPEN;
            // SQ_CLOSE,   // ]
            case ']':
                return OpCode.SQ_CLOSE;
            // CURL_OPEN,  // {
            case '{':
                return OpCode.CURL_OPEN;
            // CURL_CLOSE, // }
            case '}':
                return OpCode.CURL_CLOSE;
            // QUEST,      // ?
            case '?':
                return OpCode.QUEST;
            // COMMA,      // ,
            case ',':
                return OpCode.COMMA;
            // SEMICOLON,  // ;
            case ';':
                return OpCode.SEMICOLON;
            // COLON,      // :
            case ':':
                return OpCode.COLON;
            // DOLLAR,     // $
            case '$':
                return OpCode.DOLLAR;
            // EQ,         // =
            // QE_EQ,      // ==
            // EQ_GT,      // =>
            case '=':
                if (ch2 == '=') { // ==
                    _pos++;
                    return OpCode.QE_EQ;
                }
                if (ch2 == '>') { // =>
                    _pos++;
                    return OpCode.EQ_GT;
                }
                return OpCode.EQ;
            // MUL,        // *
            // MUL_EQ,     // *=
            case '*':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.MUL_EQ;
                }
                return OpCode.MUL;
            // MOD,        // %
            // MOD_EQ,     // %=
            case '%':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.MOD_EQ;
                }
                return OpCode.MOD;
            // XOR,        // ^
            // XOR_EQ,     // ^=
            // LOG_XOR,    // ^^
            // LOG_XOR_EQ, // ^^=
            case '^':
                if (ch2 == '^') {
                    if (ch3 == '=') {
                        _pos += 2;
                        return OpCode.LOG_XOR_EQ;
                    }
                    _pos++;
                    return OpCode.LOG_XOR;
                }
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.XOR_EQ;
                }
                return OpCode.XOR;
            // INV,        // ~
            // INV_EQ,     // ~=
            case '~':
                if (ch2 == '=') {
                    _pos++;
                    return OpCode.INV_EQ;
                }
                return OpCode.INV;
            // AT,         // @
            case '@':
                return OpCode.AT;
            // SHARP       // #
            case '#':
                return OpCode.SHARP;
            default:
                return OpCode.NONE;
        }
    }

    /// Reads a character literal; the opening quote has already been consumed.
    protected Token processCharacterLiteral() {
        _sharedCharacterLiteralToken.setPos(_startLine, _startPos);
        if (_pos + 2 > _len)
            return parserError("Invalid character literal", _sharedCharacterLiteralToken);
        dchar ch = _lineText[_pos++];
        dchar ch2 = _lineText[_pos++];
        dchar type = 0;
        if (ch == '\\') {
            // process escaped character - store it in ch
            // TODO: support all escape sequences
            switch(ch2) {
                case 'r':
                    ch = '\r';
break;
                case 'n':
                    ch = '\n';
                    break;
                case 't':
                    ch = '\t';
                    break;
                case '\\':
                    ch = '\\';
                    break;
                // further single character escapes defined by the language
                case '0':
                    ch = '\0';
                    break;
                case 'a':
                    ch = '\a';
                    break;
                case 'b':
                    ch = '\b';
                    break;
                case 'f':
                    ch = '\f';
                    break;
                case 'v':
                    ch = '\v';
                    break;
                default:
                    // \' \" \? and anything unknown: the character stands for itself
                    ch = ch2;
                    break;
            }
            // here must be closing '
            if (_pos + 1 > _len)
                return parserError("Invalid character literal", _sharedCharacterLiteralToken);
            ch2 = _lineText[_pos++];
        }
        if (ch2 != '\'')
            return parserError("Invalid character literal", _sharedCharacterLiteralToken);
        if (_pos < _len) {
            dchar t = _lineText[_pos];
            if (t == 'd' || t == 'w' || t == 'c') {
                type = t;
                _pos++;
            } else if (isIdentMiddleChar(t)) {
                // The character FOLLOWING the literal has to be tested. The
                // old code tested the literal's own value, so 'a'; was an error.
                return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken);
            }
        }
        _sharedCharacterLiteralToken.setCharacter(ch, type);
        return _sharedCharacterLiteralToken;
    }

    /// Reads a "..." string, or a wysiwyg string (r"..." or `...`) in which
    /// backslashes have no special meaning. The literal may span several lines.
    protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
        bool wysiwyg = (delimiter == 'r' || delimiter == '`');
        //writeln("processDoubleQuotedString()");
        _sharedStringLiteralToken.setPos(_startLine, _startPos);
        _stringLiteralAppender.reset();
        if (delimiter == 'r') {
            _pos++;
            delimiter = '\"';
        }
        dchar type = 0;
        for (;;) {
            int i = _pos;
            int endPos = int.max;
            for (; i < _len; i++) {
                dchar c = _lineText[i];
                if (c == '\\' && !wysiwyg) {
                    // Skip the escaped character, so that \" does not end the
                    // string while "\\" followed by a quote does. Looking at
                    // the single preceding character got the latter wrong, and
                    // wysiwyg strings must not treat backslash as an escape.
                    i++;
                    continue;
                }
                if (c == delimiter) {
                    endPos = i;
                    break;
                }
            }
            if (endPos != int.max) {
                // found end quote
                _stringLiteralAppender.append(_lineText[_pos .. endPos]);
                _pos = endPos + 1;
                break;
            }
            // no quote by end of line
            _stringLiteralAppender.append(_lineText[_pos .. $]);
            _stringLiteralAppender.appendEol();
            if (!nextLine()) {
                // do we need to throw exception if eof comes before end of string?
break;
            }
        }
        dchar t = 0;
        if (_pos < _len) {
            dchar ch = _lineText[_pos];
            if (ch == 'c' || ch == 'w' || ch == 'd') {
                t = ch;
                _pos++;
                if (_pos < _len) {
                    ch = _lineText[_pos];
                    if (isIdentMiddleChar(ch))
                        return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
                }
            } else if (isIdentMiddleChar(ch))
                return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
        }
        if (t != 0) {
            if (type != 0 && t != type)
                return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
            type = t;
        }
        if (wysiwyg) {
            // no escape processing
            _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
            return _sharedStringLiteralToken;
        }
        _stringLiteralAppender.processEscapeSequences();
        _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
        return _sharedStringLiteralToken;
    }

    /// Time at which this tokenizer was created; source of the date / time special tokens.
    protected SysTime buildTime;

    // English abbreviations used by the date / time special tokens
    private static immutable string[12] BUILD_MONTH_NAMES = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
    private static immutable string[7] BUILD_WEEKDAY_NAMES = [
        "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

    // string literal of the date of compilation "mmm dd yyyy"
    protected dstring formatBuildDate() {
        import std.format : format;
        // Month values start at 1 (January)
        return to!dstring(format("%s %2d %04d",
            BUILD_MONTH_NAMES[buildTime.month - 1], buildTime.day, buildTime.year));
    }

    // string literal of the time of compilation "hh:mm:ss"
    protected dstring formatBuildTime() {
        import std.format : format;
        return to!dstring(format("%02d:%02d:%02d",
            buildTime.hour, buildTime.minute, buildTime.second));
    }

    // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
    protected dstring formatBuildTimestamp() {
        import std.format : format;
        // DayOfWeek values start at 0 (Sunday)
        return to!dstring(format("%s %s %2d %02d:%02d:%02d %04d",
            BUILD_WEEKDAY_NAMES[buildTime.dayOfWeek], BUILD_MONTH_NAMES[buildTime.month - 1],
            buildTime.day, buildTime.hour, buildTime.minute, buildTime.second, buildTime.year));
    }

    static immutable dstring VERSION = "0.1";
    static immutable dstring VENDOR = "coolreader.org";

    /// Returns the shared string literal token, positioned at the current
    /// token start and holding str as its text.
    protected Token makeSpecialTokenString(dstring str, int pos) {
        _sharedStringLiteralToken.setPos(_startLine, _startPos);
        _sharedStringLiteralToken.setText(cast(dchar[])str, 0);
        return
_sharedStringLiteralToken;
    }

    /**
     * Replaces a special token keyword with the string literal it stands for.
     *
     * For any other keyword the error is reported through parserError, which
     * either throws or returns an invalid token. Previously that result was
     * discarded and null was returned.
     */
    protected Token processSpecialToken(Keyword keyword, int pos) {
        switch (keyword) {
            //Special Token  Replaced with
            case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
                return makeSpecialTokenString(formatBuildDate(), pos);
            case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss"
                return makeSpecialTokenString(formatBuildTime(), pos);
            case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
                return makeSpecialTokenString(formatBuildTimestamp(), pos);
            case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D"
                return makeSpecialTokenString(VENDOR, pos);
            case Keyword.VERSION_: // Compiler version as an integer, such as 2001
                return makeSpecialTokenString(VERSION, pos);
            default:
                return parserError("Unknown special token", _line, pos);
        }
    }

    /// Line on which the token being read starts.
    protected int _startLine;
    /// Cursor position at which the token being read starts.
    protected int _startPos;

    // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls).
    Token nextToken() {
        _startLine = _line;
        _startPos = _pos;
        dchar ch = nextChar();
        if (ch == EOF_CHAR) {
            return emitEof();
        }
        if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
            // white space (treat EOL as whitespace, too)
            return processWhiteSpace(ch);
        }
        dchar next = _pos < _len ?
_lineText[_pos] : 0;
        // comments
        if (ch == '/') {
            if (next == '/')
                return processOneLineComment();
            else if (next == '*')
                return processMultilineComment();
            else if (next == '+')
                return processNestedComment();
        }
        if (ch == '#' && _line == 1)
            return processOneLineSharpComment();
        // string and character literals
        if (ch == '\"')
            return processDoubleQuotedOrWysiwygString(ch);
        if (ch == '\'')
            return processCharacterLiteral();
        if (ch == 'x' && next == '\"')
            return processHexString();
        if (ch == 'q' && next == '\"')
            return processDelimitedString();
        if ((ch == 'r' && next == '\"') || (ch == '`'))
            return processDoubleQuotedOrWysiwygString(ch);
        int oldPos = _pos - 1;

        // numbers
        if (ch == '0') {
            if (next == 'b' || next == 'B')
                return processBinaryNumber();
            if (next == 'x' || next == 'X')
                return processHexNumber();
            if (next >= '0' && next <= '9')
                return processOctNumber();
            // (a second, identical test that led to processDecNumber could never be reached and was removed)
        }
        if (ch >= '0' && ch <= '9')
            return processDecNumber(ch);
        if (ch == '.' && next >= '0' && next <= '9') { // .123
            // This path does not go through processDecNumber, so the shared
            // real token has to be positioned here.
            _sharedRealToken.setPos(_startLine, _startPos);
            return processDecFloatSecondPart(0);
        }

        if (ch == '_' || isUniversalAlpha(ch)) {
            // start of identifier or keyword?
Keyword keyword = detectKeyword(ch);
            if (keyword == Keyword.NONE)
                return processIdent(ch); // not a keyword: plain identifier
            switch (keyword) {
                //Special Token  Replaced with
                case Keyword.EOF:
                    return emitEof(); // sets the scanner to the end of the file
                case Keyword.DATE:      // string literal of the date of compilation "mmm dd yyyy"
                case Keyword.TIME:      // string literal of the time of compilation "hh:mm:ss"
                case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
                case Keyword.VENDOR:    // Compiler vendor string, such as "Digital Mars D"
                case Keyword.VERSION_:  // Compiler version as an integer, such as 2001
                    return processSpecialToken(keyword, oldPos);
                default:
                    _sharedKeywordToken.setPos(_startLine, _startPos);
                    _sharedKeywordToken.keyword = keyword;
                    return _sharedKeywordToken;
            }
        }
        OpCode op = detectOp(ch);
        if (op == OpCode.NONE)
            return parserError("Invalid token", _line, _pos);
        _sharedOpToken.setPos(_startLine, _startPos);
        _sharedOpToken.opCode = op;
        return _sharedOpToken;
    }


}

// Prints every token of a test file; compiled only when version DisableLexerTest is set.
unittest {
    version(DisableLexerTest) {
        import std.stdio;
        import std.conv;
        import std.utf;
        import dlangui.core.linestream;
        string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
        writeln("opening file");
        try {
            std.stream.File f = new std.stream.File(fname);
            scope(exit) { f.close(); }
            try {
                LineStream lines = LineStream.create(f, fname);
                Tokenizer tokenizer = new Tokenizer(lines);
                for (;;) {
                    Token token = tokenizer.nextToken();
                    if (token is null) {
                        writeln("Null token returned");
                        break;
                    }
                    if (token.type == TokenType.EOF) {
                        writeln("EOF token");
                        break;
                    }
                    writeln("", token.line, ":", token.pos, "\t", token.toString);
                }
            } catch (Exception e) {
                writeln("Exception " ~ e.toString);
}
        } catch (Exception e) {
            writeln("Exception " ~ e.toString);
        }
    }
}

/// converts named entity to character, returns 0 if not found
dchar entityToChar(string name) {
    auto found = name in entityToCharMap;
    if (found is null)
        return 0;
    return *found;
}

/// finds entity name for character, returns null if not found
string charToEntity(dchar ch) {
    auto found = ch in charToEntityMap;
    if (found is null)
        return null;
    return *found;
}

// lookup tables, one per direction
private __gshared dchar[string]entityToCharMap;
private __gshared string[dchar]charToEntityMap;
/// registers an entity in both lookup tables
private void addEntity(string name, dchar ch) {
    entityToCharMap[name] = ch;
    charToEntityMap[ch] = name;
}
__gshared static this() {
    addEntity("quot", 34);
    addEntity("amp", 38);
    addEntity("lt", 60);
    addEntity("gt", 62);
    addEntity("OElig", 338);
    addEntity("oelig", 339);
    addEntity("Scaron", 352);
    addEntity("scaron", 353);
    addEntity("Yuml", 376);
    addEntity("circ", 710);
    addEntity("tilde", 732);
    addEntity("ensp", 8194);
    addEntity("emsp", 8195);
    addEntity("thinsp", 8201);
    addEntity("zwnj", 8204);
    addEntity("zwj", 8205);
    addEntity("lrm", 8206);
    addEntity("rlm", 8207);
    addEntity("ndash", 8211);
    addEntity("mdash", 8212);
    addEntity("lsquo", 8216);
    addEntity("rsquo", 8217);
    addEntity("sbquo", 8218);
    addEntity("ldquo", 8220);
    addEntity("rdquo", 8221);
    addEntity("bdquo", 8222);
    addEntity("dagger", 8224);
    addEntity("Dagger", 8225);
    addEntity("permil", 8240);
    addEntity("lsaquo", 8249);
    addEntity("rsaquo", 8250);
    addEntity("euro", 8364);
    addEntity("nbsp", 160);
    addEntity("iexcl", 161);
    addEntity("cent", 162);
    addEntity("pound", 163);
    addEntity("curren", 164);
    addEntity("yen", 165);
    addEntity("brvbar", 166);
    addEntity("sect", 167);
    addEntity("uml",
168); 2880 addEntity("copy", 169); 2881 addEntity("ordf", 170); 2882 addEntity("laquo", 171); 2883 addEntity("not", 172); 2884 addEntity("shy", 173); 2885 addEntity("reg", 174); 2886 addEntity("macr", 175); 2887 addEntity("deg", 176); 2888 addEntity("plusmn", 177); 2889 addEntity("sup2", 178); 2890 addEntity("sup3", 179); 2891 addEntity("acute", 180); 2892 addEntity("micro", 181); 2893 addEntity("para", 182); 2894 addEntity("middot", 183); 2895 addEntity("cedil", 184); 2896 addEntity("sup1", 185); 2897 addEntity("ordm", 186); 2898 addEntity("raquo", 187); 2899 addEntity("frac14", 188); 2900 addEntity("frac12", 189); 2901 addEntity("frac34", 190); 2902 addEntity("iquest", 191); 2903 addEntity("Agrave", 192); 2904 addEntity("Aacute", 193); 2905 addEntity("Acirc", 194); 2906 addEntity("Atilde", 195); 2907 addEntity("Auml", 196); 2908 addEntity("Aring", 197); 2909 addEntity("AElig", 198); 2910 addEntity("Ccedil", 199); 2911 addEntity("Egrave", 200); 2912 addEntity("Eacute", 201); 2913 addEntity("Ecirc", 202); 2914 addEntity("Euml", 203); 2915 addEntity("Igrave", 204); 2916 addEntity("Iacute", 205); 2917 addEntity("Icirc", 206); 2918 addEntity("Iuml", 207); 2919 addEntity("ETH", 208); 2920 addEntity("Ntilde", 209); 2921 addEntity("Ograve", 210); 2922 addEntity("Oacute", 211); 2923 addEntity("Ocirc", 212); 2924 addEntity("Otilde", 213); 2925 addEntity("Ouml", 214); 2926 addEntity("times", 215); 2927 addEntity("Oslash", 216); 2928 addEntity("Ugrave", 217); 2929 addEntity("Uacute", 218); 2930 addEntity("Ucirc", 219); 2931 addEntity("Uuml", 220); 2932 addEntity("Yacute", 221); 2933 addEntity("THORN", 222); 2934 addEntity("szlig", 223); 2935 addEntity("agrave", 224); 2936 addEntity("aacute", 225); 2937 addEntity("acirc", 226); 2938 addEntity("atilde", 227); 2939 addEntity("auml", 228); 2940 addEntity("aring", 229); 2941 addEntity("aelig", 230); 2942 addEntity("ccedil", 231); 2943 addEntity("egrave", 232); 2944 addEntity("eacute", 233); 2945 addEntity("ecirc", 234); 2946 
addEntity("euml", 235); 2947 addEntity("igrave", 236); 2948 addEntity("iacute", 237); 2949 addEntity("icirc", 238); 2950 addEntity("iuml", 239); 2951 addEntity("eth", 240); 2952 addEntity("ntilde", 241); 2953 addEntity("ograve", 242); 2954 addEntity("oacute", 243); 2955 addEntity("ocirc", 244); 2956 addEntity("otilde", 245); 2957 addEntity("ouml", 246); 2958 addEntity("divide", 247); 2959 addEntity("oslash", 248); 2960 addEntity("ugrave", 249); 2961 addEntity("uacute", 250); 2962 addEntity("ucirc", 251); 2963 addEntity("uuml", 252); 2964 addEntity("yacute", 253); 2965 addEntity("thorn", 254); 2966 addEntity("yuml", 255); 2967 addEntity("fnof", 402); 2968 addEntity("Alpha", 913); 2969 addEntity("Beta", 914); 2970 addEntity("Gamma", 915); 2971 addEntity("Delta", 916); 2972 addEntity("Epsilon", 917); 2973 addEntity("Zeta", 918); 2974 addEntity("Eta", 919); 2975 addEntity("Theta", 920); 2976 addEntity("Iota", 921); 2977 addEntity("Kappa", 922); 2978 addEntity("Lambda", 923); 2979 addEntity("Mu", 924); 2980 addEntity("Nu", 925); 2981 addEntity("Xi", 926); 2982 addEntity("Omicron", 927); 2983 addEntity("Pi", 928); 2984 addEntity("Rho", 929); 2985 addEntity("Sigma", 931); 2986 addEntity("Tau", 932); 2987 addEntity("Upsilon", 933); 2988 addEntity("Phi", 934); 2989 addEntity("Chi", 935); 2990 addEntity("Psi", 936); 2991 addEntity("Omega", 937); 2992 addEntity("alpha", 945); 2993 addEntity("beta", 946); 2994 addEntity("gamma", 947); 2995 addEntity("delta", 948); 2996 addEntity("epsilon", 949); 2997 addEntity("zeta", 950); 2998 addEntity("eta", 951); 2999 addEntity("theta", 952); 3000 addEntity("iota", 953); 3001 addEntity("kappa", 954); 3002 addEntity("lambda", 955); 3003 addEntity("mu", 956); 3004 addEntity("nu", 957); 3005 addEntity("xi", 958); 3006 addEntity("omicron", 959); 3007 addEntity("pi", 960); 3008 addEntity("rho", 961); 3009 addEntity("sigmaf", 962); 3010 addEntity("sigma", 963); 3011 addEntity("tau", 964); 3012 addEntity("upsilon", 965); 3013 addEntity("phi", 
966); 3014 addEntity("chi", 967); 3015 addEntity("psi", 968); 3016 addEntity("omega", 969); 3017 addEntity("thetasym", 977); 3018 addEntity("upsih", 978); 3019 addEntity("piv", 982); 3020 addEntity("bull", 8226); 3021 addEntity("hellip", 8230); 3022 addEntity("prime", 8242); 3023 addEntity("Prime", 8243); 3024 addEntity("oline", 8254); 3025 addEntity("frasl", 8260); 3026 addEntity("weierp", 8472); 3027 addEntity("image", 8465); 3028 addEntity("real", 8476); 3029 addEntity("trade", 8482); 3030 addEntity("alefsym", 8501); 3031 addEntity("larr", 8592); 3032 addEntity("uarr", 8593); 3033 addEntity("rarr", 8594); 3034 addEntity("darr", 8595); 3035 addEntity("harr", 8596); 3036 addEntity("crarr", 8629); 3037 addEntity("lArr", 8656); 3038 addEntity("uArr", 8657); 3039 addEntity("rArr", 8658); 3040 addEntity("dArr", 8659); 3041 addEntity("hArr", 8660); 3042 addEntity("forall", 8704); 3043 addEntity("part", 8706); 3044 addEntity("exist", 8707); 3045 addEntity("empty", 8709); 3046 addEntity("nabla", 8711); 3047 addEntity("isin", 8712); 3048 addEntity("notin", 8713); 3049 addEntity("ni", 8715); 3050 addEntity("prod", 8719); 3051 addEntity("sum", 8721); 3052 addEntity("minus", 8722); 3053 addEntity("lowast", 8727); 3054 addEntity("radic", 8730); 3055 addEntity("prop", 8733); 3056 addEntity("infin", 8734); 3057 addEntity("ang", 8736); 3058 addEntity("and", 8743); 3059 addEntity("or", 8744); 3060 addEntity("cap", 8745); 3061 addEntity("cup", 8746); 3062 addEntity("int", 8747); 3063 addEntity("there4", 8756); 3064 addEntity("sim", 8764); 3065 addEntity("cong", 8773); 3066 addEntity("asymp", 8776); 3067 addEntity("ne", 8800); 3068 addEntity("equiv", 8801); 3069 addEntity("le", 8804); 3070 addEntity("ge", 8805); 3071 addEntity("sub", 8834); 3072 addEntity("sup", 8835); 3073 addEntity("nsub", 8836); 3074 addEntity("sube", 8838); 3075 addEntity("supe", 8839); 3076 addEntity("oplus", 8853); 3077 addEntity("otimes", 8855); 3078 addEntity("perp", 8869); 3079 addEntity("sdot", 8901); 
3080 addEntity("lceil", 8968); 3081 addEntity("rceil", 8969); 3082 addEntity("lfloor", 8970); 3083 addEntity("rfloor", 8971); 3084 addEntity("loz", 9674); 3085 addEntity("spades", 9824); 3086 addEntity("clubs", 9827); 3087 addEntity("hearts", 9829); 3088 addEntity("diams", 9830); 3089 addEntity("lang", 10216); 3090 addEntity("rang", 10217); 3091 } 3092 3093 3094 3095 //void runTokenizerTest() 3096 unittest 3097 { 3098 import std.algorithm; 3099 class TokenTest { 3100 int _line; 3101 string _file; 3102 this(string file, int line) { 3103 _file = file; 3104 _line = line; 3105 } 3106 bool doTest(Token token) { 3107 return true; 3108 } 3109 void execute(Tokenizer tokenizer) { 3110 Token token = tokenizer.nextToken(); 3111 if (!doTest(token)) { 3112 assert(false, " token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString); 3113 } 3114 } 3115 public override @property string toString() { 3116 return "TokenTest"; 3117 } 3118 } 3119 void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) { 3120 Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line)); 3121 for (int i = 0; i < tokens.length; i++) { 3122 tokens[i].execute(tokenizer); 3123 } 3124 } 3125 class KeywordTest : TokenTest { 3126 Keyword _code; 3127 this(Keyword code, string file = __FILE__, uint line = __LINE__) { 3128 super(file, line); 3129 _code = code; 3130 } 3131 override bool doTest(Token token) { 3132 if (token.type != TokenType.KEYWORD) 3133 return false; 3134 if (token.keyword != _code) 3135 return false; 3136 return true; 3137 } 3138 public override @property string toString() { 3139 return "Keyword:" ~ to!string(_code); 3140 } 3141 } 3142 class OpTest : TokenTest { 3143 OpCode _code; 3144 this(OpCode code, string file = __FILE__, uint line = __LINE__) { 3145 super(file, line); 3146 _code = code; 3147 } 3148 override bool doTest(Token token) { 3149 if (token.type != 
TokenType.OP) 3150 return false; 3151 if (token.opCode != _code) 3152 return false; 3153 return true; 3154 } 3155 public override @property string toString() { 3156 return "Op:" ~ to!string(_code); 3157 } 3158 } 3159 class StringTest : TokenTest { 3160 dstring _value; 3161 dchar _literalType; 3162 this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) { 3163 super(file, line); 3164 _value = value; 3165 _literalType = literalType; 3166 } 3167 override bool doTest(Token token) { 3168 if (token.type != TokenType.STRING) 3169 return false; 3170 if (!token.text.equal(_value)) 3171 return false; 3172 if (token.literalType != _literalType) 3173 return false; 3174 return true; 3175 } 3176 public override @property string toString() { 3177 return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' ')); 3178 } 3179 } 3180 class IntegerTest : TokenTest { 3181 ulong _value; 3182 bool _unsigned; 3183 bool _long; 3184 this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 3185 super(file, line); 3186 _value = value; 3187 _unsigned = unsignedFlag; 3188 _long = longFlag; 3189 } 3190 override bool doTest(Token token) { 3191 if (token.type != TokenType.INTEGER) 3192 return false; 3193 if (token.intValue != _value) 3194 return false; 3195 if (token.isUnsigned != _unsigned) 3196 return false; 3197 if (token.isLong != _long) 3198 return false; 3199 return true; 3200 } 3201 public override @property string toString() { 3202 return "Integer:" ~ to!string(_value); 3203 } 3204 } 3205 class RealTest : TokenTest { 3206 real _value; 3207 ubyte _precision; 3208 bool _imaginary; 3209 this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 3210 super(file, line); 3211 _value = value; 3212 _precision = precision; 3213 _imaginary = imaginary; 3214 } 3215 override bool doTest(Token token) { 3216 if (token.type != TokenType.FLOAT) 3217 
return false; 3218 real diff = token.realValue - _value; 3219 real maxerr = _value / 1000000; 3220 if (diff < 0) diff = -diff; 3221 if (maxerr < 0) maxerr = -maxerr; 3222 if (diff > maxerr) 3223 return false; 3224 if (token.precision != _precision) 3225 return false; 3226 if (token.isImaginary != _imaginary) 3227 return false; 3228 return true; 3229 } 3230 public override @property string toString() { 3231 return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : ""); 3232 } 3233 } 3234 class IdentTest : TokenTest { 3235 string _value; 3236 this(string value, string file = __FILE__, uint line = __LINE__) { 3237 super(file, line); 3238 _value = value; 3239 } 3240 override bool doTest(Token token) { 3241 if (token.type != TokenType.IDENTIFIER) 3242 return false; 3243 if (! to!string(token.text).equal(_value)) 3244 return false; 3245 return true; 3246 } 3247 public override @property string toString() { 3248 return "Ident:" ~ _value; 3249 } 3250 } 3251 class CommentTest : TokenTest { 3252 this(string file = __FILE__, uint line = __LINE__) { 3253 super(file, line); 3254 } 3255 override bool doTest(Token token) { 3256 if (token.type != TokenType.COMMENT) 3257 return false; 3258 return true; 3259 } 3260 public override @property string toString() { 3261 return "Comment"; 3262 } 3263 } 3264 class EOFTest : TokenTest { 3265 this(string file = __FILE__, uint line = __LINE__) { 3266 super(file, line); 3267 } 3268 override bool doTest(Token token) { 3269 if (token.type != TokenType.EOF) 3270 return false; 3271 return true; 3272 } 3273 public override @property string toString() { 3274 return "EOF"; 3275 } 3276 } 3277 class WhiteSpaceTest : TokenTest { 3278 this(string file = __FILE__, uint line = __LINE__) { 3279 super(file, line); 3280 } 3281 override bool doTest(Token token) { 3282 if (token.type != TokenType.WHITESPACE) 3283 return false; 3284 return true; 3285 } 3286 public override @property string toString() { 3287 
return "whiteSpace"; 3288 } 3289 } 3290 TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) { 3291 return new StringTest(value, literalType, file, line); 3292 } 3293 TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 3294 return new IntegerTest(value, unsignedFlag, longFlag, file, line); 3295 } 3296 TokenTest checkReal(real value, byte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 3297 return new RealTest(value, precision, imaginary, file, line); 3298 } 3299 TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) { 3300 return new IdentTest(value, file, line); 3301 } 3302 TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) { 3303 return new KeywordTest(value, file, line); 3304 } 3305 TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) { 3306 return new OpTest(value, file, line); 3307 } 3308 TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) { 3309 return new WhiteSpaceTest(file, line); 3310 } 3311 TokenTest checkComment(string file = __FILE__, uint line = __LINE__) { 3312 return new CommentTest(file, line); 3313 } 3314 TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) { 3315 return new EOFTest(file, line); 3316 } 3317 3318 // test strings 3319 testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]); 3320 3321 // test strings 3322 testTokenizer(q"TEST 3323 "simple string" 3324 "simple\nstring" 3325 `simple string` 3326 "simple string"d 3327 "simple string"c 3328 "simple string"w 3329 "simple\"string" 3330 "\r\n\f\t\\\"\'&" 3331 TEST" 3332 , [ 3333 checkString("simple string"), 3334 checkSpace(), 3335 checkString("simple\nstring"), 3336 checkSpace(), 3337 checkString("simple string"), 3338 checkSpace(), 3339 checkString("simple string", 'd'), 3340 checkSpace(), 
3341 checkString("simple string", 'c'), 3342 checkSpace(), 3343 checkString("simple string", 'w'), 3344 checkSpace(), 3345 checkString("simple\"string"), 3346 checkSpace(), 3347 checkString("\r\n\f\t\\\"\'&"), 3348 ]); 3349 // basic test 3350 testTokenizer(q"TEST 3351 int i; 3352 TEST" 3353 , [ 3354 checkKeyword(Keyword.INT), 3355 checkSpace(), 3356 checkIdent("i"), 3357 checkOp(OpCode.SEMICOLON), 3358 checkEOF() 3359 ]); 3360 // test numbers 3361 testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f" 3362 , [ 3363 checkInteger(13), 3364 checkSpace(), 3365 checkInteger(0x123abcd, true, false), 3366 checkSpace(), 3367 checkInteger(0xabc, false, true), 3368 checkSpace(), 3369 checkInteger(std.conv.octal!743), 3370 checkSpace(), 3371 checkInteger(192_837_465), 3372 checkSpace(), 3373 checkInteger(0), 3374 checkSpace(), 3375 checkInteger(192837465), 3376 checkSpace(), 3377 checkReal(5.25), 3378 checkSpace(), 3379 checkReal(12.3f, 0), 3380 checkSpace(), 3381 checkReal(54.1L, 2), 3382 checkSpace(), 3383 checkReal(67.1, 1, true), 3384 checkSpace(), 3385 checkReal(3e3), 3386 checkSpace(), 3387 checkReal(25.67e-5f, 0), 3388 checkEOF() 3389 ]); 3390 } 3391