module ddc.lexer.tokenizer;

import ddc.lexer.textsource;
import ddc.lexer.exceptions;

import std.stdio;
import std.datetime;
import std.conv;
import std.utf;
import std.math;

/// Token kind tags.
enum TokenType : ubyte {
    EOF,
    //EOL,
    WHITESPACE,
    COMMENT,
    IDENTIFIER,
    STRING,
    CHARACTER,
    INTEGER,
    FLOAT,
    KEYWORD,
    OP,
    INVALID
}

// Bit table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex D,
// "Universal character names for identifiers") OR a..z OR A..Z OR _
// One bit per code point: word index is (code >> 5), bit index is (code & 31).
// max code is 0xd7ff
// 1728 words = (0xd7ff + 1) / 32
// The literal below matches the output format of the generator in the unittest
// (enabled with ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE); do not edit the words by hand.
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
    0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
    0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
    0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
    0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
    0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
    0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
    0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
    0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
    0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
    0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
    0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff
    0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff
    0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff
    0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff
    0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff
    0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff
    0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff
    0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
];

/// Returns true if the character is A..Z, a..z, _ or a universal alpha
/// (i.e. its bit is set in UNIVERSAL_ALPHA_FLAGS).
/// Params:
///     ch = code point to classify
/// Returns: false for every code point above 0xd7ff, otherwise the table bit for ch.
bool isUniversalAlpha(dchar ch) pure nothrow {
    // The table stops at 0xd7ff; nothing above it is tabulated.
    if (ch > 0xd7ff)
        return false;
    // Word index is ch / 32, bit index is ch % 32. The mask is unsigned so that
    // selecting bit 31 does not shift a signed 1 into the sign bit.
    return (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1u << (ch & 31))) != 0;
}

/// Returns true if the character can appear at the beginning of an identifier
/// (same set as isUniversalAlpha).
bool isIdentStartChar(dchar ch) pure nothrow {
    return isUniversalAlpha(ch);
}

/// Returns true if the character can appear in the middle of an identifier:
/// an ASCII digit 0..9 or anything accepted by isUniversalAlpha.
bool isIdentMiddleChar(dchar ch) pure nothrow {
    return (ch >= '0' && ch <= '9') || isUniversalAlpha(ch);
}

/// Compile-time switch: when true, the slow reference predicate below is compiled
/// and the unittest dumps a freshly generated table and checks it against
/// isUniversalAlpha.
immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
    /// true if ch equals v
    bool r(dchar ch, wchar v) pure nothrow {
        return ch == v;
    }

    /// true if ch lies in the inclusive range v1..v2
    bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
        return ch >= v1 && ch <= v2;
    }

    /// Reference implementation: tests c against every listed range one by one.
    /// Does not include a..z, A..Z or _ (callers add those separately).
    bool isUniversalAlphaSlow(dchar c) pure nothrow {
        return
            // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
            // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
            r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
            || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
            //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
            //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
            //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
            //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
            //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
            || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
            || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
            || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
            || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
            || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
            //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
            //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
            || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
            || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
            //Armenian: 0531−0556, 0561−0587
            || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
            //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
            //05F0−05F2
            || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
            || r(c, 0x05F0,0x05F2)
            //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
            //06D0−06DC, 06E5−06E8, 06EA−06ED
            || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
            || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
            //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
            || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
            //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
            //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
            //09DC−09DD, 09DF−09E3, 09F0−09F1
            || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
            || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
            || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
            //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
            //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
            //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
            || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
            || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
            || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
            //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
            //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
            //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
            || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
            || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
            || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
            // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
            //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
            //0B5C−0B5D, 0B5F−0B61
            || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
            || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
            || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
            //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
            //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
            //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
            || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
            || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
            || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
            //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
            //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
            || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
            || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
            //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
            //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
            //0CE0−0CE1
            || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
            || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
            || r(c, 0x0CE0,0x0CE1)
            //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
            //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
            || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
            || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
            //Thai: 0E01−0E3A, 0E40−0E5B
            || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
            //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
            //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
            //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
            //0EC8−0ECD, 0EDC−0EDD
            || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
            || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
            || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
            || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
            //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
            //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
            //0FB1−0FB7, 0FB9
            || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
            || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
            || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
            //Georgian: 10A0−10C5, 10D0−10F6
            || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
            //Hiragana: 3041−3093, 309B−309C
            || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
            //Katakana: 30A1−30F6, 30FB−30FC
            || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
            //Bopomofo: 3105−312C
            || r(c, 0x3105,0x312C)
            //CJK Unified Ideographs: 4E00−9FA5
            || r(c, 0x4E00,0x9FA5)
            //Hangul: AC00−D7A3
            || r(c, 0xAC00,0xD7A3)
            //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
            //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
            //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
            || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
            || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
            || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
            //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
            //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
            //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
            //2133−2138, 2160−2182, 3005−3007, 3021−3029
            || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
            || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
            || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
            || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
            ;
    }

}

unittest {
    // Always-on sanity checks of the table lookup and its wrappers.
    assert(isUniversalAlpha('a') && isUniversalAlpha('Z') && isUniversalAlpha('_'));
    assert(!isUniversalAlpha('0') && !isUniversalAlpha('[') && !isUniversalAlpha(' '));
    assert(isUniversalAlpha(cast(dchar) 0x0410));   // inside Cyrillic range 040E-044F
    assert(isUniversalAlpha(cast(dchar) 0xD7A3));   // last Hangul code point in the table
    assert(!isUniversalAlpha(cast(dchar) 0xD7A4));
    assert(!isUniversalAlpha(cast(dchar) 0xD800));  // above the table's max code
    assert(!isUniversalAlpha(cast(dchar) 0x10FFFF));
    assert(isIdentStartChar('_') && !isIdentStartChar('9'));
    assert(isIdentMiddleChar('9') && isIdentMiddleChar('x') && !isIdentMiddleChar('-'));

    static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
        immutable uint itemsInRow = 8;
        immutable uint bitsPerRow = itemsInRow * 32;

        // Reference predicate the table is generated from and verified against.
        static bool expectedAlpha(uint code) {
            return isUniversalAlphaSlow(cast(dchar) code) || code == '_'
                || (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
        }

        // Find the highest code point that must be representable in the table.
        uint maxAlpha = 0;
        for (uint i = 0; i < 0x10000; i++) {
            if (expectedAlpha(i))
                maxAlpha = i;
        }
        // Extend to the last code point of the row that contains maxAlpha. The
        // division-first form also works when maxAlpha is an exact multiple of
        // bitsPerRow (the previous round-up formula dropped that row).
        maxAlpha = (maxAlpha / bitsPerRow + 1) * bitsPerRow - 1;
        writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex D) OR a..z OR A..Z OR _");
        writefln("// max code is 0x%04x", maxAlpha);
        writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
        for (uint i = 0; i <= maxAlpha; i += 32) {
            if ((i / 32) % itemsInRow == 0)
                write(" ");
            // Pack the next 32 code points into one word, lowest code in bit 0.
            uint flags = 0;
            for (uint j = 0; j < 32; j++) {
                if (expectedAlpha(i + j))
                    flags |= (1u << j);
            }
            writef("0x%08x", flags);
            if (i != maxAlpha / 32 * 32)
                write(",");
            if ((i / 32) % itemsInRow == itemsInRow - 1)
                writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
        }
        writeln("];");

        // Verify the checked-in table against the reference for every code point
        // up to the Unicode maximum U+10FFFF.
        for (uint code = 0; code <= 0x10FFFF; code++) {
            immutable bool expected = expectedAlpha(code);
            immutable bool actual = isUniversalAlpha(cast(dchar) code);
            if (actual != expected)
                writefln("universalAlpha test failed for char %06x expected %d actual %d", code, expected ? 1 : 0, actual ? 1 : 0);
            assert(actual == expected);
        }
    }
}

/// Operator / punctuation codes. The numeric value of each member is its index
/// into OP_CODE_STRINGS, so the two lists must stay in the same order.
enum OpCode : ubyte {
    NONE,       //    no op
    DIV,        //    /
    DIV_EQ,     //    /=
    DOT,        //    .
    DOT_DOT,    //    ..
    DOT_DOT_DOT,//    ...
    AND,        //    &
    AND_EQ,     //    &=
    LOG_AND,    //    &&
    OR,         //    |
    OR_EQ,      //    |=
    LOG_OR,     //    ||
    MINUS,      //    -
    MINUS_EQ,   //    -=
    MINUS_MINUS,//    --
    PLUS,       //    +
    PLUS_EQ,    //    +=
    PLUS_PLUS,  //    ++
    LT,         //    <
    LT_EQ,      //    <=
    SHL,        //    <<
    SHL_EQ,     //    <<=
    LT_GT,      //    <>
    NE_EQ,      //    <>=
    GT,         //    >
    GT_EQ,      //    >=
    SHR_EQ,     //    >>=
    ASR_EQ,     //    >>>=
    SHR,        //    >>
    ASR,        //    >>>
    NOT,        //    !
    NOT_EQ,     //    !=
    NOT_LT_GT,  //    !<>
    NOT_LT_GT_EQ, //    !<>=
    NOT_LT,     //    !<
    NOT_LT_EQ,  //    !<=
    NOT_GT,     //    !>
    NOT_GT_EQ,  //    !>=
    PAR_OPEN,   //    (
    PAR_CLOSE,  //    )
    SQ_OPEN,    //    [
    SQ_CLOSE,   //    ]
    CURL_OPEN,  //    {
    CURL_CLOSE, //    }
    QUEST,      //    ?
    COMMA,      //    ,
    SEMICOLON,  //    ;
    COLON,      //    :
    DOLLAR,     //    $
    EQ,         //    =
    QE_EQ,      //    ==   NOTE(review): presumably meant EQ_EQ; name kept because it is public
    MUL,        //    *
    MUL_EQ,     //    *=
    MOD,        //    %
    MOD_EQ,     //    %=
    XOR,        //    ^
    XOR_EQ,     //    ^=
    LOG_XOR,    //    ^^
    LOG_XOR_EQ, //    ^^=
    INV,        //    ~
    INV_EQ,     //    ~=
    AT,         //    @
    EQ_GT,      //    =>
    SHARP       //    #
}

/// Source spelling of every OpCode member, indexed by the member's numeric value.
immutable dstring[] OP_CODE_STRINGS = [
    "",
    "/",
    "/=",
    ".",
    "..",
    "...",
    "&",
    "&=",
    "&&",
    "|",
    "|=",
    "||",
    "-",
    "-=",
    "--",
    "+",
    "+=",
    "++",
    "<",
    "<=",
    "<<",
    "<<=",
    "<>",
    "<>=",
    ">",
    ">=",
    ">>=",
    ">>>=",
    ">>",
    ">>>",
    "!",
    "!=",
    "!<>",
    "!<>=",
    "!<",
    "!<=",
    "!>",
    "!>=",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
    "?",
    ",",
    ";",
    ":",
    "$",
    "=",
    "==",
    "*",
    "*=",
    "%",
    "%=",
    "^",
    "^=",
    "^^",
    "^^=",
    "~",
    "~=",
    "@",
    "=>",
    "#"
];

// Fail the build if a member is added to OpCode without a matching string
// (getOpNameD would otherwise index past the end of the table at run time).
static assert(OP_CODE_STRINGS.length == OpCode.max + 1,
    "OP_CODE_STRINGS must have exactly one entry per OpCode member");

/// Returns the source spelling of an operator code.
/// Params:
///     op = operator code to look up
/// Returns: the matching entry of OP_CODE_STRINGS; the empty string for OpCode.NONE.
dstring getOpNameD(OpCode op) pure nothrow {
    return OP_CODE_STRINGS[op];
}

unittest {
    assert(getOpNameD(OpCode.NONE) == ""d);
    assert(getOpNameD(OpCode.SHR_EQ) == ">>="d);
    assert(getOpNameD(OpCode.ASR) == ">>>"d);
    assert(getOpNameD(OpCode.SHARP) == "#"d);
}

enum Keyword : ubyte {
    NONE,
    ABSTRACT,
    ALIAS,
    ALIGN,
    ASM,
    ASSERT,
    AUTO,

    BODY,
    BOOL,
    BREAK,
    BYTE,

    CASE,
    CAST,
    CATCH,
    CDOUBLE,
    CENT,
    CFLOAT,
    CHAR,
    CLASS,
    CONST,
    CONTINUE,
    CREAL,

    DCHAR,
    DEBUG,
    DEFAULT,
    DELEGATE,
    DELETE,
    DEPRECATED,
    DO,
    DOUBLE,

    ELSE,
    ENUM,
    EXPORT,
    EXTERN,

    FALSE,
    FINAL,
    FINALLY,
    FLOAT,
    FOR,
    FOREACH,
    FOREACH_REVERSE,
    FUNCTION,

    GOTO,

    IDOUBLE,
    IF,
    IFLOAT,
    IMMUTABLE,
    IMPORT,
    IN,
    INOUT,
    INT,
    INTERFACE,
    INVARIANT,
    IREAL,
    IS,

    LAZY,
    LONG,

    MACRO,
    MIXIN,
    MODULE,

    NEW,
    NOTHROW,
    NULL,

    OUT,
    OVERRIDE,

    PACKAGE,
    PRAGMA,
    PRIVATE,
    PROTECTED,
    PUBLIC,
    PURE,

    REAL,
667 REF, 668 RETURN, 669 670 SCOPE, 671 SHARED, 672 SHORT, 673 STATIC, 674 STRUCT, 675 SUPER, 676 SWITCH, 677 SYNCHRONIZED, 678 679 TEMPLATE, 680 THIS, 681 THROW, 682 TRUE, 683 TRY, 684 TYPEDEF, 685 TYPEID, 686 TYPEOF, 687 688 UBYTE, 689 UCENT, 690 UINT, 691 ULONG, 692 UNION, 693 UNITTEST, 694 USHORT, 695 696 VERSION, 697 VOID, 698 VOLATILE, 699 700 WCHAR, 701 WHILE, 702 WITH, 703 704 FILE, 705 MODULE__, 706 LINE, 707 FUNCTION__, 708 PRETTY_FUNCTION, 709 710 //Special Token Replaced with 711 DATE, // string literal of the date of compilation "mmm dd yyyy" 712 EOF, // sets the scanner to the end of the file 713 TIME, // string literal of the time of compilation "hh:mm:ss" 714 TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 715 VENDOR, // Compiler vendor string, such as "Digital Mars D" 716 VERSION_, // Compiler version as an integer, such as 2001 717 718 GSHARED, 719 TRAITS, 720 VECTOR, 721 PARAMETERS, 722 723 } 724 725 immutable dstring[] KEYWORD_STRINGS = [ 726 "", 727 "abstract", 728 "alias", 729 "align", 730 "asm", 731 "assert", 732 "auto", 733 734 "body", 735 "bool", 736 "break", 737 "byte", 738 739 "case", 740 "cast", 741 "catch", 742 "cdouble", 743 "cent", 744 "cfloat", 745 "char", 746 "class", 747 "const", 748 "continue", 749 "creal", 750 751 "dchar", 752 "debug", 753 "default", 754 "delegate", 755 "delete", 756 "deprecated", 757 "do", 758 "double", 759 760 "else", 761 "enum", 762 "export", 763 "extern", 764 765 "false", 766 "final", 767 "finally", 768 "float", 769 "for", 770 "foreach", 771 "foreach_reverse", 772 "function", 773 774 "goto", 775 776 "idouble", 777 "if", 778 "ifloat", 779 "immutable", 780 "import", 781 "in", 782 "inout", 783 "int", 784 "interface", 785 "invariant", 786 "ireal", 787 "is", 788 789 "lazy", 790 "long", 791 792 "macro", 793 "mixin", 794 "module", 795 796 "new", 797 "nothrow", 798 "null", 799 800 "out", 801 "override", 802 803 "package", 804 "pragma", 805 "private", 806 "protected", 807 
"public", 808 "pure", 809 810 "real", 811 "ref", 812 "return", 813 814 "scope", 815 "shared", 816 "short", 817 "static", 818 "struct", 819 "super", 820 "switch", 821 "synchronized", 822 823 "template", 824 "this", 825 "throw", 826 "true", 827 "try", 828 "typedef", 829 "typeid", 830 "typeof", 831 832 "ubyte", 833 "ucent", 834 "uint", 835 "ulong", 836 "union", 837 "unittest", 838 "ushort", 839 840 "version", 841 "void", 842 "volatile", 843 844 "wchar", 845 "while", 846 "with", 847 848 "__FILE__", 849 "__MODULE__", 850 "__LINE__", 851 "__FUNCTION__", 852 "__PRETTY_FUNCTION__", 853 854 //Special Token Replaced with 855 "__DATE__", // string literal of the date of compilation "mmm dd yyyy" 856 "__EOF__", // sets the scanner to the end of the file 857 "__TIME__", // string literal of the time of compilation "hh:mm:ss" 858 "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 859 "__VENDOR__", // Compiler vendor string, such as "Digital Mars D" 860 "__VERSION__", // Compiler version as an integer, such as 2001 861 862 863 "__gshared", 864 "__traits", 865 "__vector", 866 "__parameters" 867 ]; 868 869 public dstring getKeywordNameD(Keyword keyword) pure nothrow { 870 return KEYWORD_STRINGS[keyword]; 871 }; 872 873 public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow { 874 for (Keyword i = start; i <= end; i++) { 875 dstring s = KEYWORD_STRINGS[i]; 876 if (s.length > len + 1) 877 continue; // too long 878 bool found = true; 879 for (uint j = 1; j < s.length; j++) { 880 if (s[j] != name[j - 1]) { 881 found = false; 882 break; 883 } 884 } 885 if (found) { 886 //if (s.length == len - 1 || !isIdentMiddleChar(name[s.length - 1])) { 887 if (s.length == len + 1 || !isIdentMiddleChar(name[s.length - 1])) { 888 pos += s.length - 1; 889 return i; 890 } 891 } 892 } 893 return Keyword.NONE; 894 } 895 896 /** 897 * Token. 
*/
class Token {
    //                              32bit      64bit platform
    // vtable                       4 bytes    8 bytes
    protected SourceFile _file; //  4 bytes    8 bytes
    protected int _line;        //  4 bytes    4 bytes
    protected int _pos;         //  4 bytes    4 bytes
    protected TokenType _type;  //  1 byte     1 byte
    // total                        17 bytes   25 bytes

    /// returns token type
    @property TokenType type() {
        return _type;
    }
    /// returns file info for source
    @property SourceFile filename() {
        return _file;
    }
    /// returns 1-based source line number of token start
    @property int line() {
        return _line;
    }
    /// returns 1-based source line position of token start
    @property int pos() {
        return _pos;
    }
    /// returns token text
    @property dstring text() {
        return null;
    }

    // number token properties (base class returns neutral defaults; literal tokens override)
    @property dchar literalType() {
        return 0;
    }
    @property ulong intValue() {
        return 0;
    }
    @property bool isUnsigned() {
        return false;
    }
    // NOTE(review): declared as ulong although it is used as a flag; the return type is kept because overrides must match it
    @property ulong isLong() {
        return false;
    }
    @property real realValue() {
        return 0;
    }
    @property double doubleValue() {
        return 0;
    }
    @property float floatValue() {
        return 0;
    }
    @property byte precision() {
        return 0;
    }
    @property bool isImaginary() {
        return false;
    }

    /// returns true if the token's opcode is any of ( ) [ ] { }
    @property bool isBracket() {
        switch (opCode) {
            case OpCode.PAR_OPEN:
            case OpCode.PAR_CLOSE:
            case OpCode.SQ_OPEN:
            case OpCode.SQ_CLOSE:
            case OpCode.CURL_OPEN:
            case OpCode.CURL_CLOSE:
                return true;
            default:
                return false;
        }
    }
    /// returns true if the token's opcode is one of ( [ {
    @property bool isOpenBracket() {
        switch (opCode) {
            case OpCode.PAR_OPEN:
            case OpCode.SQ_OPEN:
            case OpCode.CURL_OPEN:
                return true;
            default:
                return false;
        }
    }
    /// returns true if the token's opcode is one of ) ] }
    @property bool isCloseBracket() {
        switch (opCode) {
            case OpCode.PAR_CLOSE:
            case OpCode.SQ_CLOSE:
            case OpCode.CURL_CLOSE:
                return true;
            default:
                return false;
        }
    }
    /// returns true for the end-of-file token
    @property bool isEof() {
        return type == TokenType.EOF;
    }

    /// returns opcode ID - for opcode tokens
    @property OpCode opCode() {
        return OpCode.NONE;
    }
    /// returns keyword ID - for keyword tokens
    @property Keyword keyword() {
        return Keyword.NONE;
    }
    /// returns true if this is documentation comment token
    @property bool isDocumentationComment() {
        return false;
    }
    /// returns true if this is multiline
    @property bool isMultilineComment() {
        return false;
    }

    // error handling

    /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property bool isError() {
        return type == TokenType.INVALID;
    }
    /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property string errorMessage() {
        return null;
    }
    /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property int errorCode() {
        return 0;
    }
    /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
    @property TokenType invalidTokenType() {
        return TokenType.INVALID;
    }


    /// creates a token of the given type without position information
    this(TokenType type) {
        _type = type;
    }

    /// creates a token of the given type; line and pos are stored exactly as passed
    this(TokenType type, SourceFile file, int line, int pos) {
        _type = type;
        _file = file;
        _line = line;
        _pos = pos;
    }
    /// set start position for token (line is 1-based, pos is 0-based)
    void setPos(SourceFile file, int line, int pos) {
        _file = file;
        _line = line;
        _pos = pos + 1; // stored 1-based
    }
    /// set source file information for token
    void setFile(SourceFile file) {
        _file = file;
    }
    /// set start position for token (line is 1-based, pos is 0-based)
    void setPos(int line, int pos) {
        _line = line;
        _pos = pos + 1; // stored 1-based
    }

    /// returns a new token object carrying the same data
    public abstract Token clone();

    /// debug representation: line:pos type opcode keyword "text"
    public override @property string toString() {
        string location = to!string(_line) ~ ":" ~ to!string(_pos);
        string kinds = to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword);
        return location ~ " " ~ kinds ~ " \"" ~ toUTF8(text()) ~ "\"";
    }
}

class EofToken : Token {
    this() {
        super(TokenType.EOF);
    }
this(SourceFile file, uint line, uint pos) {
        super(TokenType.EOF, file, line, pos);
    }
    public override Token clone() {
        return new EofToken(_file, _line, _pos);
    }
    public override @property string toString() {
        return "EOF";
    }
}

// treat as white space
//class EolToken : Token {
// this(string file, uint line, uint pos) {
// super(TokenType.EOL, file, line, pos);
// }
//}

/// white space token
class WhiteSpaceToken : Token {
    this() {
        super(TokenType.WHITESPACE);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.WHITESPACE, file, line, pos);
    }
    public override Token clone() {
        return new WhiteSpaceToken(_file, _line, _pos);
    }
    public override @property string toString() {
        return "WhiteSpace";
    }
}

/// operator token; its text is looked up from the opcode
class OpToken : Token {
    OpCode _op;

    public @property override OpCode opCode() {
        return _op;
    }
    public @property void opCode(OpCode op) {
        _op = op;
    }
    public @property override dstring text() {
        return getOpNameD(_op);
    }

    this() {
        super(TokenType.OP);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.OP, file, line, pos);
    }
    public override Token clone() {
        OpToken copy = new OpToken(_file, _line, _pos);
        copy._op = _op;
        return copy;
    }
    public override @property string toString() {
        return "Op:" ~ to!string(_op);
    }
}

/// keyword token; its text is looked up from the keyword id
class KeywordToken : Token {
    Keyword _keyword;

    public @property override Keyword keyword() {
        return _keyword;
    }
    public @property void keyword(Keyword keyword) {
        _keyword = keyword;
    }
    public @property override dstring text() {
        return getKeywordNameD(_keyword);
    }

    this() {
        super(TokenType.KEYWORD);
    }
    this(SourceFile file, uint line, uint pos) {
        super(TokenType.KEYWORD, file, line, pos);
    }
    public override Token clone() {
        KeywordToken copy = new KeywordToken(_file, _line, _pos);
        copy._keyword = _keyword;
        return copy;
    }
    public override @property string toString() {
        return "Keyword:" ~ to!string(_keyword);
    }
}

/// comment token
class CommentToken : Token {
    protected dstring _text;
    protected bool _isDocumentationComment;
    protected bool _isMultilineComment;


    /// returns true if this is documentation comment token
    override @property bool isDocumentationComment() {
        return _isDocumentationComment;
    }

    @property void isDocumentationComment(bool f) {
        _isDocumentationComment = f;
    }

    /// returns true if this is multiline
    override @property bool isMultilineComment() {
        return _isMultilineComment;
    }

    @property void isMultilineComment(bool f) {
        _isMultilineComment = f;
    }

    @property override dstring text() {
        return _text;
    }
    /// stores the given slice without copying it (the array is only cast to dstring)
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    this() {
        super(TokenType.COMMENT);
    }
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.COMMENT, file, line, pos);
        _text = cast(dstring)text;
    }
    /// the clone gets its own copy of the text
    public override Token clone() {
        CommentToken copy = new CommentToken(_file, _line, _pos, _text.dup);
        copy._isDocumentationComment = _isDocumentationComment;
        copy._isMultilineComment = _isMultilineComment;
        return copy;
    }
    public override @property string toString() {
        return "Comment:" ~ to!string(_text);
    }
}

/// Invalid token holder - for error tolerant parsing
class InvalidToken : Token {
    protected dstring _text;
    protected TokenType _invalidTokenType;
    protected int _errorCode;
    protected string _errorMessage;

    /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property string errorMessage() {
        return _errorMessage;
    }
    /// sets error message
    @property void errorMessage(string s) {
        _errorMessage = s;
    }
    /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property int errorCode() {
        return _errorCode;
    }
    /// sets error code
    @property void errorCode(int c) {
        _errorCode = c;
    }
    /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
    override @property TokenType invalidTokenType() {
        return _invalidTokenType;
    }
    /// sets type of token parsing of which has been failed
    @property void invalidTokenType(TokenType t) {
        _invalidTokenType = t;
    }

    /// text of invalid token
    @property override dstring text() {
        return _text;
    }
    /// text of invalid token
    @property void text(dchar[] text) {
        _text = cast(dstring)text;
    }

    this() {
        super(TokenType.INVALID);
    }
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.INVALID, file, line, pos);
        _text = cast(dstring)text;
    }
    /// the clone gets its own copies of the text and the error message
    override Token clone() {
        InvalidToken copy = new InvalidToken(_file, _line, _pos, _text.dup);
        copy._errorMessage = _errorMessage.dup;
        copy._errorCode = _errorCode;
        copy._invalidTokenType = _invalidTokenType;
        return copy;
    }
    override @property string toString() {
        return "Invalid:" ~ to!string(_text);
    }
}

alias tokenizer_ident_t = uint;
alias tokenizer_ident_name_t = dstring;

enum : tokenizer_ident_t {
    NO_IDENT = 0
}

/**
 * Global storage for identifier strings.
 */
class IdentHolder {
    protected tokenizer_ident_t _nextId;
    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;

    public this() {
        // ids start right after the reserved NO_IDENT value
        _nextId = NO_IDENT + 1;
    }

    /**
     * Search for id by name, return NO_IDENT if not found.
*/
    uint findByName(tokenizer_ident_name_t name) {
        if (auto known = name in _nameToId)
            return *known;
        return NO_IDENT;
    }

    /**
     * Search for name by id, return null if not found.
     */
    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
        if (auto known = id in _idToName)
            return *known;
        return null;
    }

    /**
     * Search for ident id by name, create new entry if not found.
     */
    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
        if (auto known = name in _nameToId)
            return *known;
        uint freshId = _nextId++;
        // both maps keep a private copy of the name rather than the caller's array
        immutable tokenizer_ident_name_t ownedName = name.dup;
        _nameToId[ownedName] = freshId;
        _idToName[freshId] = ownedName;
        return freshId;
    }
}

/**
 * Thread local storage for IDs.
 */
IdentHolder identMap;

static this() {
    // init ID storage
    identMap = new IdentHolder();
}

/// string literal token: the text plus a literal type character (0 when none was given)
class StringLiteralToken : Token {
    dstring _text;
    dchar _literalType;

    public @property override dchar literalType() {
        return _literalType;
    }
    public @property override dstring text() {
        return _text;
    }
    /// stores the given slice without copying it (the array is only cast to dstring)
    public void setText(dchar[] text, dchar type) {
        _text = cast(dstring)text;
        _literalType = type;
    }

    this() {
        super(TokenType.STRING);
    }
    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
        super(TokenType.STRING, file, line, pos);
        _text = cast(dstring)text;
        _literalType = type;
    }
    /// the clone gets its own copy of the text
    public override Token clone() {
        return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
    }
    public override @property string toString() {
        // a blank stands in for a missing literal type character
        dchar typeMark = _literalType ? _literalType : ' ';
        return toUTF8("String:\"" ~ _text ~ "\"" ~ typeMark);
    }
}

/// character literal token: one character plus a literal type character
class CharacterLiteralToken : Token {
    dchar _character;
    dchar _literalType;

    @property override dchar literalType() {
        return _literalType;
    }
    @property dchar character() {
        return _character;
    }
    @property override dstring text() {
        return [_character];
    }
    void setCharacter(dchar ch, dchar type) {
        _character = ch;
        _literalType = type;
    }

    this() {
        super(TokenType.CHARACTER);
    }
    this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
        super(TokenType.CHARACTER, file, line, pos);
        _character = character;
        _literalType = type;
    }
    public override Token clone() {
        return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
    }
    public override @property string toString() {
        return "Char:" ~ toUTF8([_character]);
    }
}

/// integer literal token: the value plus unsigned / long flags
class IntegerLiteralToken : Token {
    ulong _value;
    bool _unsigned;
    bool _long;

    public @property override ulong intValue() {
        return _value;
    }
    public @property override bool isUnsigned() {
        return _unsigned;
    }
    public @property override ulong isLong() {
        return _long;
    }
    /// decimal rendering of the stored value
    public @property override dstring text() {
        return to!dstring(_value);
    }
    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
        _value = value;
        _unsigned = unsignedFlag;
        _long = longFlag;
    }
    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
        _unsigned = unsignedFlag;
        _long = longFlag;
    }

    this() {
        super(TokenType.INTEGER);
    }
    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
        super(TokenType.INTEGER, file, line, pos);
        _value = value;
        _unsigned = unsignedFlag;
        _long = longFlag;
    }
    public override Token clone() {
        return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
    }
public override @property string toString() {
        return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? "U" : "");
    }
}

/// floating point literal token: the value plus a precision code and an imaginary flag
class RealLiteralToken : Token {
    real _value;
    byte _precision; // toString() renders 0 as suffix "f", 2 as "L", anything else without suffix
    bool _imaginary;
    /// value converted with to!long
    public @property override ulong intValue() { return to!long(_value); }
    public @property override real realValue() { return _value; }
    public @property override double doubleValue() { return cast(double)_value; }
    public @property override float floatValue() { return cast(float)_value; }
    public @property override byte precision() { return _precision; }
    public @property override bool isImaginary() { return _imaginary; }
    public @property override dstring text() { return to!dstring(_value); }
    public void setValue(real value, byte precision = 1, bool imaginary = false) {
        _value = value;
        _precision = precision;
        _imaginary = imaginary;
    }
    public void setFlags(byte precision = 1, bool imaginary = false) {
        _precision = precision;
        _imaginary = imaginary;
    }
    this() {
        super(TokenType.FLOAT);
    }
    this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
        super(TokenType.FLOAT, file, line, pos);
        _value = value;
        _precision = precision;
        _imaginary = imaginary;
    }
    override public Token clone() {
        return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
    }
    public override @property string toString() {
        return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
    }
}

/// identifier token: stores only an id, the name is kept in identMap
class IdentToken : Token {
    tokenizer_ident_t _id;
    public @property override dstring text() {
        return identMap.nameById(_id);
    }
    /// registers the name in identMap (if necessary) and stores its id
    public void setText(dchar[] text) {
        _id = identMap.idByName(cast(immutable)text);
    }
    this() {
        super(TokenType.IDENTIFIER);
    }
    this(SourceFile file, uint line, uint pos, dchar[] text) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = identMap.idByName(cast(immutable)text);
    }
    this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
        super(TokenType.IDENTIFIER, file, line, pos);
        _id = id;
    }
    override public Token clone() {
        return new IdentToken(_file, _line, _pos, _id);
    }
    public override @property string toString() {
        return "Ident:" ~ to!string(text);
    }
}

// shared appender buffer, to avoid extra heap allocations
struct StringAppender {
    dchar[] buf; // backing storage; only buf[0 .. len] is meaningful
    uint len;    // number of characters currently stored

    /// returns the stored characters as a slice of the internal buffer (no copy)
    dchar[] get() {
        return buf[0 .. len];
    }
    /// appends a single '\n'
    void appendEol() {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)((len + 1 + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len] = '\n';
        len++;
    }
    /// appends all characters of s
    void append(dchar[] s) {
        if (s.length == 0)
            return;
        if (len + s.length > buf.length) {
            uint newsize = cast(uint)((len + s.length + buf.length) * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len .. len + s.length] = s;
        len += s.length;
    }
    /// appends one character
    void append(dchar ch) {
        if (len + 1 > buf.length) {
            uint newsize = cast(uint)(buf.length * 2);
            if (newsize < 128)
                newsize = 128;
            buf.length = newsize;
        }
        buf[len++] = ch;
    }
    /// forgets the content but keeps the allocated buffer
    void reset() {
        len = 0;
    }
    /// returns the value of a hex digit (0..15), or -1 if ch is not a hex digit
    static int parseHexDigit(dchar ch) {
        if (ch >= '0' && ch <='9')
            return ch - '0';
        if (ch >= 'a' && ch <='f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <='F')
            return ch - 'A' + 10;
        return -1;
    }
    /// set by the decode helpers and by processEscapeSequences() when malformed input is seen
    bool errorFlag = false;
    /**
     * Decodes count hex digits that follow buf[pos].
     *
     * pos is advanced to the last digit consumed. If the buffer ends early,
     * errorFlag is set and the value decoded so far is returned; a non-hex
     * character sets errorFlag and counts as digit 0.
     */
    dchar decodeHex(ref int pos, int count) {
        dchar res = 0;
        for (int i = 0; i < count; i++) {
            if (pos >= len - 1) {
                errorFlag = true;
                return res;
            }
            dchar ch = buf[++pos];
            int digit = parseHexDigit(ch);
            if (digit < 0) {
                errorFlag = true;
                digit = 0;
            }
            res = (res << 4) | digit;
        }
        return res;
    }
    /**
     * Decodes an octal escape of one to three digits. firstChar is the first
     * digit, already read from buf[pos]; up to two further octal digits are
     * consumed, and pos is advanced to the last digit consumed.
     */
    dchar decodeOct(dchar firstChar, ref int pos) {
        dchar res = 0;
        res = firstChar - '0';
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
            res = (res << 3) | (buf[++pos] - '0');
        }
        return res;
    }

    char[] entityNameBuf; // scratch buffer for the entity name being decoded
    int entityNameLen;    // number of valid chars in entityNameBuf

    /**
     * Decodes a named character entity. On entry buf[pos] is the '&'; the
     * name is collected up to the next ';' and resolved with entityToChar().
     *
     * Returns the resolved character. If no ';' is found or entityToChar()
     * yields 0, errorFlag is set and '?' is returned. Characters >= 0x80 in
     * the name also set errorFlag.
     */
    dchar decodeCharacterEntity(ref int pos) {
        entityNameLen = 0;
        pos++;
        for(; pos < len && buf[pos] != ';'; pos++) {
            dchar ch = buf[pos];
            if (ch >= 0x80)
                errorFlag = true;
            if (entityNameBuf.length < entityNameLen + 4)
                entityNameBuf.length += 32;
            entityNameBuf[entityNameLen++] = cast(char)ch;
        }
        if (pos < len && buf[pos] == ';') {
            dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]);
            if (ch)
                return ch;
        }
        errorFlag = true;
        return '?';
    }

    /**
     * Replaces backslash escape sequences in the buffer with the characters
     * they denote, in place (the content can only get shorter).
     *
     * Returns: errorFlag, i.e. true when at least one malformed sequence was seen.
     */
    bool processEscapeSequences() {
        errorFlag = false;
        int dst = 0;
        for (int src = 0; src < len; src++) {
            dchar ch = buf[src];
            if (ch == '\\') {
                // NOTE(review): a lone trailing backslash is dropped without setting errorFlag - confirm this is intended
                if (src == len - 1)
                    break; // INVALID
                ch = buf[++src];
                switch (ch) {
                    case '\'':
                    case '\"':
                    case '?':
                    case '\\':
                        buf[dst++] = ch;
                        break;
                    // No dedicated case for '0': it is handled by the octal
                    // branch below, so that "\0" still yields NUL while
                    // "\012" is read as one octal escape instead of NUL
                    // followed by the characters "12".
                    case 'a':
                        buf[dst++] = '\a';
                        break;
                    case 'b':
                        buf[dst++] = '\b';
                        break;
                    case 'f':
                        buf[dst++] = '\f';
                        break;
                    case 'n':
                        buf[dst++] = '\n';
                        break;
                    case 'r':
                        buf[dst++] = '\r';
                        break;
                    case 't':
                        buf[dst++] = '\t';
                        break;
                    case 'v':
                        buf[dst++] = '\v';
                        break;
                    case 'x':
                        buf[dst++] = decodeHex(src, 2);
                        break;
                    case 'u':
                        buf[dst++] = decodeHex(src, 4);
                        break;
                    case 'U':
                        buf[dst++] = decodeHex(src, 8);
                        break;
                    default:
                        if (ch >= '0' && ch <= '7') {
                            // octal X XX or XXX
                            buf[dst++] = decodeOct(ch, src);
                        } else if (ch == '&') {
                            // named character entity
                            buf[dst++] = decodeCharacterEntity(src);
                        } else {
                            // unknown escape: keep the character as is and report the error
                            buf[dst++] = ch;
                            errorFlag = true;
                        }
                        break;
                }
            } else {
                buf[dst++] = ch;
            }
        }
        len = dst;
        return errorFlag;
    }
}

class Tokenizer
{
    protected SourceLines _lineStream;
    protected dchar[] _lineText;
    protected int _line; // current line number
    protected int _len; // current line length
    protected int _pos; // current line read position
    protected int _prevLineLength; // previous line length
    protected uint _state; // tokenizer state

    enum : int {
        EOF_CHAR = 0x001A,
        EOL_CHAR = 0x000A
    };

    protected
WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
    protected CommentToken _sharedCommentToken = new CommentToken();
    protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
    protected IdentToken _sharedIdentToken = new IdentToken();
    protected OpToken _sharedOpToken = new OpToken();
    protected KeywordToken _sharedKeywordToken = new KeywordToken();
    protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
    protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
    protected InvalidToken _sharedInvalidToken = new InvalidToken();
    protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
    protected StringAppender _stringLiteralAppender;
    protected StringAppender _commentAppender;
    protected StringAppender _identAppender;

    protected bool _enableCommentText = true;
    /// when false, does not put comment text into comment token - for less allocations
    @property void enableCommentText(bool enabled) {
        _enableCommentText = enabled;
    }
    /// when false, does not put comment text into comment token - for less allocations
    @property bool enableCommentText() {
        return _enableCommentText;
    }

    protected bool _errorTolerant = false;
    /// when true, returns BadToken instead of throwing exception
    @property void errorTolerant(bool enabled) {
        _errorTolerant = enabled;
    }
    /// when true, returns BadToken instead of throwing exception
    @property bool errorTolerant() {
        return _errorTolerant;
    }

    /// creates a tokenizer reading from the given line source
    this(SourceLines lineStream) {
        allocateReusableTokens();
        initialize(lineStream);
    }

    /**
     * Gives this tokenizer its own set of reusable token objects.
     *
     * The `= new ...()` field initializers above are evaluated once at
     * compile time, so on their own every Tokenizer instance would reference
     * the same ten token objects, and initialize() of one tokenizer would
     * overwrite the file set by another.
     */
    private void allocateReusableTokens() {
        _sharedWhiteSpaceToken = new WhiteSpaceToken();
        _sharedCommentToken = new CommentToken();
        _sharedStringLiteralToken = new StringLiteralToken();
        _sharedIdentToken = new IdentToken();
        _sharedOpToken = new OpToken();
        _sharedKeywordToken = new KeywordToken();
        _sharedIntegerToken = new IntegerLiteralToken();
        _sharedRealToken = new RealLiteralToken();
        _sharedInvalidToken = new InvalidToken();
        _sharedCharacterLiteralToken = new CharacterLiteralToken();
    }

    /**
     * (Re)starts tokenizing from lineStream: tags the reusable tokens with
     * the stream's file, records the build time, fetches the first line and
     * then sets the read position within it to pos.
     */
    void initialize(SourceLines lineStream, int pos = 0) {
        _lineStream = lineStream;
        SourceFile file = _lineStream.file;
        _sharedWhiteSpaceToken.setFile(file);
        _sharedCommentToken.setFile(file);
        _sharedStringLiteralToken.setFile(file);
        _sharedIdentToken.setFile(file);
        _sharedOpToken.setFile(file);
        _sharedKeywordToken.setFile(file);
        _sharedIntegerToken.setFile(file);
        _sharedRealToken.setFile(file);
        _sharedInvalidToken.setFile(file);
        _sharedCharacterLiteralToken.setFile(file);
        buildTime = Clock.currTime();
        _line = lineStream.line;
        _pos = 0;
        _prevLineLength = 0;
        _lineText = null;
        nextLine();
        // nextLine() resets _pos to 0, so the requested start position is applied afterwards
        _pos = pos;
    }

    /// creates a tokenizer for source code held in a string
    this(string code, string filename = "") {
        this(new ArraySourceLines(code, filename));
    }

    // fetch next line from source stream
    // returns false at end of file; throws SourceEncodingException if the stream reports an error
    protected bool nextLine() {
        _prevLineLength = cast(int)_lineText.length;
        _lineText = _lineStream.readLine();
        if (!_lineText) {
            if (_lineStream.errorCode != 0)
                throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
            if (_lineStream.eof) {
                // end of file
                _pos = 0;
                _len = 0;
                return false;
            }
            // just an empty line
        }
        _line = _lineStream.line;
        _pos = 0;
        _len = cast(int)_lineText.length; // do not support lines longer that 4Gb
        return true;
    }

    /// consumes and returns the next character; EOL_CHAR when positioned at a line end, EOF_CHAR when the stream is exhausted
    protected dchar nextChar() {
        if (_pos >= _len) {
            if (!nextLine()) {
                _pos = _prevLineLength + 1;
                return EOF_CHAR;
            }
            return EOL_CHAR;
        }
        dchar res = _lineText[_pos++];
        // the last character of a line was consumed: move on to the next line right away
        if (_pos >= _len)
            nextLine();
        return res;
    }

    /// returns the character at the current position (EOL_CHAR at a line end, EOF_CHAR if no line can be read)
    // NOTE(review): despite its name this advances _pos (`_pos++`), i.e. it consumes
    // the character; left unchanged because its callers are not visible here - confirm and fix.
    protected dchar peekChar() {
        if (_lineText is null) {
            if (!nextLine()) {
                return EOF_CHAR;
            }
        }
        if (_pos >= _len)
            return EOL_CHAR;
        return _lineText[_pos++];
    }

    /// creates the end-of-file token (a new object each time, not a reused one)
    protected Token emitEof() {
        // TODO: check for current state
        return new EofToken(_lineStream.file, _startLine, _startPos + 2);
    }

    protected Token processWhiteSpace(dchar firstChar) {
        // reuse the same token instance, to avoid extra heap spamming
_sharedWhiteSpaceToken.setPos(_startLine, _startPos); 1688 for (;;) { 1689 int i = _pos; 1690 for (; i < _len; i++) { 1691 dchar ch = _lineText[i]; 1692 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR)) 1693 break; 1694 } 1695 _pos = i; 1696 if (_pos < _len) 1697 break; 1698 // go to next line 1699 if (!nextLine()) 1700 break; 1701 } 1702 return _sharedWhiteSpaceToken; 1703 } 1704 1705 protected Token processOneLineComment() { 1706 _sharedCommentToken.setPos(_startLine, _startPos); 1707 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/'; 1708 _sharedCommentToken.isMultilineComment = false; 1709 if (_enableCommentText) { 1710 _sharedCommentToken.text = _lineText[_pos + 1 .. $]; 1711 } 1712 _pos = _len; 1713 nextChar(); 1714 return _sharedCommentToken; 1715 } 1716 1717 protected Token processOneLineSharpComment() { 1718 _sharedCommentToken.setPos(_startLine, _startPos); 1719 if (_enableCommentText) { 1720 _sharedCommentToken.text = _lineText[_pos .. $]; 1721 } 1722 _pos = _len; 1723 return _sharedCommentToken; 1724 } 1725 1726 // Comment /* */ 1727 protected Token processMultilineComment() { 1728 _sharedCommentToken.setPos(_startLine, _startPos); 1729 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*'; 1730 _sharedCommentToken.isMultilineComment = true; 1731 _commentAppender.reset(); 1732 int textStart = _pos + 1; 1733 for (;;) { 1734 int textEnd = int.max; 1735 int i = textStart; 1736 for (; i < _len - 1; i++) { 1737 if (_lineText[i] == '*' && _lineText[i + 1] == '/') { 1738 textEnd = i; 1739 break; 1740 } 1741 } 1742 if (textEnd != int.max) { 1743 if (_enableCommentText) 1744 _commentAppender.append(_lineText[textStart .. textEnd]); 1745 _pos = textEnd + 2; 1746 break; 1747 } 1748 if (!nextLine()) { 1749 // TODO: do we need throw exception if comment not closed by end of file? 
1750 _pos = _len; 1751 break; 1752 } 1753 textStart = 0; 1754 } 1755 if (_enableCommentText) { 1756 _sharedCommentToken.text = _commentAppender.get(); 1757 } 1758 return _sharedCommentToken; 1759 } 1760 1761 // Comment /+ +/ 1762 protected Token processNestedComment() { 1763 _sharedCommentToken.setPos(_startLine, _startPos); 1764 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+'; 1765 _sharedCommentToken.isMultilineComment = true; 1766 _commentAppender.reset(); 1767 dchar[] text; 1768 int textStart = _pos + 1; 1769 int level = 1; 1770 for (;;) { 1771 int textEnd = int.max; 1772 int i = textStart; 1773 for (; i < _len - 1; i++) { 1774 if (_lineText[i] == '/' && _lineText[i + 1] == '+') { 1775 level++; 1776 i++; 1777 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') { 1778 if (--level == 0) { 1779 textEnd = i; 1780 break; 1781 } 1782 } 1783 } 1784 if (textEnd != int.max) { 1785 if (_enableCommentText) 1786 _commentAppender.append(_lineText[textStart .. textEnd]); 1787 _pos = textEnd + 2; 1788 break; 1789 } 1790 if (!nextLine()) { 1791 // TODO: do we need throw exception if comment not closed by end of file? 
1792 _pos = _len; 1793 break; 1794 } 1795 if (_enableCommentText) 1796 _commentAppender.appendEol(); 1797 textStart = 0; 1798 } 1799 if (_enableCommentText) { 1800 _sharedCommentToken.text = _commentAppender.get(); 1801 } 1802 return _sharedCommentToken; 1803 } 1804 1805 protected Token processHexString() { 1806 _pos++; 1807 // TODO: 1808 return null; 1809 } 1810 1811 protected Token processDelimitedString() { 1812 _pos++; 1813 // TODO: 1814 return null; 1815 } 1816 1817 // r"string" or `string` 1818 protected Token processWysiwygString(dchar ch) { 1819 _pos++; 1820 // TODO: 1821 return null; 1822 } 1823 1824 protected Token processIdent(dchar firstChar) { 1825 _sharedIdentToken.setPos(_startLine, _startPos); 1826 _identAppender.reset(); 1827 _identAppender.append(firstChar); 1828 for (; _pos < _len; ) { 1829 dchar ch = _lineText[_pos]; 1830 if (!isIdentMiddleChar(ch)) { 1831 break; 1832 } 1833 _identAppender.append(ch); 1834 _pos++; 1835 } 1836 _sharedIdentToken.setText(_identAppender.get); 1837 return _sharedIdentToken; 1838 } 1839 1840 protected Token processIntegerSuffix() { 1841 if (_pos >= _len) 1842 return _sharedIntegerToken; 1843 bool longFlag = false; 1844 bool unsignedFlag = false; 1845 dchar ch = _lineText[_pos]; 1846 dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 1847 if (ch == 'l' || ch == 'L') { 1848 longFlag = true; 1849 _pos++; 1850 if (ch2 == 'u' || ch2 == 'U') { 1851 unsignedFlag = true; 1852 _pos++; 1853 } 1854 } else if (ch == 'u' || ch == 'U') { 1855 unsignedFlag = true; 1856 _pos++; 1857 if (ch2 == 'l' || ch2 == 'L') { 1858 longFlag = true; 1859 _pos++; 1860 } 1861 } 1862 _sharedIntegerToken.setFlags(unsignedFlag, longFlag); 1863 ch = _pos < _len ? 
_lineText[_pos] : 0; 1864 if (isIdentMiddleChar(ch)) 1865 return parserError("Unexpected character after number", _sharedIntegerToken); 1866 return _sharedIntegerToken; 1867 } 1868 1869 protected Token processBinaryNumber() { 1870 _sharedIntegerToken.setPos(_startLine, _startPos); 1871 _pos++; 1872 if (_pos >= _len) 1873 return parserError("Unexpected end of line in binary number", _sharedIntegerToken); 1874 int digits = 0; 1875 ulong number = 0; 1876 int i = _pos; 1877 for (;i < _len; i++) { 1878 dchar ch = _lineText[i]; 1879 if (ch != '0' && ch != '1') 1880 break; 1881 number = (number << 1) | (ch == '1' ? 1 : 0); 1882 digits++; 1883 } 1884 _pos = i; 1885 if (digits > 64) 1886 return parserError("number is too big", _sharedIntegerToken); 1887 _sharedIntegerToken.setValue(number); 1888 return processIntegerSuffix(); 1889 } 1890 1891 protected Token processHexNumber() { 1892 _sharedIntegerToken.setPos(_startLine, _startPos); 1893 _sharedRealToken.setPos(_startLine, _startPos); 1894 _pos++; 1895 if (_pos >= _len) 1896 return parserError("Unexpected end of line in hex number", _sharedIntegerToken); 1897 int digits = 0; 1898 ulong number = 0; 1899 int i = _pos; 1900 for (;i < _len; i++) { 1901 dchar ch = _lineText[i]; 1902 uint digit = 0; 1903 if (ch >= '0' && ch <= '9') 1904 digit = ch - '0'; 1905 else if (ch >= 'a' && ch <= 'f') 1906 digit = ch - 'a' + 10; 1907 else if (ch >= 'A' && ch <= 'F') 1908 digit = ch - 'A' + 10; 1909 else if (ch == '_') 1910 continue; 1911 else 1912 break; 1913 number = (number << 4) | digit; 1914 digits++; 1915 } 1916 _pos = i; 1917 if (digits > 16) 1918 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1919 _sharedIntegerToken.setValue(number); 1920 return processIntegerSuffix(); 1921 } 1922 1923 protected Token processOctNumber() { 1924 _sharedIntegerToken.setPos(_startLine, _startPos); 1925 if (_pos >= _len) 1926 return parserError("Unexpected end of line in octal number", _sharedIntegerToken); 1927 int digits 
= 0; 1928 ulong number = 0; 1929 int i = _pos; 1930 bool overflow = false; 1931 for (;i < _len; i++) { 1932 dchar ch = _lineText[i]; 1933 int digit = 0; 1934 if (ch >= '0' && ch <= '7') 1935 digit = ch - '0'; 1936 else if (ch == '_') 1937 continue; 1938 else 1939 break; 1940 number <<= 3; 1941 if (digits >= 20) { 1942 if ((number >> 3) << 3 != number) { 1943 overflow = true; 1944 break; 1945 } 1946 } 1947 number |= digit; 1948 digits++; 1949 } 1950 _pos = i; 1951 if (overflow) 1952 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1953 _sharedIntegerToken.setValue(number); 1954 return processIntegerSuffix(); 1955 } 1956 1957 // 1958 protected Token processDecFloatSuffix(real value) { 1959 ubyte precision = 1; 1960 bool imaginary = false; 1961 dchar next = _pos < _len ? _lineText[_pos] : 0; 1962 if (next == 'f') { 1963 _pos++; 1964 precision = 0; 1965 } else if (next == 'L') { 1966 _pos++; 1967 precision = 2; 1968 } 1969 next = _pos < _len ? _lineText[_pos] : 0; 1970 if (next == 'i') { 1971 _pos++; 1972 imaginary = true; 1973 } 1974 next = _pos < _len ? _lineText[_pos] : 0; 1975 if (isIdentMiddleChar(next)) 1976 return parserError("invalid suffix for floating point literal", _sharedRealToken); 1977 _sharedRealToken.setValue(value, precision, imaginary); 1978 return _sharedRealToken; 1979 } 1980 1981 // after E char 1982 protected Token processDecFloatExponent(real value) { 1983 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 1984 int sign = 1; 1985 if (next == '+') { 1986 _pos++; 1987 } else if (next == '-') { 1988 _pos++; 1989 sign = -1; 1990 } 1991 if (_pos >= _len) 1992 return parserError("Invalid exponent", _sharedRealToken); 1993 ulong digits = 0; 1994 ulong number = 0; 1995 int i = _pos; 1996 bool overflow = false; 1997 for (;i < _len; i++) { 1998 dchar ch = _lineText[i]; 1999 uint digit = 0; 2000 if (ch >= '0' && ch <= '9') 2001 digit = ch - '0'; 2002 else if (ch == '_') 2003 continue; 2004 else 2005 break; 2006 number *= 10; 2007 if (digits >= 18) { 2008 if ((number * 10) / 10 != number) { 2009 overflow = true; 2010 break; 2011 } 2012 } 2013 number += digit; 2014 digits++; 2015 } 2016 if (digits == 0) 2017 return parserError("Invalid exponent", _sharedRealToken); 2018 _pos = i; 2019 value *= pow(10., cast(long)number * sign); 2020 return processDecFloatSuffix(value); 2021 } 2022 2023 protected Token processDecFloatSecondPart(ulong firstPart) { 2024 if (_pos >= _len) { 2025 _sharedRealToken.setValue(cast(real)firstPart); 2026 return _sharedRealToken; 2027 } 2028 ulong divider = 1; 2029 ulong number = 0; 2030 int i = _pos; 2031 bool overflow = false; 2032 for (;i < _len; i++) { 2033 dchar ch = _lineText[i]; 2034 uint digit = 0; 2035 if (ch >= '0' && ch <= '9') 2036 digit = ch - '0'; 2037 else if (ch == '_') 2038 continue; 2039 else 2040 break; 2041 if (divider * 10 < divider) 2042 continue; // ignore extra digits 2043 number *= 10; 2044 number += digit; 2045 divider *= 10; 2046 } 2047 _pos = i; 2048 real value = cast(real)firstPart + (cast(real)number / divider); 2049 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2050 if (next == 0) { 2051 // neither exponent nor suffix 2052 _sharedRealToken.setValue(value); 2053 return _sharedRealToken; 2054 } 2055 if (next == 'e' || next == 'E') { 2056 _pos++; 2057 return processDecFloatExponent(value); 2058 } 2059 return processDecFloatSuffix(value); 2060 } 2061 2062 protected Token processDecNumber(dchar c) { 2063 _sharedIntegerToken.setPos(_startLine, _startPos); 2064 _sharedRealToken.setPos(_startLine, _startPos); 2065 //if (_pos >= _len) 2066 // return parserError("Unexpected end of line in number", _sharedIntegerToken); 2067 int digits = 1; 2068 ulong number = c - '0'; 2069 int i = _pos; 2070 bool overflow = false; 2071 if (_line == _startLine) { 2072 for (;i < _len; i++) { 2073 dchar ch = _lineText[i]; 2074 uint digit = 0; 2075 if (ch >= '0' && ch <= '9') 2076 digit = ch - '0'; 2077 else if (ch == '_') 2078 continue; 2079 else 2080 break; 2081 number *= 10; 2082 if (digits >= 18) { 2083 if ((number * 10) / 10 != number) { 2084 overflow = true; 2085 break; 2086 } 2087 } 2088 number += digit; 2089 digits++; 2090 } 2091 _pos = i; 2092 } 2093 if (overflow) 2094 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 2095 _sharedIntegerToken.setValue(number); 2096 dchar next = _line == _startLine && _pos < _len ? 
_lineText[_pos] : 0; 2097 if (next == 0) 2098 return _sharedIntegerToken; 2099 if (next == 'e' || next == 'E') { 2100 _pos++; 2101 return processDecFloatExponent(number); 2102 } else if (next == '.') { 2103 _pos++; 2104 return processDecFloatSecondPart(number); 2105 } 2106 return processIntegerSuffix(); 2107 } 2108 2109 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2110 protected Token parserError(string msg, Token incompleteToken) { 2111 return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type); 2112 } 2113 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2114 protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) { 2115 if (_errorTolerant) { 2116 startPos--; 2117 _sharedInvalidToken.setPos(startLine, startPos); 2118 _sharedInvalidToken.errorMessage = msg; 2119 _sharedInvalidToken.errorCode = 1; // for future extension 2120 _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension 2121 // make invalid source text 2122 dchar[] invalidText; 2123 int p = startLine == _line ? 
startPos : 0; 2124 for (int i = p; i < _pos && i < _lineText.length; i++) 2125 invalidText ~= _lineText[i]; 2126 2127 // recover after error 2128 for (; _pos < _lineText.length; _pos++) { 2129 dchar ch = _lineText[_pos]; 2130 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}') 2131 break; 2132 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) { 2133 if (ch == '*' || ch == '/') 2134 break; 2135 } 2136 invalidText ~= ch; 2137 } 2138 _sharedInvalidToken.text = invalidText; 2139 return _sharedInvalidToken; 2140 } 2141 throw new ParserException(msg, _lineStream.file, _line, _pos); 2142 } 2143 2144 protected Keyword detectKeyword(dchar ch) { 2145 if (ch > 'z') 2146 return Keyword.NONE; 2147 int len = _len - _pos; 2148 switch (cast(ubyte)ch) { 2149 // ABSTRACT, 2150 // ALIAS, 2151 // ALIGN, 2152 // ASM, 2153 // ASSERT, 2154 // AUTO, 2155 case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos); 2156 2157 // BODY, 2158 // BOOL, 2159 // BREAK, 2160 // BYTE, 2161 case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos); 2162 2163 // CASE, 2164 // CAST, 2165 // CATCH, 2166 // CDOUBLE, 2167 // CENT, 2168 // CFLOAT, 2169 // CHAR, 2170 // CLASS, 2171 // CONST, 2172 // CONTINUE, 2173 // CREAL, 2174 case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos); 2175 2176 // DCHAR, 2177 // DEBUG, 2178 // DEFAULT, 2179 // DELEGATE, 2180 // DELETE, 2181 // DEPRECATED, 2182 // DO, 2183 // DOUBLE, 2184 case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos); 2185 2186 // ELSE, 2187 // ENUM, 2188 // EXPORT, 2189 // EXTERN, 2190 case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos); 2191 2192 // FALSE, 2193 // FINAL, 2194 // FINALLY, 2195 // FLOAT, 2196 // FOR, 2197 // FOREACH, 2198 // FOREACH_REVERSE, 2199 // FUNCTION, 2200 case 
'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos); 2201 2202 // GOTO, 2203 case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos); 2204 2205 // IDOUBLE, 2206 // IF, 2207 // IFLOAT, 2208 // IMMUTABLE, 2209 // IMPORT, 2210 // IN, 2211 // INOUT, 2212 // INT, 2213 // INTERFACE, 2214 // INVARIANT, 2215 // IREAL, 2216 // IS, 2217 case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos); 2218 2219 // LAZY, 2220 // LONG, 2221 case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos); 2222 2223 // MACRO, 2224 // MIXIN, 2225 // MODULE, 2226 case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos); 2227 2228 // NEW, 2229 // NOTHROW, 2230 // NULL, 2231 case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos); 2232 2233 // OUT, 2234 // OVERRIDE, 2235 case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos); 2236 2237 // PACKAGE, 2238 // PRAGMA, 2239 // PRIVATE, 2240 // PROTECTED, 2241 // PUBLIC, 2242 // PURE, 2243 case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos); 2244 2245 // REAL, 2246 // REF, 2247 // RETURN, 2248 case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos); 2249 2250 // SCOPE, 2251 // SHARED, 2252 // SHORT, 2253 // STATIC, 2254 // STRUCT, 2255 // SUPER, 2256 // SWITCH, 2257 // SYNCHRONIZED, 2258 case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos); 2259 2260 // TEMPLATE, 2261 // THIS, 2262 // THROW, 2263 // TRUE, 2264 // TRY, 2265 // TYPEDEF, 2266 // TYPEID, 2267 // TYPEOF, 2268 case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos); 2269 2270 // UBYTE, 2271 // UCENT, 2272 // UINT, 2273 // ULONG, 2274 // UNION, 2275 // UNITTEST, 2276 // USHORT, 2277 case 'u': 
return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos); 2278 2279 // VERSION, 2280 // VOID, 2281 // VOLATILE, 2282 case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos); 2283 2284 // WCHAR, 2285 // WHILE, 2286 // WITH, 2287 case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos); 2288 2289 // FILE, 2290 // MODULE, 2291 // LINE, 2292 // FUNCTION, 2293 // PRETTY_FUNCTION, 2294 // 2295 // GSHARED, 2296 // TRAITS, 2297 // VECTOR, 2298 // PARAMETERS, 2299 case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos); 2300 default: return Keyword.NONE; 2301 } 2302 } 2303 protected OpCode detectOp(dchar ch) nothrow { 2304 if (ch >= 128) 2305 return OpCode.NONE; 2306 dchar ch2 = _pos < _len ? _lineText[_pos] : 0; 2307 dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 2308 switch(cast(ubyte)ch) { 2309 // DIV, // / 2310 // DIV_EQ, // /= 2311 case '/': 2312 if (ch2 == '=') { 2313 _pos++; 2314 return OpCode.DIV_EQ; 2315 } 2316 return OpCode.DIV; 2317 // DOT, // . 2318 // DOT_DOT, // .. 2319 // DOT_DOT_DOT,// ... 
2320 case '.': 2321 if (ch2 == '.') { 2322 if (ch3 == '.') { 2323 _pos += 2; 2324 return OpCode.DOT_DOT_DOT; 2325 } 2326 _pos++; 2327 return OpCode.DOT_DOT; 2328 } 2329 return OpCode.DOT; 2330 // AND, // & 2331 // AND_EQ, // &= 2332 // LOG_AND, // && 2333 case '&': 2334 if (ch2 == '=') { 2335 _pos++; 2336 return OpCode.AND_EQ; 2337 } 2338 if (ch2 == '&') { 2339 _pos++; 2340 return OpCode.LOG_AND; 2341 } 2342 return OpCode.AND; 2343 // OR, // | 2344 // OR_EQ, // |= 2345 // LOG_OR, // || 2346 case '|': 2347 if (ch2 == '=') { 2348 _pos++; 2349 return OpCode.OR_EQ; 2350 } 2351 if (ch2 == '|') { 2352 _pos++; 2353 return OpCode.LOG_OR; 2354 } 2355 return OpCode.OR; 2356 // MINUS, // - 2357 // MINUS_EQ, // -= 2358 // MINUS_MINUS,// -- 2359 case '-': 2360 if (ch2 == '=') { 2361 _pos++; 2362 return OpCode.MINUS_EQ; 2363 } 2364 if (ch2 == '-') { 2365 _pos++; 2366 return OpCode.MINUS_MINUS; 2367 } 2368 return OpCode.MINUS; 2369 // PLUS, // + 2370 // PLUS_EQ, // += 2371 // PLUS_PLUS, // ++ 2372 case '+': 2373 if (ch2 == '=') { 2374 _pos++; 2375 return OpCode.PLUS_EQ; 2376 } 2377 if (ch2 == '+') { 2378 _pos++; 2379 return OpCode.PLUS_PLUS; 2380 } 2381 return OpCode.PLUS; 2382 // LT, // < 2383 // LT_EQ, // <= 2384 // SHL, // << 2385 // SHL_EQ, // <<= 2386 // LT_GT, // <> 2387 // NE_EQ, // <>= 2388 case '<': 2389 if (ch2 == '<') { 2390 if (ch3 == '=') { 2391 _pos += 2; 2392 return OpCode.SHL_EQ; 2393 } 2394 _pos++; 2395 return OpCode.SHL; 2396 } 2397 if (ch2 == '>') { 2398 if (ch3 == '=') { 2399 _pos += 2; 2400 return OpCode.NE_EQ; 2401 } 2402 _pos++; 2403 return OpCode.LT_GT; 2404 } 2405 if (ch2 == '=') { 2406 _pos++; 2407 return OpCode.LT_EQ; 2408 } 2409 return OpCode.LT; 2410 // GT, // > 2411 // GT_EQ, // >= 2412 // SHR_EQ // >>= 2413 // ASR_EQ, // >>>= 2414 // SHR, // >> 2415 // ASR, // >>> 2416 case '>': 2417 if (ch2 == '>') { 2418 if (ch3 == '>') { 2419 dchar ch4 = _pos < _len - 2 ? 
_lineText[_pos + 2] : 0; 2420 if (ch4 == '=') { // >>>= 2421 _pos += 3; 2422 return OpCode.ASR_EQ; 2423 } 2424 _pos += 2; 2425 return OpCode.ASR; // >>> 2426 } 2427 if (ch3 == '=') { // >>= 2428 _pos += 2; 2429 return OpCode.SHR_EQ; 2430 } 2431 _pos++; 2432 return OpCode.SHR; 2433 } 2434 if (ch2 == '=') { // >= 2435 _pos++; 2436 return OpCode.GT_EQ; 2437 } 2438 // > 2439 return OpCode.GT; 2440 // NOT, // ! 2441 // NOT_EQ // != 2442 // NOT_LT_GT, // !<> 2443 // NOT_LT_GT_EQ, // !<>= 2444 // NOT_LT, // !< 2445 // NOT_LT_EQ, // !<= 2446 // NOT_GT, // !> 2447 // NOT_GT_EQ, // !>= 2448 case '!': 2449 if (ch2 == '<') { // !< 2450 if (ch3 == '>') { // !<> 2451 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2452 if (ch4 == '=') { // !<>= 2453 _pos += 3; 2454 return OpCode.NOT_LT_GT_EQ; 2455 } 2456 _pos += 2; 2457 return OpCode.NOT_LT_GT; // !<> 2458 } 2459 if (ch3 == '=') { // !<= 2460 _pos += 2; 2461 return OpCode.NOT_LT_EQ; 2462 } 2463 _pos++; 2464 return OpCode.NOT_LT; // !< 2465 } 2466 if (ch2 == '=') { // != 2467 _pos++; 2468 return OpCode.NOT_EQ; 2469 } 2470 return OpCode.NOT; 2471 // PAR_OPEN, // ( 2472 case '(': 2473 return OpCode.PAR_OPEN; 2474 // PAR_CLOSE, // ) 2475 case ')': 2476 return OpCode.PAR_CLOSE; 2477 // SQ_OPEN, // [ 2478 case '[': 2479 return OpCode.SQ_OPEN; 2480 // SQ_CLOSE, // ] 2481 case ']': 2482 return OpCode.SQ_CLOSE; 2483 // CURL_OPEN, // { 2484 case '{': 2485 return OpCode.CURL_OPEN; 2486 // CURL_CLOSE, // } 2487 case '}': 2488 return OpCode.CURL_CLOSE; 2489 // QUEST, // ? 
2490 case '?': 2491 return OpCode.QUEST; 2492 // COMMA, // , 2493 case ',': 2494 return OpCode.COMMA; 2495 // SEMICOLON, // ; 2496 case ';': 2497 return OpCode.SEMICOLON; 2498 // COLON, // : 2499 case ':': 2500 return OpCode.COLON; 2501 // DOLLAR, // $ 2502 case '$': 2503 return OpCode.DOLLAR; 2504 // EQ, // = 2505 // QE_EQ, // == 2506 // EQ_GT, // => 2507 case '=': 2508 if (ch2 == '=') { // == 2509 _pos++; 2510 return OpCode.QE_EQ; 2511 } 2512 if (ch2 == '>') { // => 2513 _pos++; 2514 return OpCode.EQ_GT; 2515 } 2516 return OpCode.EQ; 2517 // MUL, // * 2518 // MUL_EQ, // *= 2519 case '*': 2520 if (ch2 == '=') { 2521 _pos++; 2522 return OpCode.MUL_EQ; 2523 } 2524 return OpCode.MUL; 2525 // MOD, // % 2526 // MOD_EQ, // %= 2527 case '%': 2528 if (ch2 == '=') { 2529 _pos++; 2530 return OpCode.MOD_EQ; 2531 } 2532 return OpCode.MOD; 2533 // XOR, // ^ 2534 // XOR_EQ, // ^= 2535 // LOG_XOR, // ^^ 2536 // LOG_XOR_EQ, // ^^= 2537 case '^': 2538 if (ch2 == '^') { 2539 if (ch3 == '=') { 2540 _pos += 2; 2541 return OpCode.LOG_XOR_EQ; 2542 } 2543 _pos++; 2544 return OpCode.LOG_XOR; 2545 } 2546 if (ch2 == '=') { 2547 _pos++; 2548 return OpCode.XOR_EQ; 2549 } 2550 return OpCode.XOR; 2551 // INV, // ~ 2552 // INV_EQ, // ~= 2553 case '~': 2554 if (ch2 == '=') { 2555 _pos++; 2556 return OpCode.INV_EQ; 2557 } 2558 return OpCode.INV; 2559 // AT, // @ 2560 case '@': 2561 return OpCode.AT; 2562 // SHARP // # 2563 case '#': 2564 return OpCode.SHARP; 2565 default: 2566 return OpCode.NONE; 2567 } 2568 } 2569 2570 protected Token processCharacterLiteral() { 2571 _sharedCharacterLiteralToken.setPos(_startLine, _startPos); 2572 if (_pos + 2 > _len) 2573 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2574 dchar ch = _lineText[_pos++]; 2575 dchar ch2 = _lineText[_pos++]; 2576 dchar type = 0; 2577 if (ch == '\\') { 2578 // process escaped character - store it in ch 2579 // TODO: support all escape sequences 2580 switch(ch2) { 2581 case 'r': 2582 ch = '\r'; 2583 
break; 2584 case 'n': 2585 ch = '\n'; 2586 break; 2587 case 't': 2588 ch = '\t'; 2589 break; 2590 case '\\': 2591 ch = '\\'; 2592 break; 2593 default: 2594 ch = ch2; 2595 break; 2596 } 2597 // here must be closing ' 2598 if (_pos + 1 > _len) 2599 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2600 ch2 = _lineText[_pos++]; 2601 } 2602 if (ch2 != '\'') 2603 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2604 if (_pos < _len) { 2605 dchar t = _lineText[_pos]; 2606 if (t == 'd' || t == 'w' || t == 'c') { 2607 type = t; 2608 _pos++; 2609 } else if (isIdentMiddleChar(ch)) { 2610 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken); 2611 } 2612 } 2613 _sharedCharacterLiteralToken.setCharacter(ch, type); 2614 return _sharedCharacterLiteralToken; 2615 } 2616 2617 protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) { 2618 bool wysiwyg = (delimiter == 'r' || delimiter == '`'); 2619 //writeln("processDoubleQuotedString()"); 2620 _sharedStringLiteralToken.setPos(_startLine, _startPos); 2621 _stringLiteralAppender.reset(); 2622 if (delimiter == 'r') { 2623 _pos++; 2624 delimiter = '\"'; 2625 } 2626 dchar type = 0; 2627 for (;;) { 2628 int i = _pos; 2629 int endPos = int.max; 2630 bool lastBackSlash = false; 2631 for(; i < _len; i++) { 2632 dchar ch = _lineText[i]; 2633 if (ch == '\\') { 2634 if (lastBackSlash) 2635 lastBackSlash = false; 2636 else 2637 lastBackSlash = true; 2638 } 2639 else if (ch == delimiter && !lastBackSlash) { 2640 endPos = i; 2641 break; 2642 } 2643 else if(lastBackSlash) 2644 lastBackSlash = false; 2645 } 2646 if (endPos != int.max) { 2647 // found end quote 2648 _stringLiteralAppender.append(_lineText[_pos .. endPos]); 2649 _pos = endPos + 1; 2650 break; 2651 } 2652 // no quote by end of line 2653 _stringLiteralAppender.append(_lineText[_pos .. 
$]); 2654 _stringLiteralAppender.appendEol(); 2655 if (!nextLine()) { 2656 // do we need to throw exception if eof comes before end of string? 2657 break; 2658 } 2659 } 2660 dchar t = 0; 2661 if (_pos < _len) { 2662 dchar ch = _lineText[_pos]; 2663 if (ch == 'c' || ch == 'w' || ch == 'd') { 2664 t = ch; 2665 _pos++; 2666 if (_pos < _len) { 2667 ch = _lineText[_pos]; 2668 if (isIdentMiddleChar(ch)) 2669 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2670 } 2671 } else if (isIdentMiddleChar(ch)) 2672 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2673 } 2674 if (t != 0) { 2675 if (type != 0 && t != type) 2676 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); 2677 type = t; 2678 } 2679 if (wysiwyg) { 2680 // no escape processing 2681 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2682 return _sharedStringLiteralToken; 2683 } 2684 _stringLiteralAppender.processEscapeSequences(); 2685 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2686 return _sharedStringLiteralToken; 2687 } 2688 2689 protected SysTime buildTime; 2690 2691 // string literal of the date of compilation "mmm dd yyyy" 2692 protected dstring formatBuildDate() { 2693 // TODO: provide proper format 2694 return to!dstring(buildTime); 2695 } 2696 2697 // string literal of the time of compilation "hh:mm:ss" 2698 protected dstring formatBuildTime() { 2699 // TODO: provide proper format 2700 return to!dstring(buildTime); 2701 } 2702 2703 // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2704 protected dstring formatBuildTimestamp() { 2705 // TODO: provide proper format 2706 return to!dstring(buildTime); 2707 } 2708 2709 static immutable dstring VERSION = "0.1"; 2710 static immutable dstring VENDOR = "coolreader.org"; 2711 2712 protected Token makeSpecialTokenString(dstring str, int pos) { 2713 
_sharedStringLiteralToken.setPos(_startLine, _startPos); 2714 _sharedStringLiteralToken.setText(cast(dchar[])str, 0); 2715 return _sharedStringLiteralToken; 2716 } 2717 2718 protected Token processSpecialToken(Keyword keyword, int pos) { 2719 switch (keyword) { 2720 //Special Token Replaced with 2721 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2722 return makeSpecialTokenString(formatBuildDate(), pos); 2723 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2724 return makeSpecialTokenString(formatBuildTime(), pos); 2725 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2726 return makeSpecialTokenString(formatBuildTimestamp(), pos); 2727 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2728 return makeSpecialTokenString(VENDOR, pos); 2729 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2730 return makeSpecialTokenString(VERSION, pos); 2731 default: 2732 parserError("Unknown special token", _line, pos); 2733 } 2734 return null; 2735 } 2736 2737 protected int _startLine; 2738 protected int _startPos; 2739 2740 // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls). 2741 Token nextToken() { 2742 _startLine = _line; 2743 _startPos = _pos; 2744 dchar ch = nextChar(); 2745 if (ch == EOF_CHAR) { 2746 return emitEof(); 2747 } 2748 if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { 2749 // white space (treat EOL as whitespace, too) 2750 return processWhiteSpace(ch); 2751 } 2752 dchar next = _pos < _len ? 
_lineText[_pos] : 0; 2753 if (ch == '/') { 2754 if (next == '/') 2755 return processOneLineComment(); 2756 else if (next == '*') 2757 return processMultilineComment(); 2758 else if (next == '+') 2759 return processNestedComment(); 2760 } 2761 if (ch == '#' && _line == 1) 2762 return processOneLineSharpComment(); 2763 if (ch == '\"') 2764 return processDoubleQuotedOrWysiwygString(ch); 2765 if (ch == '\'') 2766 return processCharacterLiteral(); 2767 if (ch == 'x' && next == '\"') 2768 return processHexString(); 2769 if (ch == 'q' && next == '\"') 2770 return processDelimitedString(); 2771 if ((ch == 'r' && next == '\"') || (ch == '`')) 2772 return processDoubleQuotedOrWysiwygString(ch); 2773 int oldPos = _pos - 1; 2774 2775 if (ch == '0') { 2776 if (next == 'b' || next == 'B') 2777 return processBinaryNumber(); 2778 if (next == 'x' || next == 'X') 2779 return processHexNumber(); 2780 if (next >= '0' && next <= '9') 2781 return processOctNumber(); 2782 if (next >= '0' && next <= '9') 2783 return processDecNumber(ch); 2784 } 2785 if (ch >= '0' && ch <= '9') 2786 return processDecNumber(ch); 2787 if (ch == '.' && next >= '0' && next <= '9') // .123 2788 return processDecFloatSecondPart(0); 2789 2790 if (ch == '_' || isUniversalAlpha(ch)) { 2791 // start of identifier or keyword? 
// NOTE(review): the statements down to the method's closing brace are the tail of a token-scanning
// method whose beginning lies above this chunk; they are reproduced unchanged.
            Keyword keyword = detectKeyword(ch);
            if (keyword != Keyword.NONE) {
                switch (keyword) {
                    //Special Token Replaced with
                    case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file
                    case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
                    case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss"
                    case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
                    case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D"
                    case Keyword.VERSION_: // Compiler version as an integer, such as 2001
                        return processSpecialToken(keyword, oldPos);
                    default:
                        _sharedKeywordToken.setPos(_startLine, _startPos);
                        _sharedKeywordToken.keyword = keyword;
                        return _sharedKeywordToken;
                }
            }
            return processIdent(ch);
        }
        OpCode op = detectOp(ch);
        if (op != OpCode.NONE) {
            _sharedOpToken.setPos(_startLine, _startPos);
            _sharedOpToken.opCode = op;
            return _sharedOpToken;
        }
        return parserError("Invalid token", _line, _pos);
    }

    /// Tokenizes the rest of the input.
    /// Returns: clones of every token produced by nextToken() up to, but not including,
    /// the EOF token; collection also stops if nextToken() returns null.
    Token[] allTokens() {
        Token[] res;
        for (;;) {
            Token tok = nextToken();
            if (!tok || tok.type == TokenType.EOF)
                break;
            // store a copy, not the returned object itself
            // (presumably because the tokenizer hands back reused shared token instances - verify)
            res ~= tok.clone();
        }
        return res;
    }
}

/// Manual smoke test: dumps every token of a file to stdout.
/// The body is compiled only when the DisableLexerTest version identifier is set,
/// and it reads a hard-coded local path.
unittest {
    version(DisableLexerTest) {
        import std.stdio;
        import std.conv;
        import std.utf;
        import dlangui.core.linestream;
        string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
        writeln("opening file");
        try {
            std.stream.File f = new std.stream.File(fname);
            scope(exit) { f.close(); }
            try {
                LineStream lines = LineStream.create(f, fname);
                Tokenizer tokenizer = new Tokenizer(lines);
                for (;;) {
                    Token token = tokenizer.nextToken();
                    if (token is null) {
                        writeln("Null token returned");
                        break;
                    }
                    if (token.type == TokenType.EOF) {
                        writeln("EOF token");
                        break;
                    }
                    writeln("", token.line, ":", token.pos, "\t", token.toString);
                }
            } catch (Exception e) {
                writeln("Exception " ~ e.toString);
            }
        } catch (Exception e) {
            writeln("Exception " ~ e.toString);
        }
    }
}

/// converts named entity to character, returns 0 if not found
dchar entityToChar(string name) {
    if (auto ch = name in entityToCharMap) {
        return *ch;
    }
    return 0;
}

/// finds entity name for character, returns null if not found
string charToEntity(dchar ch) {
    if (auto name = ch in charToEntityMap) {
        return *name;
    }
    return null;
}

/// entity name -> character lookup table, filled by the module constructor below
private __gshared dchar[string]entityToCharMap;
/// character -> entity name lookup table, filled by the module constructor below
private __gshared string[dchar]charToEntityMap;

/// registers a named entity in both lookup tables
private void addEntity(string name, dchar ch) {
    entityToCharMap[name] = ch;
    charToEntityMap[ch] = name;
}

/// Fills the entity tables once per process.
/// This must be a `shared static this`: both tables are __gshared, and a thread-local
/// `static this` (which is what `__gshared static this()` still is) would refill them
/// from every newly started thread without any synchronization.
shared static this() {
    addEntity("quot", 34);
    addEntity("amp", 38);
    addEntity("lt", 60);
    addEntity("gt", 62);
    addEntity("OElig", 338);
    addEntity("oelig", 339);
    addEntity("Scaron", 352);
    addEntity("scaron", 353);
    addEntity("Yuml", 376);
    addEntity("circ", 710);
    addEntity("tilde", 732);
    addEntity("ensp", 8194);
    addEntity("emsp", 8195);
    addEntity("thinsp", 8201);
    addEntity("zwnj", 8204);
    addEntity("zwj", 8205);
    addEntity("lrm", 8206);
    addEntity("rlm", 8207);
    addEntity("ndash", 8211);
    addEntity("mdash", 8212);
    addEntity("lsquo", 8216);
    addEntity("rsquo", 8217);
    addEntity("sbquo", 8218);
    addEntity("ldquo", 8220);
    addEntity("rdquo", 8221);
    addEntity("bdquo", 8222);
    addEntity("dagger", 8224);
    addEntity("Dagger", 8225);
    addEntity("permil", 8240);
    addEntity("lsaquo", 8249);
    addEntity("rsaquo", 8250);
    addEntity("euro", 8364);
    addEntity("nbsp", 160);
    addEntity("iexcl", 161);
    addEntity("cent", 162);
    addEntity("pound", 163);
    addEntity("curren", 164);
    addEntity("yen", 165);
    addEntity("brvbar", 166);
    addEntity("sect", 167);
    addEntity("uml", 168);
    addEntity("copy", 169);
    addEntity("ordf", 170);
    addEntity("laquo", 171);
    addEntity("not", 172);
    addEntity("shy", 173);
    addEntity("reg", 174);
    addEntity("macr", 175);
    addEntity("deg", 176);
    addEntity("plusmn", 177);
    addEntity("sup2", 178);
    addEntity("sup3", 179);
    addEntity("acute", 180);
    addEntity("micro", 181);
    addEntity("para", 182);
    addEntity("middot", 183);
    addEntity("cedil", 184);
    addEntity("sup1", 185);
    addEntity("ordm", 186);
    addEntity("raquo", 187);
    addEntity("frac14", 188);
    addEntity("frac12", 189);
    addEntity("frac34", 190);
    addEntity("iquest", 191);
    addEntity("Agrave", 192);
    addEntity("Aacute", 193);
    addEntity("Acirc", 194);
    addEntity("Atilde", 195);
    addEntity("Auml", 196);
    addEntity("Aring", 197);
    addEntity("AElig", 198);
    addEntity("Ccedil", 199);
    addEntity("Egrave", 200);
    addEntity("Eacute", 201);
    addEntity("Ecirc", 202);
    addEntity("Euml", 203);
    addEntity("Igrave", 204);
    addEntity("Iacute", 205);
    addEntity("Icirc", 206);
    addEntity("Iuml", 207);
    addEntity("ETH", 208);
    addEntity("Ntilde", 209);
    addEntity("Ograve", 210);
    addEntity("Oacute", 211);
    addEntity("Ocirc", 212);
    addEntity("Otilde", 213);
    addEntity("Ouml", 214);
    addEntity("times", 215);
    addEntity("Oslash", 216);
    addEntity("Ugrave", 217);
    addEntity("Uacute", 218);
    addEntity("Ucirc", 219);
    addEntity("Uuml", 220);
    addEntity("Yacute", 221);
    addEntity("THORN", 222);
    addEntity("szlig", 223);
    addEntity("agrave", 224);
    addEntity("aacute", 225);
    addEntity("acirc", 226);
    addEntity("atilde", 227);
    addEntity("auml", 228);
    addEntity("aring", 229);
    addEntity("aelig", 230);
    addEntity("ccedil", 231);
    addEntity("egrave", 232);
    addEntity("eacute", 233);
    addEntity("ecirc", 234);
    addEntity("euml", 235);
    addEntity("igrave", 236);
    addEntity("iacute", 237);
    addEntity("icirc", 238);
    addEntity("iuml", 239);
    addEntity("eth", 240);
    addEntity("ntilde", 241);
    addEntity("ograve", 242);
    addEntity("oacute", 243);
    addEntity("ocirc", 244);
    addEntity("otilde", 245);
    addEntity("ouml", 246);
    addEntity("divide", 247);
    addEntity("oslash", 248);
    addEntity("ugrave", 249);
    addEntity("uacute", 250);
    addEntity("ucirc", 251);
    addEntity("uuml", 252);
    addEntity("yacute", 253);
    addEntity("thorn", 254);
    addEntity("yuml", 255);
    addEntity("fnof", 402);
    addEntity("Alpha", 913);
    addEntity("Beta", 914);
    addEntity("Gamma", 915);
    addEntity("Delta", 916);
    addEntity("Epsilon", 917);
    addEntity("Zeta", 918);
    addEntity("Eta", 919);
    addEntity("Theta", 920);
    addEntity("Iota", 921);
    addEntity("Kappa", 922);
    addEntity("Lambda", 923);
    addEntity("Mu", 924);
    addEntity("Nu", 925);
    addEntity("Xi", 926);
    addEntity("Omicron", 927);
    addEntity("Pi", 928);
    addEntity("Rho", 929);
    addEntity("Sigma", 931);
    addEntity("Tau", 932);
    addEntity("Upsilon", 933);
    addEntity("Phi", 934);
    addEntity("Chi", 935);
    addEntity("Psi", 936);
    addEntity("Omega", 937);
    addEntity("alpha", 945);
    addEntity("beta", 946);
    addEntity("gamma", 947);
    addEntity("delta", 948);
    addEntity("epsilon", 949);
    addEntity("zeta", 950);
    addEntity("eta", 951);
    addEntity("theta", 952);
    addEntity("iota", 953);
    addEntity("kappa", 954);
    addEntity("lambda", 955);
    addEntity("mu", 956);
    addEntity("nu", 957);
    addEntity("xi", 958);
    addEntity("omicron", 959);
    addEntity("pi", 960);
    addEntity("rho", 961);
    addEntity("sigmaf", 962);
    addEntity("sigma", 963);
    addEntity("tau", 964);
    addEntity("upsilon", 965);
    addEntity("phi", 966);
    addEntity("chi", 967);
    addEntity("psi", 968);
    addEntity("omega", 969);
    addEntity("thetasym", 977);
    addEntity("upsih", 978);
    addEntity("piv", 982);
    addEntity("bull", 8226);
    addEntity("hellip", 8230);
    addEntity("prime", 8242);
    addEntity("Prime", 8243);
    addEntity("oline", 8254);
    addEntity("frasl", 8260);
    addEntity("weierp", 8472);
    addEntity("image", 8465);
    addEntity("real", 8476);
    addEntity("trade", 8482);
    addEntity("alefsym", 8501);
    addEntity("larr", 8592);
    addEntity("uarr", 8593);
    addEntity("rarr", 8594);
    addEntity("darr", 8595);
    addEntity("harr", 8596);
    addEntity("crarr", 8629);
    addEntity("lArr", 8656);
    addEntity("uArr", 8657);
    addEntity("rArr", 8658);
    addEntity("dArr", 8659);
    addEntity("hArr", 8660);
    addEntity("forall", 8704);
    addEntity("part", 8706);
    addEntity("exist", 8707);
    addEntity("empty", 8709);
    addEntity("nabla", 8711);
    addEntity("isin", 8712);
    addEntity("notin", 8713);
    addEntity("ni", 8715);
    addEntity("prod", 8719);
    addEntity("sum", 8721);
    addEntity("minus", 8722);
    addEntity("lowast", 8727);
    addEntity("radic", 8730);
    addEntity("prop", 8733);
    addEntity("infin", 8734);
    addEntity("ang", 8736);
    addEntity("and", 8743);
    addEntity("or", 8744);
    addEntity("cap", 8745);
    addEntity("cup", 8746);
    addEntity("int", 8747);
    addEntity("there4", 8756);
    addEntity("sim", 8764);
    addEntity("cong", 8773);
    addEntity("asymp", 8776);
    addEntity("ne", 8800);
    addEntity("equiv", 8801);
    addEntity("le", 8804);
    addEntity("ge", 8805);
    addEntity("sub", 8834);
    addEntity("sup", 8835);
    addEntity("nsub", 8836);
    addEntity("sube", 8838);
    addEntity("supe", 8839);
    addEntity("oplus", 8853);
    addEntity("otimes", 8855);
    addEntity("perp", 8869);
    addEntity("sdot", 8901);
    addEntity("lceil", 8968);
    addEntity("rceil", 8969);
    addEntity("lfloor", 8970);
    addEntity("rfloor", 8971);
    addEntity("loz", 9674);
    addEntity("spades", 9824);
    addEntity("clubs", 9827);
    addEntity("hearts", 9829);
    addEntity("diams", 9830);
    addEntity("lang", 10216);
    addEntity("rang", 10217);
}



//void runTokenizerTest()
/// Tokenizer regression tests: each case feeds a source snippet to a Tokenizer and
/// checks the produced tokens, one expectation object per token.
unittest
{
    import std.algorithm;

    /// Base expectation for a single token; remembers where the expectation was created
    /// so a failure can be reported against that location. Accepts any token by default.
    class TokenTest {
        int _line;
        string _file;
        this(string file, int line) {
            _file = file;
            _line = line;
        }
        /// returns true if the token satisfies this expectation
        bool doTest(Token token) {
            return true;
        }
        /// pulls the next token from the tokenizer and asserts that it matches
        void execute(Tokenizer tokenizer) {
            Token token = tokenizer.nextToken();
            // a null token can never match; report it instead of dereferencing it below
            if (token is null) {
                assert(false, " token does not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: null expected: " ~ toString);
            }
            if (!doTest(token)) {
                assert(false, " token does not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString);
            }
        }
        public override @property string toString() {
            return "TokenTest";
        }
    }

    /// runs the tokenizer over `code` and checks the leading tokens against `tokens`, in order
    void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
        Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
        foreach (expected; tokens) {
            expected.execute(tokenizer);
        }
    }

    /// expects a KEYWORD token with the given keyword code
    class KeywordTest : TokenTest {
        Keyword _code;
        this(Keyword code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.KEYWORD)
                return false;
            if (token.keyword != _code)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Keyword:" ~ to!string(_code);
        }
    }

    /// expects an OP token with the given operator code
    class OpTest : TokenTest {
        OpCode _code;
        this(OpCode code, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _code = code;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.OP)
                return false;
            if (token.opCode != _code)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Op:" ~ to!string(_code);
        }
    }

    /// expects a STRING token with the given text and literal type (0 when no suffix is expected)
    class StringTest : TokenTest {
        dstring _value;
        dchar _literalType;
        this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _literalType = literalType;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.STRING)
                return false;
            if (!token.text.equal(_value))
                return false;
            if (token.literalType != _literalType)
                return false;
            return true;
        }
        public override @property string toString() {
            return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' '));
        }
    }

    /// expects an INTEGER token with the given value and unsigned/long flags
    class IntegerTest : TokenTest {
        ulong _value;
        bool _unsigned;
        bool _long;
        this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _unsigned = unsignedFlag;
            _long = longFlag;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.INTEGER)
                return false;
            if (token.intValue != _value)
                return false;
            if (token.isUnsigned != _unsigned)
                return false;
            if (token.isLong != _long)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Integer:" ~ to!string(_value);
        }
    }

    /// expects a FLOAT token whose value is within a relative error of 1e-6 of `value`,
    /// with matching precision code and imaginary flag
    class RealTest : TokenTest {
        real _value;
        ubyte _precision;
        bool _imaginary;
        this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
            _precision = precision;
            _imaginary = imaginary;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.FLOAT)
                return false;
            // compare magnitudes: allowed deviation is one millionth of the expected value
            real diff = fabs(token.realValue - _value);
            real maxerr = fabs(_value / 1000000);
            if (diff > maxerr)
                return false;
            if (token.precision != _precision)
                return false;
            if (token.isImaginary != _imaginary)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
        }
    }

    /// expects an IDENTIFIER token with the given text
    class IdentTest : TokenTest {
        string _value;
        this(string value, string file = __FILE__, uint line = __LINE__) {
            super(file, line);
            _value = value;
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.IDENTIFIER)
                return false;
            if (! to!string(token.text).equal(_value))
                return false;
            return true;
        }
        public override @property string toString() {
            return "Ident:" ~ _value;
        }
    }

    /// expects any COMMENT token
    class CommentTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.COMMENT)
                return false;
            return true;
        }
        public override @property string toString() {
            return "Comment";
        }
    }

    /// expects the EOF token
    class EOFTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.EOF)
                return false;
            return true;
        }
        public override @property string toString() {
            return "EOF";
        }
    }

    /// expects any WHITESPACE token
    class WhiteSpaceTest : TokenTest {
        this(string file = __FILE__, uint line = __LINE__) {
            super(file, line);
        }
        override bool doTest(Token token) {
            if (token.type != TokenType.WHITESPACE)
                return false;
            return true;
        }
        public override @property string toString() {
            return "whiteSpace";
        }
    }

    // factory helpers: capture the call site (file/line) of each expectation
    TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) {
        return new StringTest(value, literalType, file, line);
    }
    TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
        return new IntegerTest(value, unsignedFlag, longFlag, file, line);
    }
    // precision is ubyte to match RealTest's constructor
    TokenTest checkReal(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
        return new RealTest(value, precision, imaginary, file, line);
    }
    TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
        return new IdentTest(value, file, line);
    }
    TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
        return new KeywordTest(value, file, line);
    }
    TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
        return new OpTest(value, file, line);
    }
    TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
        return new WhiteSpaceTest(file, line);
    }
    TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
        return new CommentTest(file, line);
    }
    TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
        return new EOFTest(file, line);
    }

    // test strings
    testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]);

    // test strings
    testTokenizer(q"TEST
"simple string"
"simple\nstring"
`simple string`
"simple string"d
"simple string"c
"simple string"w
"simple\"string"
"\r\n\f\t\\\"\'&"
TEST"
        , [
        checkString("simple string"),
        checkSpace(),
        checkString("simple\nstring"),
        checkSpace(),
        checkString("simple string"),
        checkSpace(),
        checkString("simple string", 'd'),
        checkSpace(),
        checkString("simple string", 'c'),
        checkSpace(),
        checkString("simple string", 'w'),
        checkSpace(),
        checkString("simple\"string"),
        checkSpace(),
        checkString("\r\n\f\t\\\"\'&"),
    ]);
    // basic test
    testTokenizer(q"TEST
int i;
TEST"
        , [
        checkKeyword(Keyword.INT),
        checkSpace(),
        checkIdent("i"),
        checkOp(OpCode.SEMICOLON),
        checkEOF()
    ]);
    // test numbers
    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f"
        , [
        checkInteger(13),
        checkSpace(),
        checkInteger(0x123abcd, true, false),
        checkSpace(),
        checkInteger(0xabc, false, true),
        checkSpace(),
        checkInteger(std.conv.octal!743),
        checkSpace(),
        checkInteger(192_837_465),
        checkSpace(),
        checkInteger(0),
        checkSpace(),
        checkInteger(192837465),
        checkSpace(),
        checkReal(5.25),
        checkSpace(),
        checkReal(12.3f, 0),
        checkSpace(),
        checkReal(54.1L, 2),
        checkSpace(),
        checkReal(67.1, 1, true),
        checkSpace(),
        checkReal(3e3),
        checkSpace(),
        checkReal(25.67e-5f, 0),
        checkEOF()
    ]);
    // strange keyword detection: `fork;` or `ind;` keyword in beginning of ident is highlighted
    testTokenizer("fork;", [checkIdent("fork"),checkOp(OpCode.SEMICOLON),checkEOF()]);

}