1 module ddc.lexer.tokenizer; 2 3 import ddc.lexer.textsource; 4 import ddc.lexer.exceptions; 5 6 import std.stdio; 7 import std.datetime; 8 import std.conv; 9 import std.utf; 10 import std.math; 11 12 enum TokenType : ubyte { 13 EOF, 14 //EOL, 15 WHITESPACE, 16 COMMENT, 17 IDENTIFIER, 18 STRING, 19 CHARACTER, 20 INTEGER, 21 FLOAT, 22 KEYWORD, 23 OP, 24 INVALID 25 } 26 27 // table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _ 28 // max code is 0xd7ff 29 //1728 30 const uint[1728] UNIVERSAL_ALPHA_FLAGS = [ 31 0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff 32 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff 33 0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff 34 0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff 35 0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff 36 0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff 37 0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff 38 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff 39 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff 40 0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff 41 0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff 42 0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff 43 0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff 44 0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff 45 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff 46 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff 47 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff 48 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff 49 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff 50 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff 51 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff 52 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff 53 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff 54 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff 55 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff 56 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff 57 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff 58 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff 59 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff 60 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff 61 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff 62 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff 63 0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff 64 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff 65 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff 66 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff 67 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff 68 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff 69 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff 70 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff 71 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff 72 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff 73 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff 74 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff 75 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff 76 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff 77 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff 78 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff 79 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff 80 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff 81 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff 82 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff 83 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff 84 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff 85 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff 86 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff 87 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff 88 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff 89 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff 90 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff 91 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff 92 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff 93 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff 94 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff 95 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff 96 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff 97 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff 98 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff 99 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff 100 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff 101 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff 102 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff 103 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff 104 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff 105 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff 106 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff 107 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff 108 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff 109 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff 110 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff 111 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff 112 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff 113 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff 114 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff 115 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff 116 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff 117 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff 118 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff 119 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff 120 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff 121 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff 122 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff 123 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff 124 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff 125 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff 126 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff 127 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff 128 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff 129 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff 130 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff 131 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff 132 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff 133 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff 134 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff 135 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff 136 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff 137 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff 138 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff 139 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff 140 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff 141 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff 142 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff 143 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff 144 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff 145 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff 146 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff 147 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff 148 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff 149 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff 150 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff 151 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff 152 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff 153 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff 154 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff 155 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff 156 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff 157 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff 158 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff 159 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff 160 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff 161 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff 162 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff 163 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff 164 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff 165 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff 166 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff 167 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff 168 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff 169 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff 170 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff 171 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff 172 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff 173 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff 174 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff 175 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff 176 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff 177 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff 178 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff 179 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff 180 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff 181 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff 182 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff 183 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff 184 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff 185 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff 186 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff 187 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff 188 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff 189 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff 190 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff 191 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff 192 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff 193 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff 194 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff 195 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff 196 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff 197 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff 198 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff 199 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff 200 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff 201 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff 202 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff 203 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff 204 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff 205 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff 206 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff 207 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff 208 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff 209 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff 210 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff 211 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff 212 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff 213 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff 214 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff 215 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff 216 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff 217 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff 218 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff 219 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff 220 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff 221 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff 222 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff 223 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff 224 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff 225 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff 226 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff 227 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff 228 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff 229 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff 230 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff 231 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff 232 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff 233 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff 234 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff 235 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff 236 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff 237 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff 238 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff 239 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff 240 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff 241 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff 242 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff 243 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff 244 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff 245 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff 246 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff 247 ]; 248 249 /// returns true if character is A..Z, a..z, _ or universal alpha 250 bool isUniversalAlpha(dchar ch) pure nothrow { 251 return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31)))); 252 } 253 254 /// character can present at the beginning of identifier 255 bool isIdentStartChar(dchar ch) pure nothrow { 256 return isUniversalAlpha(ch); 257 } 258 259 /// character can present in middle of identifier 260 bool isIdentMiddleChar(dchar ch) pure nothrow { 261 return (ch >= '0' && ch <='9') || isUniversalAlpha(ch); 262 } 263 264 immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false; 265 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) { 266 bool r(dchar ch, wchar v) pure nothrow { 267 return ch == v; 268 } 269 270 bool r(dchar ch, wchar v1, wchar v2) pure nothrow { 271 return ch >= v1 && ch <= v2; 272 } 273 274 bool isUniversalAlphaSlow(dchar c) pure nothrow { 275 return 276 // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217, 277 // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F 278 r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217) 279 || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F) 280 //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6, 281 //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D, 282 //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D, 283 //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC, 284 //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC 285 || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6) 286 || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D) 287 || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D) 288 || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC) 289 || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC) 290 //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4, 291 //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9 292 || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4) 293 || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9) 294 //Armenian: 0531−0556, 0561−0587 295 || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587) 296 //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA, 297 //05F0−05F2 298 || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA) 299 || r(c, 0x05F0,0x05F2) 300 //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE, 301 //06D0−06DC, 06E5−06E8, 06EA−06ED 302 || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE) 303 || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED) 304 //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963 305 || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963) 306 //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0, 307 //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD, 308 //09DC−09DD, 09DF−09E3, 09F0−09F1 309 || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0) 310 || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD) 311 || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1) 312 //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30, 313 //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48, 314 //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74 315 || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30) 316 || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48) 317 || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74) 318 //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8, 319 //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5, 320 //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0 321 || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8) 322 || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5) 323 || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0) 324 // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30, 325 //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D, 326 //0B5C−0B5D, 0B5F−0B61 327 || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30) 328 || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D) 329 || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61) 330 //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A, 331 //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5, 332 //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD 333 || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A) 334 || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5) 335 || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD) 336 //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33, 337 //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61 338 || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33) 339 || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61) 340 //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3, 341 //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE, 342 //0CE0−0CE1 343 || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3) 344 || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE) 345 || r(c, 0x0CE0,0x0CE1) 346 //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39, 347 //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61 348 || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39) 349 || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61) 350 //Thai: 0E01−0E3A, 0E40−0E5B 351 || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B) 352 //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97, 353 //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB, 354 //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6, 355 //0EC8−0ECD, 0EDC−0EDD 356 || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97) 357 || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB) 358 || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6) 359 || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD) 360 //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69, 361 //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD, 362 //0FB1−0FB7, 0FB9 363 || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69) 364 || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD) 365 || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9) 366 //Georgian: 10A0−10C5, 10D0−10F6 367 || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6) 368 //Hiragana: 3041−3093, 309B−309C 369 || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C) 370 //Katakana: 30A1−30F6, 30FB−30FC 371 || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC) 372 //Bopomofo: 3105−312C 373 || r(c, 0x3105,0x312C) 374 //CJK Unified Ideographs: 4E00−9FA5 375 || r(c, 0x4E00,0x9FA5) 376 //Hangul: AC00−D7A3 377 || r(c, 0xAC00,0xD7A3) 378 //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F, 379 //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF, 380 //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33 381 || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F) 382 || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF) 383 || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33) 384 //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1, 385 //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102, 386 //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131, 387 //2133−2138, 2160−2182, 3005−3007, 3021−3029 388 || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1) 389 || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102) 390 || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131) 391 || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029) 392 ; 393 } 394 395 } 396 397 unittest { 398 399 400 static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) { 401 immutable uint itemsInRow = 8; 402 403 uint maxAlpha = 0; 404 for (uint i = 0; i < 0x10000; i++) { 405 uint ch = i; 406 if (isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) 407 maxAlpha = i; 408 } 409 maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1; 410 writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _"); 411 writefln("// max code is 0x%04x", maxAlpha); 412 writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = ["); 413 for (uint i = 0; i <= maxAlpha; i += 32) { 414 if ((i / 32) % itemsInRow == 0) 415 write(" "); 416 uint flags = 0; 417 for (uint j = 0; j < 32; j++) { 418 uint ch = i + j; 419 bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); 420 if (flag) 421 flags |= (1 << j); 422 } 423 writef("0x%08x", flags); 424 if (i != maxAlpha / 32 * 32) 425 write(","); 426 if ((i / 32) % itemsInRow == itemsInRow - 1) 427 writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31); 428 } 429 writeln("];"); 430 431 for (uint ch = 0; ch < 0x100000; ch++) { 432 bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); 433 bool flag2 = isUniversalAlpha(ch); 434 if (flag2 != flag) { 435 isUniversalAlpha(ch); 436 writefln("universalAlpha test failed for char %06x expeced %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0); 437 } 438 assert(flag2 == flag); 439 } 440 } 441 } 442 443 enum OpCode : ubyte { 444 NONE, // no op 445 DIV, // / 446 DIV_EQ, // /= 447 DOT, // . 448 DOT_DOT, // .. 449 DOT_DOT_DOT,// ... 450 AND, // & 451 AND_EQ, // &= 452 LOG_AND, // && 453 OR, // | 454 OR_EQ, // |= 455 LOG_OR, // || 456 MINUS, // - 457 MINUS_EQ, // -= 458 MINUS_MINUS,// -- 459 PLUS, // + 460 PLUS_EQ, // += 461 PLUS_PLUS, // ++ 462 LT, // < 463 LT_EQ, // <= 464 SHL, // << 465 SHL_EQ, // <<= 466 LT_GT, // <> 467 NE_EQ, // <>= 468 GT, // > 469 GT_EQ, // >= 470 SHR_EQ, // >>= 471 ASR_EQ, // >>>= 472 SHR, // >> 473 ASR, // >>> 474 NOT, // ! 475 NOT_EQ, // != 476 NOT_LT_GT, // !<> 477 NOT_LT_GT_EQ, // !<>= 478 NOT_LT, // !< 479 NOT_LT_EQ, // !<= 480 NOT_GT, // !> 481 NOT_GT_EQ, // !>= 482 PAR_OPEN, // ( 483 PAR_CLOSE, // ) 484 SQ_OPEN, // [ 485 SQ_CLOSE, // ] 486 CURL_OPEN, // { 487 CURL_CLOSE, // } 488 QUEST, // ? 489 COMMA, // , 490 SEMICOLON, // ; 491 COLON, // : 492 DOLLAR, // $ 493 EQ, // = 494 QE_EQ, // == 495 MUL, // * 496 MUL_EQ, // *= 497 MOD, // % 498 MOD_EQ, // %= 499 XOR, // ^ 500 XOR_EQ, // ^= 501 LOG_XOR, // ^^ 502 LOG_XOR_EQ, // ^^= 503 INV, // ~ 504 INV_EQ, // ~= 505 AT, // @ 506 EQ_GT, // => 507 SHARP // # 508 }; 509 510 immutable dstring[] OP_CODE_STRINGS = [ 511 "", 512 "/", 513 "/=", 514 ".", 515 "..", 516 "...", 517 "&", 518 "&=", 519 "&&", 520 "|", 521 "|=", 522 "||", 523 "-", 524 "-=", 525 "--", 526 "+", 527 "+=", 528 "++", 529 "<", 530 "<=", 531 "<<", 532 "<<=", 533 "<>", 534 "<>=", 535 ">", 536 ">=", 537 ">>=", 538 ">>>=", 539 ">>", 540 ">>>", 541 "!", 542 "!=", 543 "!<>", 544 "!<>=", 545 "!<", 546 "!<=", 547 "!>", 548 "!>=", 549 "(", 550 ")", 551 "[", 552 "]", 553 "{", 554 "}", 555 "?", 556 ",", 557 ";", 558 ":", 559 "$", 560 "=", 561 "==", 562 "*", 563 "*=", 564 "%", 565 "%=", 566 "^", 567 "^=", 568 "^^", 569 "^^=", 570 "~", 571 "~=", 572 "@", 573 "=>", 574 "#" 575 ]; 576 577 dstring getOpNameD(OpCode op) pure nothrow { 578 return OP_CODE_STRINGS[op]; 579 }; 580 581 enum Keyword : ubyte { 582 NONE, 583 ABSTRACT, 584 ALIAS, 585 ALIGN, 586 ASM, 587 ASSERT, 588 AUTO, 589 590 BODY, 591 BOOL, 592 BREAK, 593 BYTE, 594 595 CASE, 596 CAST, 597 CATCH, 598 CDOUBLE, 599 CENT, 600 CFLOAT, 601 CHAR, 602 CLASS, 603 CONST, 604 CONTINUE, 605 CREAL, 606 607 DCHAR, 608 DEBUG, 609 DEFAULT, 610 DELEGATE, 611 DELETE, 612 DEPRECATED, 613 DO, 614 DOUBLE, 615 616 ELSE, 617 ENUM, 618 EXPORT, 619 EXTERN, 620 621 FALSE, 622 FINAL, 623 FINALLY, 624 FLOAT, 625 FOR, 626 FOREACH, 627 FOREACH_REVERSE, 628 FUNCTION, 629 630 GOTO, 631 632 IDOUBLE, 633 IF, 634 IFLOAT, 635 IMMUTABLE, 636 IMPORT, 637 IN, 638 INOUT, 639 INT, 640 INTERFACE, 641 INVARIANT, 642 IREAL, 643 IS, 644 645 LAZY, 646 LONG, 647 648 MACRO, 649 MIXIN, 650 MODULE, 651 652 NEW, 653 NOTHROW, 654 NULL, 655 656 OUT, 657 OVERRIDE, 658 659 PACKAGE, 660 PRAGMA, 661 PRIVATE, 662 PROTECTED, 663 PUBLIC, 664 PURE, 665 666 REAL, 667 REF, 668 RETURN, 669 670 SCOPE, 671 SHARED, 672 SHORT, 673 STATIC, 674 STRUCT, 675 SUPER, 676 SWITCH, 677 SYNCHRONIZED, 678 679 TEMPLATE, 680 THIS, 681 THROW, 682 TRUE, 683 TRY, 684 TYPEDEF, 685 TYPEID, 686 TYPEOF, 687 688 UBYTE, 689 UCENT, 690 UINT, 691 ULONG, 692 UNION, 693 UNITTEST, 694 USHORT, 695 696 VERSION, 697 VOID, 698 VOLATILE, 699 700 WCHAR, 701 WHILE, 702 WITH, 703 704 FILE, 705 MODULE__, 706 LINE, 707 FUNCTION__, 708 PRETTY_FUNCTION, 709 710 //Special Token Replaced with 711 DATE, // string literal of the date of compilation "mmm dd yyyy" 712 EOF, // sets the scanner to the end of the file 713 TIME, // string literal of the time of compilation "hh:mm:ss" 714 TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 715 VENDOR, // Compiler vendor string, such as "Digital Mars D" 716 VERSION_, // Compiler version as an integer, such as 2001 717 718 GSHARED, 719 TRAITS, 720 VECTOR, 721 PARAMETERS, 722 723 } 724 725 immutable dstring[] KEYWORD_STRINGS = [ 726 "", 727 "abstract", 728 "alias", 729 "align", 730 "asm", 731 "assert", 732 "auto", 733 734 "body", 735 "bool", 736 "break", 737 "byte", 738 739 "case", 740 "cast", 741 "catch", 742 "cdouble", 743 "cent", 744 "cfloat", 745 "char", 746 "class", 747 "const", 748 "continue", 749 "creal", 750 751 "dchar", 752 "debug", 753 "default", 754 "delegate", 755 "delete", 756 "deprecated", 757 "do", 758 "double", 759 760 "else", 761 "enum", 762 "export", 763 "extern", 764 765 "false", 766 "final", 767 "finally", 768 "float", 769 "for", 770 "foreach", 771 "foreach_reverse", 772 "function", 773 774 "goto", 775 776 "idouble", 777 "if", 778 "ifloat", 779 "immutable", 780 "import", 781 "in", 782 "inout", 783 "int", 784 "interface", 785 "invariant", 786 "ireal", 787 "is", 788 789 "lazy", 790 "long", 791 792 "macro", 793 "mixin", 794 "module", 795 796 "new", 797 "nothrow", 798 "null", 799 800 "out", 801 "override", 802 803 "package", 804 "pragma", 805 "private", 806 "protected", 807 "public", 808 "pure", 809 810 "real", 811 "ref", 812 "return", 813 814 "scope", 815 "shared", 816 "short", 817 "static", 818 "struct", 819 "super", 820 "switch", 821 "synchronized", 822 823 "template", 824 "this", 825 "throw", 826 "true", 827 "try", 828 "typedef", 829 "typeid", 830 "typeof", 831 832 "ubyte", 833 "ucent", 834 "uint", 835 "ulong", 836 "union", 837 "unittest", 838 "ushort", 839 840 "version", 841 "void", 842 "volatile", 843 844 "wchar", 845 "while", 846 "with", 847 848 "__FILE__", 849 "__MODULE__", 850 "__LINE__", 851 "__FUNCTION__", 852 "__PRETTY_FUNCTION__", 853 854 //Special Token Replaced with 855 "__DATE__", // string literal of the date of compilation "mmm dd yyyy" 856 "__EOF__", // sets the scanner to the end of the file 857 "__TIME__", // string literal of the time of compilation "hh:mm:ss" 858 "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 859 "__VENDOR__", // Compiler vendor string, such as "Digital Mars D" 860 "__VERSION__", // Compiler version as an integer, such as 2001 861 862 863 "__gshared", 864 "__traits", 865 "__vector", 866 "__parameters" 867 ]; 868 869 public dstring getKeywordNameD(Keyword keyword) pure nothrow { 870 return KEYWORD_STRINGS[keyword]; 871 }; 872 873 public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow { 874 for (Keyword i = start; i <= end; i++) { 875 dstring s = KEYWORD_STRINGS[i]; 876 if (s.length > len + 1) 877 continue; // too long 878 bool found = true; 879 for (uint j = 1; j < s.length; j++) { 880 if (s[j] != name[j - 1]) { 881 found = false; 882 break; 883 } 884 } 885 if (found) { 886 if (s.length == len - 1 || !isIdentMiddleChar(name[s.length - 1])) { 887 pos += s.length - 1; 888 return i; 889 } 890 } 891 } 892 return Keyword.NONE; 893 } 894 895 /** 896 * Token. 897 */ 898 class Token { 899 // 32bit 64bit platform 900 // vtable 4 bytes 8 bytes 901 protected SourceFile _file; // 4 bytes 8 bytes 902 protected int _line; // 4 bytes 4 bytes 903 protected int _pos; // 4 bytes 4 bytes 904 protected TokenType _type; // 1 byte 1 byte 905 // total 17 bytes 25 bytes 906 /// returns token type 907 @property TokenType type() { return _type; } 908 /// returns file info for source 909 @property SourceFile filename() { return _file; } 910 /// returns 1-based source line number of token start 911 @property int line() { return _line; } 912 /// returns 1-based source line position of token start 913 @property int pos() { return _pos; } 914 /// returns token text 915 @property dstring text() { return null; } 916 917 // number token properties 918 @property dchar literalType() { return 0; } 919 @property ulong intValue() { return 0; } 920 @property bool isUnsigned() { return false; } 921 @property ulong isLong() { return false; } 922 @property real realValue() { return 0; } 923 @property double doubleValue() { return 0; } 924 @property float floatValue() { return 0; } 925 @property byte precision() { return 0; } 926 @property bool isImaginary() { return false; } 927 @property bool isBracket() { 928 OpCode op = opCode; 929 return op == OpCode.PAR_OPEN 930 || op == OpCode.PAR_CLOSE 931 || op == OpCode.SQ_OPEN 932 || op == OpCode.SQ_CLOSE 933 || op == OpCode.CURL_OPEN 934 || op == OpCode.CURL_CLOSE; 935 } 936 @property bool isOpenBracket() { 937 OpCode op = opCode; 938 return op == OpCode.PAR_OPEN 939 || op == OpCode.SQ_OPEN 940 || op == OpCode.CURL_OPEN; 941 } 942 @property bool isCloseBracket() { 943 OpCode op = opCode; 944 return op == OpCode.PAR_CLOSE 945 || op == OpCode.SQ_CLOSE 946 || op == OpCode.CURL_CLOSE; 947 } 948 @property bool isEof() { return type == TokenType.EOF; } 949 950 /// returns opcode ID - for opcode tokens 951 @property OpCode opCode() { return OpCode.NONE; } 952 /// returns keyword ID - for keyword tokens 953 @property Keyword keyword() { return Keyword.NONE; } 954 /// returns true if this is documentation comment token 955 @property bool isDocumentationComment() { return false; } 956 /// returns true if this is multiline 957 @property bool isMultilineComment() { return false; } 958 959 // error handling 960 961 /// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer) 962 @property bool isError() { return type == TokenType.INVALID; } 963 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 964 @property string errorMessage() { return null; } 965 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 966 @property int errorCode() { return 0; } 967 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 968 @property TokenType invalidTokenType() { return TokenType.INVALID; } 969 970 971 this(TokenType type) { 972 _type = type; 973 } 974 975 this(TokenType type, SourceFile file, int line, int pos) { 976 _type = type; 977 _file = file; 978 _line = line; 979 _pos = pos; 980 } 981 /// set start position for token (line is 1-based, pos is 0-based) 982 void setPos(SourceFile file, int line, int pos) { 983 _file = file; 984 _line = line; 985 _pos = pos + 1; 986 } 987 /// set source file information for token 988 void setFile(SourceFile file) { 989 _file = file; 990 } 991 /// set start position for token (line is 1-based, pos is 0-based) 992 void setPos(int line, int pos) { 993 _line = line; 994 _pos = pos + 1; 995 } 996 997 public abstract Token clone(); 998 public override @property string toString() { 999 return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword) 1000 ~" \"" ~ toUTF8(text()) ~ "\""; 1001 } 1002 } 1003 1004 class EofToken : Token { 1005 this() { 1006 super(TokenType.EOF); 1007 } 1008 this(SourceFile file, uint line, uint pos) { 1009 super(TokenType.EOF, file, line, pos); 1010 } 1011 override public Token clone() { 1012 return new EofToken(_file, _line, _pos); 1013 } 1014 public override @property string toString() { 1015 return "EOF"; 1016 } 1017 } 1018 1019 // treat as white space 1020 //class EolToken : Token { 1021 // this(string file, uint line, uint pos) { 1022 // super(TokenType.EOL, file, line, pos); 1023 // } 1024 //} 1025 1026 /// white space token 1027 class WhiteSpaceToken : Token { 1028 this() { 1029 super(TokenType.WHITESPACE); 1030 } 1031 this(SourceFile file, uint line, uint pos) { 1032 super(TokenType.WHITESPACE, file, line, pos); 1033 } 1034 override public Token clone() { 1035 return new WhiteSpaceToken(_file, _line, _pos); 1036 } 1037 public override @property string toString() { 1038 return "WhiteSpace"; 1039 } 1040 } 1041 1042 class OpToken : Token { 1043 OpCode _op; 1044 public @property override OpCode opCode() { return _op; } 1045 public @property void opCode(OpCode op) { _op = op; } 1046 public @property override dstring text() { return getOpNameD(_op); } 1047 this() { 1048 super(TokenType.OP); 1049 } 1050 this(SourceFile file, uint line, uint pos) { 1051 super(TokenType.OP, file, line, pos); 1052 } 1053 override public Token clone() { 1054 OpToken res = new OpToken(_file, _line, _pos); 1055 res._op = _op; 1056 return res; 1057 } 1058 public override @property string toString() { 1059 return "Op:" ~ to!string(_op); 1060 } 1061 } 1062 1063 class KeywordToken : Token { 1064 Keyword _keyword; 1065 public @property override Keyword keyword() { return _keyword; } 1066 public @property void keyword(Keyword keyword) { _keyword = keyword; } 1067 public @property override dstring text() { return getKeywordNameD(_keyword); } 1068 this() { 1069 super(TokenType.KEYWORD); 1070 } 1071 this(SourceFile file, uint line, uint pos) { 1072 super(TokenType.KEYWORD, file, line, pos); 1073 } 1074 override public Token clone() { 1075 KeywordToken res = new KeywordToken(_file, _line, _pos); 1076 res._keyword = _keyword; 1077 return res; 1078 } 1079 public override @property string toString() { 1080 return "Keyword:" ~ to!string(_keyword); 1081 } 1082 } 1083 1084 /// comment token 1085 class CommentToken : Token { 1086 protected dstring _text; 1087 protected bool _isDocumentationComment; 1088 protected bool _isMultilineComment; 1089 1090 1091 override @property bool isDocumentationComment() { 1092 return _isDocumentationComment; 1093 } 1094 1095 @property void isDocumentationComment(bool f) { 1096 _isDocumentationComment = f; 1097 } 1098 1099 /// returns true if this is multiline 1100 override @property bool isMultilineComment() { 1101 return _isMultilineComment; 1102 } 1103 1104 @property void isMultilineComment(bool f) { 1105 _isMultilineComment = f; 1106 } 1107 1108 @property override dstring text() { return _text; } 1109 @property void text(dchar[] text) { _text = cast(dstring)text; } 1110 this() { 1111 super(TokenType.COMMENT); 1112 } 1113 this(SourceFile file, uint line, uint pos, dchar[] text) { 1114 super(TokenType.COMMENT, file, line, pos); 1115 _text = cast(dstring)text; 1116 } 1117 override public Token clone() { 1118 CommentToken res = new CommentToken(_file, _line, _pos, _text.dup); 1119 res._isDocumentationComment = _isDocumentationComment; 1120 res._isMultilineComment = _isMultilineComment; 1121 return res; 1122 } 1123 public override @property string toString() { 1124 return "Comment:" ~ to!string(_text); 1125 } 1126 } 1127 1128 /// Invalid token holder - for error tolerant parsing 1129 class InvalidToken : Token { 1130 protected dstring _text; 1131 protected TokenType _invalidTokenType; 1132 protected int _errorCode; 1133 protected string _errorMessage; 1134 1135 /// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer) 1136 override @property string errorMessage() { return _errorMessage; } 1137 /// sets error message 1138 @property void errorMessage(string s) { _errorMessage = s; } 1139 /// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer) 1140 override @property int errorCode() { return _errorCode; } 1141 /// sets error code 1142 @property void errorCode(int c) { _errorCode = c; } 1143 /// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer) 1144 override @property TokenType invalidTokenType() { return _invalidTokenType; } 1145 /// sets type of token parsing of which has been failed 1146 @property void invalidTokenType(TokenType t) { _invalidTokenType = t; } 1147 1148 /// text of invalid token 1149 @property override dstring text() { return _text; } 1150 /// text of invalid token 1151 @property void text(dchar[] text) { _text = cast(dstring)text; } 1152 1153 this() { 1154 super(TokenType.INVALID); 1155 } 1156 this(SourceFile file, uint line, uint pos, dchar[] text) { 1157 super(TokenType.INVALID, file, line, pos); 1158 _text = cast(dstring)text; 1159 } 1160 override Token clone() { 1161 InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup); 1162 res._errorMessage = _errorMessage.dup; 1163 res._errorCode = _errorCode; 1164 res._invalidTokenType = _invalidTokenType; 1165 return res; 1166 } 1167 override @property string toString() { 1168 return "Invalid:" ~ to!string(_text); 1169 } 1170 } 1171 1172 alias tokenizer_ident_t = uint; 1173 alias tokenizer_ident_name_t = dstring; 1174 1175 enum : tokenizer_ident_t { 1176 NO_IDENT = 0 1177 } 1178 1179 /** 1180 * Global storage for identifier strings. 1181 */ 1182 class IdentHolder { 1183 protected tokenizer_ident_t _nextId; 1184 protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName; 1185 protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId; 1186 1187 public this() { 1188 _nextId = NO_IDENT + 1; 1189 } 1190 1191 /** 1192 * Search for id by name, return NO_IDENT if not found. 1193 */ 1194 uint findByName(tokenizer_ident_name_t name) { 1195 tokenizer_ident_t * found = (name in _nameToId); 1196 if (found) 1197 return *found; 1198 return NO_IDENT; 1199 } 1200 1201 /** 1202 * Search for name by id, return null if not found. 1203 */ 1204 tokenizer_ident_name_t nameById(tokenizer_ident_t id) { 1205 auto found = (id in _idToName); 1206 if (found) 1207 return *found; 1208 return null; 1209 } 1210 1211 /** 1212 * Search for ident id by name, create new entry if not found. 1213 */ 1214 tokenizer_ident_t idByName(tokenizer_ident_name_t name) { 1215 uint * found = (name in _nameToId); 1216 if (found) 1217 return *found; 1218 uint newid = _nextId++; 1219 immutable tokenizer_ident_name_t nameCopy = name.dup; 1220 _nameToId[nameCopy] = newid; 1221 _idToName[newid] = nameCopy; 1222 return newid; 1223 } 1224 } 1225 1226 /** 1227 * Thread local storage for IDs. 1228 */ 1229 IdentHolder identMap; 1230 1231 static this() { 1232 // init ID storage 1233 identMap = new IdentHolder(); 1234 } 1235 1236 class StringLiteralToken : Token { 1237 dstring _text; 1238 dchar _literalType; 1239 public @property override dchar literalType() { return _literalType; } 1240 public @property override dstring text() { return _text; } 1241 public void setText(dchar[] text, dchar type) { _text = cast(dstring)text; _literalType = type; } 1242 this() { 1243 super(TokenType.STRING); 1244 } 1245 this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) { 1246 super(TokenType.STRING, file, line, pos); 1247 _text = cast(dstring)text; 1248 _literalType = type; 1249 } 1250 override public Token clone() { 1251 return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType); 1252 } 1253 public override @property string toString() { 1254 return toUTF8("String:\"" ~ _text ~ "\"" ~ (_literalType ? _literalType : ' ')); 1255 } 1256 } 1257 1258 class CharacterLiteralToken : Token { 1259 dchar _character; 1260 dchar _literalType; 1261 @property override dchar literalType() { return _literalType; } 1262 @property dchar character() { return _character; } 1263 @property override dstring text() { return [_character]; } 1264 void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; } 1265 this() { 1266 super(TokenType.CHARACTER); 1267 } 1268 this(SourceFile file, uint line, uint pos, dchar character, dchar type) { 1269 super(TokenType.CHARACTER, file, line, pos); 1270 _character = character; 1271 _literalType = type; 1272 } 1273 override public Token clone() { 1274 return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType); 1275 } 1276 public override @property string toString() { 1277 return "Char:" ~ toUTF8([_character]); 1278 } 1279 } 1280 1281 class IntegerLiteralToken : Token { 1282 ulong _value; 1283 bool _unsigned; 1284 bool _long; 1285 public @property override ulong intValue() { return _value; } 1286 public @property override bool isUnsigned() { return _unsigned; } 1287 public @property override ulong isLong() { return _long; } 1288 public @property override dstring text() { return to!dstring(_value); } 1289 public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) { 1290 _value = value; 1291 _unsigned = unsignedFlag; 1292 _long = longFlag; 1293 } 1294 public void setFlags(bool unsignedFlag = false, bool longFlag = false) { 1295 _unsigned = unsignedFlag; 1296 _long = longFlag; 1297 } 1298 this() { 1299 super(TokenType.INTEGER); 1300 } 1301 this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) { 1302 super(TokenType.INTEGER, file, line, pos); 1303 _value = value; 1304 _unsigned = unsignedFlag; 1305 _long = longFlag; 1306 } 1307 override public Token clone() { 1308 return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long); 1309 } 1310 public override @property string toString() { 1311 return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? "U" : ""); 1312 } 1313 } 1314 1315 class RealLiteralToken : Token { 1316 real _value; 1317 byte _precision; 1318 bool _imaginary; 1319 public @property override ulong intValue() { return to!long(_value); } 1320 public @property override real realValue() { return _value; } 1321 public @property override double doubleValue() { return cast(double)_value; } 1322 public @property override float floatValue() { return cast(float)_value; } 1323 public @property override byte precision() { return _precision; } 1324 public @property override bool isImaginary() { return _imaginary; } 1325 public @property override dstring text() { return to!dstring(_value); } 1326 public void setValue(real value, byte precision = 1, bool imaginary = false) { 1327 _value = value; 1328 _precision = precision; 1329 _imaginary = imaginary; 1330 } 1331 public void setFlags(byte precision = 1, bool imaginary = false) { 1332 _precision = precision; 1333 _imaginary = imaginary; 1334 } 1335 this() { 1336 super(TokenType.FLOAT); 1337 } 1338 this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) { 1339 super(TokenType.FLOAT, file, line, pos); 1340 _value = value; 1341 _precision = precision; 1342 _imaginary = imaginary; 1343 } 1344 override public Token clone() { 1345 return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary); 1346 } 1347 public override @property string toString() { 1348 return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : ""); 1349 } 1350 } 1351 1352 class IdentToken : Token { 1353 tokenizer_ident_t _id; 1354 public @property override dstring text() { 1355 return identMap.nameById(_id); 1356 } 1357 public void setText(dchar[] text) { 1358 _id = identMap.idByName(cast(immutable)text); 1359 } 1360 this() { 1361 super(TokenType.IDENTIFIER); 1362 } 1363 this(SourceFile file, uint line, uint pos, dchar[] text) { 1364 super(TokenType.IDENTIFIER, file, line, pos); 1365 _id = identMap.idByName(cast(immutable)text); 1366 } 1367 this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) { 1368 super(TokenType.IDENTIFIER, file, line, pos); 1369 _id = id; 1370 } 1371 override public Token clone() { 1372 return new IdentToken(_file, _line, _pos, _id); 1373 } 1374 public override @property string toString() { 1375 return "Ident:" ~ to!string(text); 1376 } 1377 } 1378 1379 // shared appender buffer, to avoid extra heap allocations 1380 struct StringAppender { 1381 dchar[] buf; 1382 uint len; 1383 dchar[] get() { 1384 return buf[0 .. len]; 1385 } 1386 void appendEol() { 1387 if (len + 1 > buf.length) { 1388 uint newsize = cast(uint)((len + 1 + buf.length) * 2); 1389 if (newsize < 128) 1390 newsize = 128; 1391 buf.length = newsize; 1392 } 1393 buf[len] = '\n'; 1394 len++; 1395 } 1396 void append(dchar[] s) { 1397 if (s.length == 0) 1398 return; 1399 if (len + s.length > buf.length) { 1400 uint newsize = cast(uint)((len + s.length + buf.length) * 2); 1401 if (newsize < 128) 1402 newsize = 128; 1403 buf.length = newsize; 1404 } 1405 buf[len .. len + s.length] = s; 1406 len += s.length; 1407 } 1408 void append(dchar ch) { 1409 if (len + 1 > buf.length) { 1410 uint newsize = cast(uint)(buf.length * 2); 1411 if (newsize < 128) 1412 newsize = 128; 1413 buf.length = newsize; 1414 } 1415 buf[len++] = ch; 1416 } 1417 void reset() { 1418 len = 0; 1419 } 1420 static int parseHexDigit(dchar ch) { 1421 if (ch >= '0' && ch <='9') 1422 return ch - '0'; 1423 if (ch >= 'a' && ch <='f') 1424 return ch - 'a' + 10; 1425 if (ch >= 'A' && ch <='F') 1426 return ch - 'A' + 10; 1427 return -1; 1428 } 1429 bool errorFlag = false; 1430 dchar decodeHex(ref int pos, int count) { 1431 dchar res = 0; 1432 for (int i = 0; i < count; i++) { 1433 if (pos >= len - 1) { 1434 errorFlag = true; 1435 return res; 1436 } 1437 dchar ch = buf[++pos]; 1438 int digit = parseHexDigit(ch); 1439 if (digit < 0) { 1440 errorFlag = true; 1441 digit = 0; 1442 } 1443 res = (res << 4) | digit; 1444 } 1445 return res; 1446 } 1447 dchar decodeOct(dchar firstChar, ref int pos) { 1448 dchar res = 0; 1449 res = firstChar - '0'; 1450 if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') { 1451 res = (res << 3) | (buf[++pos] - '0'); 1452 } 1453 if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') { 1454 res = (res << 3) | (buf[++pos] - '0'); 1455 } 1456 return res; 1457 } 1458 1459 char[] entityNameBuf; 1460 int entityNameLen; 1461 1462 dchar decodeCharacterEntity(ref int pos) { 1463 entityNameLen = 0; 1464 pos++; 1465 for(; pos < len && buf[pos] != ';'; pos++) { 1466 dchar ch = buf[pos]; 1467 if (ch >= 0x80) 1468 errorFlag = true; 1469 if (entityNameBuf.length < entityNameLen + 4) 1470 entityNameBuf.length += 32; 1471 entityNameBuf[entityNameLen++] = cast(char)ch; 1472 } 1473 if (pos < len && buf[pos] == ';') { 1474 dchar ch = entityToChar(cast(string)entityNameBuf[0 .. entityNameLen]); 1475 if (ch) 1476 return ch; 1477 } 1478 errorFlag = true; 1479 return '?'; 1480 } 1481 1482 bool processEscapeSequences() { 1483 errorFlag = false; 1484 int dst = 0; 1485 for (int src = 0; src < len; src++) { 1486 dchar ch = buf[src]; 1487 if (ch == '\\') { 1488 if (src == len - 1) 1489 break; // INVALID 1490 ch = buf[++src]; 1491 switch (ch) { 1492 case '\'': 1493 case '\"': 1494 case '?': 1495 case '\\': 1496 buf[dst++] = ch; 1497 break; 1498 case '0': 1499 buf[dst++] = '\0'; 1500 break; 1501 case 'a': 1502 buf[dst++] = '\a'; 1503 break; 1504 case 'b': 1505 buf[dst++] = '\b'; 1506 break; 1507 case 'f': 1508 buf[dst++] = '\f'; 1509 break; 1510 case 'n': 1511 buf[dst++] = '\n'; 1512 break; 1513 case 'r': 1514 buf[dst++] = '\r'; 1515 break; 1516 case 't': 1517 buf[dst++] = '\t'; 1518 break; 1519 case 'v': 1520 buf[dst++] = '\v'; 1521 break; 1522 case 'x': 1523 buf[dst++] = decodeHex(src, 2); 1524 break; 1525 case 'u': 1526 buf[dst++] = decodeHex(src, 4); 1527 break; 1528 case 'U': 1529 buf[dst++] = decodeHex(src, 8); 1530 break; 1531 default: 1532 if (ch >= '0' && ch <= '7') { 1533 // octal X XX or XXX 1534 buf[dst++] = decodeOct(ch, src); // something wrong 1535 } else if (ch == '&') { 1536 // named character entity 1537 buf[dst++] = decodeCharacterEntity(src); 1538 // just show it as is 1539 } else { 1540 buf[dst++] = ch; // something wrong 1541 errorFlag = true; 1542 } 1543 break; 1544 } 1545 } else { 1546 buf[dst++] = ch; 1547 } 1548 } 1549 len = dst; 1550 return errorFlag; 1551 } 1552 } 1553 1554 class Tokenizer 1555 { 1556 protected SourceLines _lineStream; 1557 protected dchar[] _lineText; 1558 protected int _line; // current line number 1559 protected int _len; // current line length 1560 protected int _pos; // current line read position 1561 protected int _prevLineLength; // previous line length 1562 protected uint _state; // tokenizer state 1563 1564 enum : int { 1565 EOF_CHAR = 0x001A, 1566 EOL_CHAR = 0x000A 1567 }; 1568 1569 protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken(); 1570 protected CommentToken _sharedCommentToken = new CommentToken(); 1571 protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken(); 1572 protected IdentToken _sharedIdentToken = new IdentToken(); 1573 protected OpToken _sharedOpToken = new OpToken(); 1574 protected KeywordToken _sharedKeywordToken = new KeywordToken(); 1575 protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken(); 1576 protected RealLiteralToken _sharedRealToken = new RealLiteralToken(); 1577 protected InvalidToken _sharedInvalidToken = new InvalidToken(); 1578 protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken(); 1579 protected StringAppender _stringLiteralAppender; 1580 protected StringAppender _commentAppender; 1581 protected StringAppender _identAppender; 1582 1583 protected bool _enableCommentText = true; 1584 /// when false, does not put comment text into comment token - for less allocations 1585 @property void enableCommentText(bool enabled) { 1586 _enableCommentText = enabled; 1587 } 1588 /// when false, does not put comment text into comment token - for less allocations 1589 @property bool enableCommentText() { 1590 return _enableCommentText; 1591 } 1592 1593 protected bool _errorTolerant = false; 1594 /// when true, returns BadToken instead of throwing exception 1595 @property void errorTolerant(bool enabled) { 1596 _errorTolerant = enabled; 1597 } 1598 /// when true, returns BadToken instead of throwing exception 1599 @property bool errorTolerant() { 1600 return _errorTolerant; 1601 } 1602 1603 this(SourceLines lineStream) { 1604 initialize(lineStream); 1605 } 1606 1607 void initialize(SourceLines lineStream, int pos = 0) { 1608 _lineStream = lineStream; 1609 SourceFile file = _lineStream.file; 1610 _sharedWhiteSpaceToken.setFile(file); 1611 _sharedCommentToken.setFile(file); 1612 _sharedStringLiteralToken.setFile(file); 1613 _sharedIdentToken.setFile(file); 1614 _sharedOpToken.setFile(file); 1615 _sharedKeywordToken.setFile(file); 1616 _sharedIntegerToken.setFile(file); 1617 _sharedRealToken.setFile(file); 1618 _sharedInvalidToken.setFile(file); 1619 _sharedCharacterLiteralToken.setFile(file); 1620 buildTime = Clock.currTime(); 1621 _line = lineStream.line; 1622 _pos = 0; 1623 _prevLineLength = 0; 1624 _lineText = null; 1625 nextLine(); 1626 _pos = pos; 1627 } 1628 1629 this(string code, string filename = "") { 1630 this(new ArraySourceLines(code, filename)); 1631 } 1632 1633 // fetch next line from source stream 1634 protected bool nextLine() { 1635 _prevLineLength = cast(int)_lineText.length; 1636 _lineText = _lineStream.readLine(); 1637 if (!_lineText) { 1638 if (_lineStream.errorCode != 0) 1639 throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos); 1640 if (_lineStream.eof) { 1641 // end of file 1642 _pos = 0; 1643 _len = 0; 1644 return false; 1645 } 1646 // just an empty line 1647 } 1648 _line = _lineStream.line; 1649 _pos = 0; 1650 _len = cast(int)_lineText.length; // do not support lines longer that 4Gb 1651 return true; 1652 } 1653 1654 protected dchar nextChar() { 1655 if (_pos >= _len) { 1656 if (!nextLine()) { 1657 _pos = _prevLineLength + 1; 1658 return EOF_CHAR; 1659 } 1660 return EOL_CHAR; 1661 } 1662 dchar res = _lineText[_pos++]; 1663 if (_pos >= _len) 1664 nextLine(); 1665 return res; 1666 } 1667 1668 protected dchar peekChar() { 1669 if (_lineText is null) { 1670 if (!nextLine()) { 1671 return EOF_CHAR; 1672 } 1673 } 1674 if (_pos >= _len) 1675 return EOL_CHAR; 1676 return _lineText[_pos++]; 1677 } 1678 1679 protected Token emitEof() { 1680 // TODO: check for current state 1681 return new EofToken(_lineStream.file, _startLine, _startPos + 2); 1682 } 1683 1684 protected Token processWhiteSpace(dchar firstChar) { 1685 // reuse the same token instance, to avoid extra heap spamming 1686 _sharedWhiteSpaceToken.setPos(_startLine, _startPos); 1687 for (;;) { 1688 int i = _pos; 1689 for (; i < _len; i++) { 1690 dchar ch = _lineText[i]; 1691 if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR)) 1692 break; 1693 } 1694 _pos = i; 1695 if (_pos < _len) 1696 break; 1697 // go to next line 1698 if (!nextLine()) 1699 break; 1700 } 1701 return _sharedWhiteSpaceToken; 1702 } 1703 1704 protected Token processOneLineComment() { 1705 _sharedCommentToken.setPos(_startLine, _startPos); 1706 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/'; 1707 _sharedCommentToken.isMultilineComment = false; 1708 if (_enableCommentText) { 1709 _sharedCommentToken.text = _lineText[_pos + 1 .. $]; 1710 } 1711 _pos = _len; 1712 nextChar(); 1713 return _sharedCommentToken; 1714 } 1715 1716 protected Token processOneLineSharpComment() { 1717 _sharedCommentToken.setPos(_startLine, _startPos); 1718 if (_enableCommentText) { 1719 _sharedCommentToken.text = _lineText[_pos .. $]; 1720 } 1721 _pos = _len; 1722 return _sharedCommentToken; 1723 } 1724 1725 // Comment /* */ 1726 protected Token processMultilineComment() { 1727 _sharedCommentToken.setPos(_startLine, _startPos); 1728 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*'; 1729 _sharedCommentToken.isMultilineComment = true; 1730 _commentAppender.reset(); 1731 int textStart = _pos + 1; 1732 for (;;) { 1733 int textEnd = int.max; 1734 int i = textStart; 1735 for (; i < _len - 1; i++) { 1736 if (_lineText[i] == '*' && _lineText[i + 1] == '/') { 1737 textEnd = i; 1738 break; 1739 } 1740 } 1741 if (textEnd != int.max) { 1742 if (_enableCommentText) 1743 _commentAppender.append(_lineText[textStart .. textEnd]); 1744 _pos = textEnd + 2; 1745 break; 1746 } 1747 if (!nextLine()) { 1748 // TODO: do we need throw exception if comment not closed by end of file? 1749 _pos = _len; 1750 break; 1751 } 1752 textStart = 0; 1753 } 1754 if (_enableCommentText) { 1755 _sharedCommentToken.text = _commentAppender.get(); 1756 } 1757 return _sharedCommentToken; 1758 } 1759 1760 // Comment /+ +/ 1761 protected Token processNestedComment() { 1762 _sharedCommentToken.setPos(_startLine, _startPos); 1763 _sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+'; 1764 _sharedCommentToken.isMultilineComment = true; 1765 _commentAppender.reset(); 1766 dchar[] text; 1767 int textStart = _pos + 1; 1768 int level = 1; 1769 for (;;) { 1770 int textEnd = int.max; 1771 int i = textStart; 1772 for (; i < _len - 1; i++) { 1773 if (_lineText[i] == '/' && _lineText[i + 1] == '+') { 1774 level++; 1775 i++; 1776 } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') { 1777 if (--level == 0) { 1778 textEnd = i; 1779 break; 1780 } 1781 } 1782 } 1783 if (textEnd != int.max) { 1784 if (_enableCommentText) 1785 _commentAppender.append(_lineText[textStart .. textEnd]); 1786 _pos = textEnd + 2; 1787 break; 1788 } 1789 if (!nextLine()) { 1790 // TODO: do we need throw exception if comment not closed by end of file? 1791 _pos = _len; 1792 break; 1793 } 1794 if (_enableCommentText) 1795 _commentAppender.appendEol(); 1796 textStart = 0; 1797 } 1798 if (_enableCommentText) { 1799 _sharedCommentToken.text = _commentAppender.get(); 1800 } 1801 return _sharedCommentToken; 1802 } 1803 1804 protected Token processHexString() { 1805 _pos++; 1806 // TODO: 1807 return null; 1808 } 1809 1810 protected Token processDelimitedString() { 1811 _pos++; 1812 // TODO: 1813 return null; 1814 } 1815 1816 // r"string" or `string` 1817 protected Token processWysiwygString(dchar ch) { 1818 _pos++; 1819 // TODO: 1820 return null; 1821 } 1822 1823 protected Token processIdent(dchar firstChar) { 1824 _sharedIdentToken.setPos(_startLine, _startPos); 1825 _identAppender.reset(); 1826 _identAppender.append(firstChar); 1827 for (; _pos < _len; ) { 1828 dchar ch = _lineText[_pos]; 1829 if (!isIdentMiddleChar(ch)) { 1830 break; 1831 } 1832 _identAppender.append(ch); 1833 _pos++; 1834 } 1835 _sharedIdentToken.setText(_identAppender.get); 1836 return _sharedIdentToken; 1837 } 1838 1839 protected Token processIntegerSuffix() { 1840 if (_pos >= _len) 1841 return _sharedIntegerToken; 1842 bool longFlag = false; 1843 bool unsignedFlag = false; 1844 dchar ch = _lineText[_pos]; 1845 dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 1846 if (ch == 'l' || ch == 'L') { 1847 longFlag = true; 1848 _pos++; 1849 if (ch2 == 'u' || ch2 == 'U') { 1850 unsignedFlag = true; 1851 _pos++; 1852 } 1853 } else if (ch == 'u' || ch == 'U') { 1854 unsignedFlag = true; 1855 _pos++; 1856 if (ch2 == 'l' || ch2 == 'L') { 1857 longFlag = true; 1858 _pos++; 1859 } 1860 } 1861 _sharedIntegerToken.setFlags(unsignedFlag, longFlag); 1862 ch = _pos < _len ? _lineText[_pos] : 0; 1863 if (isIdentMiddleChar(ch)) 1864 return parserError("Unexpected character after number", _sharedIntegerToken); 1865 return _sharedIntegerToken; 1866 } 1867 1868 protected Token processBinaryNumber() { 1869 _sharedIntegerToken.setPos(_startLine, _startPos); 1870 _pos++; 1871 if (_pos >= _len) 1872 return parserError("Unexpected end of line in binary number", _sharedIntegerToken); 1873 int digits = 0; 1874 ulong number = 0; 1875 int i = _pos; 1876 for (;i < _len; i++) { 1877 dchar ch = _lineText[i]; 1878 if (ch != '0' && ch != '1') 1879 break; 1880 number = (number << 1) | (ch == '1' ? 1 : 0); 1881 digits++; 1882 } 1883 _pos = i; 1884 if (digits > 64) 1885 return parserError("number is too big", _sharedIntegerToken); 1886 _sharedIntegerToken.setValue(number); 1887 return processIntegerSuffix(); 1888 } 1889 1890 protected Token processHexNumber() { 1891 _sharedIntegerToken.setPos(_startLine, _startPos); 1892 _sharedRealToken.setPos(_startLine, _startPos); 1893 _pos++; 1894 if (_pos >= _len) 1895 return parserError("Unexpected end of line in hex number", _sharedIntegerToken); 1896 int digits = 0; 1897 ulong number = 0; 1898 int i = _pos; 1899 for (;i < _len; i++) { 1900 dchar ch = _lineText[i]; 1901 uint digit = 0; 1902 if (ch >= '0' && ch <= '9') 1903 digit = ch - '0'; 1904 else if (ch >= 'a' && ch <= 'f') 1905 digit = ch - 'a' + 10; 1906 else if (ch >= 'A' && ch <= 'F') 1907 digit = ch - 'A' + 10; 1908 else if (ch == '_') 1909 continue; 1910 else 1911 break; 1912 number = (number << 4) | digit; 1913 digits++; 1914 } 1915 _pos = i; 1916 if (digits > 16) 1917 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1918 _sharedIntegerToken.setValue(number); 1919 return processIntegerSuffix(); 1920 } 1921 1922 protected Token processOctNumber() { 1923 _sharedIntegerToken.setPos(_startLine, _startPos); 1924 if (_pos >= _len) 1925 return parserError("Unexpected end of line in octal number", _sharedIntegerToken); 1926 int digits = 0; 1927 ulong number = 0; 1928 int i = _pos; 1929 bool overflow = false; 1930 for (;i < _len; i++) { 1931 dchar ch = _lineText[i]; 1932 int digit = 0; 1933 if (ch >= '0' && ch <= '7') 1934 digit = ch - '0'; 1935 else if (ch == '_') 1936 continue; 1937 else 1938 break; 1939 number <<= 3; 1940 if (digits >= 20) { 1941 if ((number >> 3) << 3 != number) { 1942 overflow = true; 1943 break; 1944 } 1945 } 1946 number |= digit; 1947 digits++; 1948 } 1949 _pos = i; 1950 if (overflow) 1951 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 1952 _sharedIntegerToken.setValue(number); 1953 return processIntegerSuffix(); 1954 } 1955 1956 // 1957 protected Token processDecFloatSuffix(real value) { 1958 ubyte precision = 1; 1959 bool imaginary = false; 1960 dchar next = _pos < _len ? _lineText[_pos] : 0; 1961 if (next == 'f') { 1962 _pos++; 1963 precision = 0; 1964 } else if (next == 'L') { 1965 _pos++; 1966 precision = 2; 1967 } 1968 next = _pos < _len ? _lineText[_pos] : 0; 1969 if (next == 'i') { 1970 _pos++; 1971 imaginary = true; 1972 } 1973 next = _pos < _len ? _lineText[_pos] : 0; 1974 if (isIdentMiddleChar(next)) 1975 return parserError("invalid suffix for floating point literal", _sharedRealToken); 1976 _sharedRealToken.setValue(value, precision, imaginary); 1977 return _sharedRealToken; 1978 } 1979 1980 // after E char 1981 protected Token processDecFloatExponent(real value) { 1982 dchar next = _pos < _len ? _lineText[_pos] : 0; 1983 int sign = 1; 1984 if (next == '+') { 1985 _pos++; 1986 } else if (next == '-') { 1987 _pos++; 1988 sign = -1; 1989 } 1990 if (_pos >= _len) 1991 return parserError("Invalid exponent", _sharedRealToken); 1992 ulong digits = 0; 1993 ulong number = 0; 1994 int i = _pos; 1995 bool overflow = false; 1996 for (;i < _len; i++) { 1997 dchar ch = _lineText[i]; 1998 uint digit = 0; 1999 if (ch >= '0' && ch <= '9') 2000 digit = ch - '0'; 2001 else if (ch == '_') 2002 continue; 2003 else 2004 break; 2005 number *= 10; 2006 if (digits >= 18) { 2007 if ((number * 10) / 10 != number) { 2008 overflow = true; 2009 break; 2010 } 2011 } 2012 number += digit; 2013 digits++; 2014 } 2015 if (digits == 0) 2016 return parserError("Invalid exponent", _sharedRealToken); 2017 _pos = i; 2018 value *= pow(10., cast(long)number * sign); 2019 return processDecFloatSuffix(value); 2020 } 2021 2022 protected Token processDecFloatSecondPart(ulong firstPart) { 2023 if (_pos >= _len) { 2024 _sharedRealToken.setValue(cast(real)firstPart); 2025 return _sharedRealToken; 2026 } 2027 ulong divider = 1; 2028 ulong number = 0; 2029 int i = _pos; 2030 bool overflow = false; 2031 for (;i < _len; i++) { 2032 dchar ch = _lineText[i]; 2033 uint digit = 0; 2034 if (ch >= '0' && ch <= '9') 2035 digit = ch - '0'; 2036 else if (ch == '_') 2037 continue; 2038 else 2039 break; 2040 if (divider * 10 < divider) 2041 continue; // ignore extra digits 2042 number *= 10; 2043 number += digit; 2044 divider *= 10; 2045 } 2046 _pos = i; 2047 real value = cast(real)firstPart + (cast(real)number / divider); 2048 dchar next = _pos < _len ? _lineText[_pos] : 0; 2049 if (next == 0) { 2050 // neither exponent nor suffix 2051 _sharedRealToken.setValue(value); 2052 return _sharedRealToken; 2053 } 2054 if (next == 'e' || next == 'E') { 2055 _pos++; 2056 return processDecFloatExponent(value); 2057 } 2058 return processDecFloatSuffix(value); 2059 } 2060 2061 protected Token processDecNumber(dchar c) { 2062 _sharedIntegerToken.setPos(_startLine, _startPos); 2063 _sharedRealToken.setPos(_startLine, _startPos); 2064 //if (_pos >= _len) 2065 // return parserError("Unexpected end of line in number", _sharedIntegerToken); 2066 int digits = 1; 2067 ulong number = c - '0'; 2068 int i = _pos; 2069 bool overflow = false; 2070 if (_line == _startLine) { 2071 for (;i < _len; i++) { 2072 dchar ch = _lineText[i]; 2073 uint digit = 0; 2074 if (ch >= '0' && ch <= '9') 2075 digit = ch - '0'; 2076 else if (ch == '_') 2077 continue; 2078 else 2079 break; 2080 number *= 10; 2081 if (digits >= 18) { 2082 if ((number * 10) / 10 != number) { 2083 overflow = true; 2084 break; 2085 } 2086 } 2087 number += digit; 2088 digits++; 2089 } 2090 _pos = i; 2091 } 2092 if (overflow) 2093 return parserError("number is too big to fit 64 bits", _sharedIntegerToken); 2094 _sharedIntegerToken.setValue(number); 2095 dchar next = _line == _startLine && _pos < _len ? _lineText[_pos] : 0; 2096 if (next == 0) 2097 return _sharedIntegerToken; 2098 if (next == 'e' || next == 'E') { 2099 _pos++; 2100 return processDecFloatExponent(number); 2101 } else if (next == '.') { 2102 _pos++; 2103 return processDecFloatSecondPart(number); 2104 } 2105 return processIntegerSuffix(); 2106 } 2107 2108 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2109 protected Token parserError(string msg, Token incompleteToken) { 2110 return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type); 2111 } 2112 /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag 2113 protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) { 2114 if (_errorTolerant) { 2115 startPos--; 2116 _sharedInvalidToken.setPos(startLine, startPos); 2117 _sharedInvalidToken.errorMessage = msg; 2118 _sharedInvalidToken.errorCode = 1; // for future extension 2119 _sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension 2120 // make invalid source text 2121 dchar[] invalidText; 2122 int p = startLine == _line ? startPos : 0; 2123 for (int i = p; i < _pos && i < _lineText.length; i++) 2124 invalidText ~= _lineText[i]; 2125 2126 // recover after error 2127 for (; _pos < _lineText.length; _pos++) { 2128 dchar ch = _lineText[_pos]; 2129 if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}') 2130 break; 2131 if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) { 2132 if (ch == '*' || ch == '/') 2133 break; 2134 } 2135 invalidText ~= ch; 2136 } 2137 _sharedInvalidToken.text = invalidText; 2138 return _sharedInvalidToken; 2139 } 2140 throw new ParserException(msg, _lineStream.file, _line, _pos); 2141 } 2142 2143 protected Keyword detectKeyword(dchar ch) { 2144 if (ch > 'z') 2145 return Keyword.NONE; 2146 int len = _len - _pos; 2147 switch (cast(ubyte)ch) { 2148 // ABSTRACT, 2149 // ALIAS, 2150 // ALIGN, 2151 // ASM, 2152 // ASSERT, 2153 // AUTO, 2154 case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos); 2155 2156 // BODY, 2157 // BOOL, 2158 // BREAK, 2159 // BYTE, 2160 case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos); 2161 2162 // CASE, 2163 // CAST, 2164 // CATCH, 2165 // CDOUBLE, 2166 // CENT, 2167 // CFLOAT, 2168 // CHAR, 2169 // CLASS, 2170 // CONST, 2171 // CONTINUE, 2172 // CREAL, 2173 case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos); 2174 2175 // DCHAR, 2176 // DEBUG, 2177 // DEFAULT, 2178 // DELEGATE, 2179 // DELETE, 2180 // DEPRECATED, 2181 // DO, 2182 // DOUBLE, 2183 case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos); 2184 2185 // ELSE, 2186 // ENUM, 2187 // EXPORT, 2188 // EXTERN, 2189 case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos); 2190 2191 // FALSE, 2192 // FINAL, 2193 // FINALLY, 2194 // FLOAT, 2195 // FOR, 2196 // FOREACH, 2197 // FOREACH_REVERSE, 2198 // FUNCTION, 2199 case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos); 2200 2201 // GOTO, 2202 case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos); 2203 2204 // IDOUBLE, 2205 // IF, 2206 // IFLOAT, 2207 // IMMUTABLE, 2208 // IMPORT, 2209 // IN, 2210 // INOUT, 2211 // INT, 2212 // INTERFACE, 2213 // INVARIANT, 2214 // IREAL, 2215 // IS, 2216 case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos); 2217 2218 // LAZY, 2219 // LONG, 2220 case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos); 2221 2222 // MACRO, 2223 // MIXIN, 2224 // MODULE, 2225 case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos); 2226 2227 // NEW, 2228 // NOTHROW, 2229 // NULL, 2230 case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos); 2231 2232 // OUT, 2233 // OVERRIDE, 2234 case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos); 2235 2236 // PACKAGE, 2237 // PRAGMA, 2238 // PRIVATE, 2239 // PROTECTED, 2240 // PUBLIC, 2241 // PURE, 2242 case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos); 2243 2244 // REAL, 2245 // REF, 2246 // RETURN, 2247 case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos); 2248 2249 // SCOPE, 2250 // SHARED, 2251 // SHORT, 2252 // STATIC, 2253 // STRUCT, 2254 // SUPER, 2255 // SWITCH, 2256 // SYNCHRONIZED, 2257 case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos); 2258 2259 // TEMPLATE, 2260 // THIS, 2261 // THROW, 2262 // TRUE, 2263 // TRY, 2264 // TYPEDEF, 2265 // TYPEID, 2266 // TYPEOF, 2267 case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos); 2268 2269 // UBYTE, 2270 // UCENT, 2271 // UINT, 2272 // ULONG, 2273 // UNION, 2274 // UNITTEST, 2275 // USHORT, 2276 case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos); 2277 2278 // VERSION, 2279 // VOID, 2280 // VOLATILE, 2281 case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos); 2282 2283 // WCHAR, 2284 // WHILE, 2285 // WITH, 2286 case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos); 2287 2288 // FILE, 2289 // MODULE, 2290 // LINE, 2291 // FUNCTION, 2292 // PRETTY_FUNCTION, 2293 // 2294 // GSHARED, 2295 // TRAITS, 2296 // VECTOR, 2297 // PARAMETERS, 2298 case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos); 2299 default: return Keyword.NONE; 2300 } 2301 } 2302 protected OpCode detectOp(dchar ch) nothrow { 2303 if (ch >= 128) 2304 return OpCode.NONE; 2305 dchar ch2 = _pos < _len ? _lineText[_pos] : 0; 2306 dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0; 2307 switch(cast(ubyte)ch) { 2308 // DIV, // / 2309 // DIV_EQ, // /= 2310 case '/': 2311 if (ch2 == '=') { 2312 _pos++; 2313 return OpCode.DIV_EQ; 2314 } 2315 return OpCode.DIV; 2316 // DOT, // . 2317 // DOT_DOT, // .. 2318 // DOT_DOT_DOT,// ... 2319 case '.': 2320 if (ch2 == '.') { 2321 if (ch3 == '.') { 2322 _pos += 2; 2323 return OpCode.DOT_DOT_DOT; 2324 } 2325 _pos++; 2326 return OpCode.DOT_DOT; 2327 } 2328 return OpCode.DOT; 2329 // AND, // & 2330 // AND_EQ, // &= 2331 // LOG_AND, // && 2332 case '&': 2333 if (ch2 == '=') { 2334 _pos++; 2335 return OpCode.AND_EQ; 2336 } 2337 if (ch2 == '&') { 2338 _pos++; 2339 return OpCode.LOG_AND; 2340 } 2341 return OpCode.AND; 2342 // OR, // | 2343 // OR_EQ, // |= 2344 // LOG_OR, // || 2345 case '|': 2346 if (ch2 == '=') { 2347 _pos++; 2348 return OpCode.OR_EQ; 2349 } 2350 if (ch2 == '|') { 2351 _pos++; 2352 return OpCode.LOG_OR; 2353 } 2354 return OpCode.OR; 2355 // MINUS, // - 2356 // MINUS_EQ, // -= 2357 // MINUS_MINUS,// -- 2358 case '-': 2359 if (ch2 == '=') { 2360 _pos++; 2361 return OpCode.MINUS_EQ; 2362 } 2363 if (ch2 == '-') { 2364 _pos++; 2365 return OpCode.MINUS_MINUS; 2366 } 2367 return OpCode.MINUS; 2368 // PLUS, // + 2369 // PLUS_EQ, // += 2370 // PLUS_PLUS, // ++ 2371 case '+': 2372 if (ch2 == '=') { 2373 _pos++; 2374 return OpCode.PLUS_EQ; 2375 } 2376 if (ch2 == '+') { 2377 _pos++; 2378 return OpCode.PLUS_PLUS; 2379 } 2380 return OpCode.PLUS; 2381 // LT, // < 2382 // LT_EQ, // <= 2383 // SHL, // << 2384 // SHL_EQ, // <<= 2385 // LT_GT, // <> 2386 // NE_EQ, // <>= 2387 case '<': 2388 if (ch2 == '<') { 2389 if (ch3 == '=') { 2390 _pos += 2; 2391 return OpCode.SHL_EQ; 2392 } 2393 _pos++; 2394 return OpCode.SHL; 2395 } 2396 if (ch2 == '>') { 2397 if (ch3 == '=') { 2398 _pos += 2; 2399 return OpCode.NE_EQ; 2400 } 2401 _pos++; 2402 return OpCode.LT_GT; 2403 } 2404 if (ch2 == '=') { 2405 _pos++; 2406 return OpCode.LT_EQ; 2407 } 2408 return OpCode.LT; 2409 // GT, // > 2410 // GT_EQ, // >= 2411 // SHR_EQ // >>= 2412 // ASR_EQ, // >>>= 2413 // SHR, // >> 2414 // ASR, // >>> 2415 case '>': 2416 if (ch2 == '>') { 2417 if (ch3 == '>') { 2418 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2419 if (ch4 == '=') { // >>>= 2420 _pos += 3; 2421 return OpCode.ASR_EQ; 2422 } 2423 _pos += 2; 2424 return OpCode.ASR; // >>> 2425 } 2426 if (ch3 == '=') { // >>= 2427 _pos += 2; 2428 return OpCode.SHR_EQ; 2429 } 2430 _pos++; 2431 return OpCode.SHR; 2432 } 2433 if (ch2 == '=') { // >= 2434 _pos++; 2435 return OpCode.GT_EQ; 2436 } 2437 // > 2438 return OpCode.GT; 2439 // NOT, // ! 2440 // NOT_EQ // != 2441 // NOT_LT_GT, // !<> 2442 // NOT_LT_GT_EQ, // !<>= 2443 // NOT_LT, // !< 2444 // NOT_LT_EQ, // !<= 2445 // NOT_GT, // !> 2446 // NOT_GT_EQ, // !>= 2447 case '!': 2448 if (ch2 == '<') { // !< 2449 if (ch3 == '>') { // !<> 2450 dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0; 2451 if (ch4 == '=') { // !<>= 2452 _pos += 3; 2453 return OpCode.NOT_LT_GT_EQ; 2454 } 2455 _pos += 2; 2456 return OpCode.NOT_LT_GT; // !<> 2457 } 2458 if (ch3 == '=') { // !<= 2459 _pos += 2; 2460 return OpCode.NOT_LT_EQ; 2461 } 2462 _pos++; 2463 return OpCode.NOT_LT; // !< 2464 } 2465 if (ch2 == '=') { // != 2466 _pos++; 2467 return OpCode.NOT_EQ; 2468 } 2469 return OpCode.NOT; 2470 // PAR_OPEN, // ( 2471 case '(': 2472 return OpCode.PAR_OPEN; 2473 // PAR_CLOSE, // ) 2474 case ')': 2475 return OpCode.PAR_CLOSE; 2476 // SQ_OPEN, // [ 2477 case '[': 2478 return OpCode.SQ_OPEN; 2479 // SQ_CLOSE, // ] 2480 case ']': 2481 return OpCode.SQ_CLOSE; 2482 // CURL_OPEN, // { 2483 case '{': 2484 return OpCode.CURL_OPEN; 2485 // CURL_CLOSE, // } 2486 case '}': 2487 return OpCode.CURL_CLOSE; 2488 // QUEST, // ? 2489 case '?': 2490 return OpCode.QUEST; 2491 // COMMA, // , 2492 case ',': 2493 return OpCode.COMMA; 2494 // SEMICOLON, // ; 2495 case ';': 2496 return OpCode.SEMICOLON; 2497 // COLON, // : 2498 case ':': 2499 return OpCode.COLON; 2500 // DOLLAR, // $ 2501 case '$': 2502 return OpCode.DOLLAR; 2503 // EQ, // = 2504 // QE_EQ, // == 2505 // EQ_GT, // => 2506 case '=': 2507 if (ch2 == '=') { // == 2508 _pos++; 2509 return OpCode.QE_EQ; 2510 } 2511 if (ch2 == '>') { // => 2512 _pos++; 2513 return OpCode.EQ_GT; 2514 } 2515 return OpCode.EQ; 2516 // MUL, // * 2517 // MUL_EQ, // *= 2518 case '*': 2519 if (ch2 == '=') { 2520 _pos++; 2521 return OpCode.MUL_EQ; 2522 } 2523 return OpCode.MUL; 2524 // MOD, // % 2525 // MOD_EQ, // %= 2526 case '%': 2527 if (ch2 == '=') { 2528 _pos++; 2529 return OpCode.MOD_EQ; 2530 } 2531 return OpCode.MOD; 2532 // XOR, // ^ 2533 // XOR_EQ, // ^= 2534 // LOG_XOR, // ^^ 2535 // LOG_XOR_EQ, // ^^= 2536 case '^': 2537 if (ch2 == '^') { 2538 if (ch3 == '=') { 2539 _pos += 2; 2540 return OpCode.LOG_XOR_EQ; 2541 } 2542 _pos++; 2543 return OpCode.LOG_XOR; 2544 } 2545 if (ch2 == '=') { 2546 _pos++; 2547 return OpCode.XOR_EQ; 2548 } 2549 return OpCode.XOR; 2550 // INV, // ~ 2551 // INV_EQ, // ~= 2552 case '~': 2553 if (ch2 == '=') { 2554 _pos++; 2555 return OpCode.INV_EQ; 2556 } 2557 return OpCode.INV; 2558 // AT, // @ 2559 case '@': 2560 return OpCode.AT; 2561 // SHARP // # 2562 case '#': 2563 return OpCode.SHARP; 2564 default: 2565 return OpCode.NONE; 2566 } 2567 } 2568 2569 protected Token processCharacterLiteral() { 2570 _sharedCharacterLiteralToken.setPos(_startLine, _startPos); 2571 if (_pos + 2 > _len) 2572 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2573 dchar ch = _lineText[_pos++]; 2574 dchar ch2 = _lineText[_pos++]; 2575 dchar type = 0; 2576 if (ch == '\\') { 2577 // process escaped character - store it in ch 2578 // TODO: support all escape sequences 2579 switch(ch2) { 2580 case 'r': 2581 ch = '\r'; 2582 break; 2583 case 'n': 2584 ch = '\n'; 2585 break; 2586 case 't': 2587 ch = '\t'; 2588 break; 2589 case '\\': 2590 ch = '\\'; 2591 break; 2592 default: 2593 ch = ch2; 2594 break; 2595 } 2596 // here must be closing ' 2597 if (_pos + 1 > _len) 2598 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2599 ch2 = _lineText[_pos++]; 2600 } 2601 if (ch2 != '\'') 2602 return parserError("Invalid character literal", _sharedCharacterLiteralToken); 2603 if (_pos < _len) { 2604 dchar t = _lineText[_pos]; 2605 if (t == 'd' || t == 'w' || t == 'c') { 2606 type = t; 2607 _pos++; 2608 } else if (isIdentMiddleChar(ch)) { 2609 return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken); 2610 } 2611 } 2612 _sharedCharacterLiteralToken.setCharacter(ch, type); 2613 return _sharedCharacterLiteralToken; 2614 } 2615 2616 protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) { 2617 bool wysiwyg = (delimiter == 'r' || delimiter == '`'); 2618 //writeln("processDoubleQuotedString()"); 2619 _sharedStringLiteralToken.setPos(_startLine, _startPos); 2620 _stringLiteralAppender.reset(); 2621 if (delimiter == 'r') { 2622 _pos++; 2623 delimiter = '\"'; 2624 } 2625 dchar type = 0; 2626 for (;;) { 2627 int i = _pos; 2628 int endPos = int.max; 2629 bool lastBackSlash = false; 2630 for(; i < _len; i++) { 2631 dchar ch = _lineText[i]; 2632 if (ch == '\\') { 2633 if (lastBackSlash) 2634 lastBackSlash = false; 2635 else 2636 lastBackSlash = true; 2637 } 2638 else if (ch == delimiter && !lastBackSlash) { 2639 endPos = i; 2640 break; 2641 } 2642 else if(lastBackSlash) 2643 lastBackSlash = false; 2644 } 2645 if (endPos != int.max) { 2646 // found end quote 2647 _stringLiteralAppender.append(_lineText[_pos .. endPos]); 2648 _pos = endPos + 1; 2649 break; 2650 } 2651 // no quote by end of line 2652 _stringLiteralAppender.append(_lineText[_pos .. $]); 2653 _stringLiteralAppender.appendEol(); 2654 if (!nextLine()) { 2655 // do we need to throw exception if eof comes before end of string? 2656 break; 2657 } 2658 } 2659 dchar t = 0; 2660 if (_pos < _len) { 2661 dchar ch = _lineText[_pos]; 2662 if (ch == 'c' || ch == 'w' || ch == 'd') { 2663 t = ch; 2664 _pos++; 2665 if (_pos < _len) { 2666 ch = _lineText[_pos]; 2667 if (isIdentMiddleChar(ch)) 2668 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2669 } 2670 } else if (isIdentMiddleChar(ch)) 2671 return parserError("Unexpected character after string literal", _sharedStringLiteralToken); 2672 } 2673 if (t != 0) { 2674 if (type != 0 && t != type) 2675 return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); 2676 type = t; 2677 } 2678 if (wysiwyg) { 2679 // no escape processing 2680 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2681 return _sharedStringLiteralToken; 2682 } 2683 _stringLiteralAppender.processEscapeSequences(); 2684 _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type); 2685 return _sharedStringLiteralToken; 2686 } 2687 2688 protected SysTime buildTime; 2689 2690 // string literal of the date of compilation "mmm dd yyyy" 2691 protected dstring formatBuildDate() { 2692 // TODO: provide proper format 2693 return to!dstring(buildTime); 2694 } 2695 2696 // string literal of the time of compilation "hh:mm:ss" 2697 protected dstring formatBuildTime() { 2698 // TODO: provide proper format 2699 return to!dstring(buildTime); 2700 } 2701 2702 // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2703 protected dstring formatBuildTimestamp() { 2704 // TODO: provide proper format 2705 return to!dstring(buildTime); 2706 } 2707 2708 static immutable dstring VERSION = "0.1"; 2709 static immutable dstring VENDOR = "coolreader.org"; 2710 2711 protected Token makeSpecialTokenString(dstring str, int pos) { 2712 _sharedStringLiteralToken.setPos(_startLine, _startPos); 2713 _sharedStringLiteralToken.setText(cast(dchar[])str, 0); 2714 return _sharedStringLiteralToken; 2715 } 2716 2717 protected Token processSpecialToken(Keyword keyword, int pos) { 2718 switch (keyword) { 2719 //Special Token Replaced with 2720 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2721 return makeSpecialTokenString(formatBuildDate(), pos); 2722 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2723 return makeSpecialTokenString(formatBuildTime(), pos); 2724 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2725 return makeSpecialTokenString(formatBuildTimestamp(), pos); 2726 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2727 return makeSpecialTokenString(VENDOR, pos); 2728 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2729 return makeSpecialTokenString(VERSION, pos); 2730 default: 2731 parserError("Unknown special token", _line, pos); 2732 } 2733 return null; 2734 } 2735 2736 protected int _startLine; 2737 protected int _startPos; 2738 2739 // returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls). 2740 Token nextToken() { 2741 _startLine = _line; 2742 _startPos = _pos; 2743 dchar ch = nextChar(); 2744 if (ch == EOF_CHAR) { 2745 return emitEof(); 2746 } 2747 if (ch == '\r' || ch == '\n' || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) { 2748 // white space (treat EOL as whitespace, too) 2749 return processWhiteSpace(ch); 2750 } 2751 dchar next = _pos < _len ? _lineText[_pos] : 0; 2752 if (ch == '/') { 2753 if (next == '/') 2754 return processOneLineComment(); 2755 else if (next == '*') 2756 return processMultilineComment(); 2757 else if (next == '+') 2758 return processNestedComment(); 2759 } 2760 if (ch == '#' && _line == 1) 2761 return processOneLineSharpComment(); 2762 if (ch == '\"') 2763 return processDoubleQuotedOrWysiwygString(ch); 2764 if (ch == '\'') 2765 return processCharacterLiteral(); 2766 if (ch == 'x' && next == '\"') 2767 return processHexString(); 2768 if (ch == 'q' && next == '\"') 2769 return processDelimitedString(); 2770 if ((ch == 'r' && next == '\"') || (ch == '`')) 2771 return processDoubleQuotedOrWysiwygString(ch); 2772 int oldPos = _pos - 1; 2773 2774 if (ch == '0') { 2775 if (next == 'b' || next == 'B') 2776 return processBinaryNumber(); 2777 if (next == 'x' || next == 'X') 2778 return processHexNumber(); 2779 if (next >= '0' && next <= '9') 2780 return processOctNumber(); 2781 if (next >= '0' && next <= '9') 2782 return processDecNumber(ch); 2783 } 2784 if (ch >= '0' && ch <= '9') 2785 return processDecNumber(ch); 2786 if (ch == '.' && next >= '0' && next <= '9') // .123 2787 return processDecFloatSecondPart(0); 2788 2789 if (ch == '_' || isUniversalAlpha(ch)) { 2790 // start of identifier or keyword? 2791 Keyword keyword = detectKeyword(ch); 2792 if (keyword != Keyword.NONE) { 2793 switch (keyword) { 2794 //Special Token Replaced with 2795 case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file 2796 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy" 2797 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss" 2798 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy" 2799 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D" 2800 case Keyword.VERSION_: // Compiler version as an integer, such as 2001 2801 return processSpecialToken(keyword, oldPos); 2802 default: 2803 _sharedKeywordToken.setPos(_startLine, _startPos); 2804 _sharedKeywordToken.keyword = keyword; 2805 return _sharedKeywordToken; 2806 } 2807 } 2808 return processIdent(ch); 2809 } 2810 OpCode op = detectOp(ch); 2811 if (op != OpCode.NONE) { 2812 _sharedOpToken.setPos(_startLine, _startPos); 2813 _sharedOpToken.opCode = op; 2814 return _sharedOpToken; 2815 } 2816 return parserError("Invalid token", _line, _pos); 2817 } 2818 2819 /// tokenize all 2820 Token[] allTokens() { 2821 Token[] res; 2822 res.assumeSafeAppend; 2823 for(;;) { 2824 Token tok = nextToken(); 2825 if (!tok || tok.type == TokenType.EOF) 2826 break; 2827 res ~= tok.clone(); 2828 } 2829 return res; 2830 } 2831 } 2832 2833 unittest { 2834 version(DisableLexerTest) { 2835 import std.stdio; 2836 import std.conv; 2837 import std.utf; 2838 import dlangui.core.linestream; 2839 string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d"; 2840 writeln("opening file"); 2841 try { 2842 std.stream.File f = new std.stream.File(fname); 2843 scope(exit) { f.close(); } 2844 try { 2845 LineStream lines = LineStream.create(f, fname); 2846 Tokenizer tokenizer = new Tokenizer(lines); 2847 for (;;) { 2848 Token token = tokenizer.nextToken(); 2849 if (token is null) { 2850 writeln("Null token returned"); 2851 break; 2852 } 2853 if (token.type == TokenType.EOF) { 2854 writeln("EOF token"); 2855 break; 2856 } 2857 writeln("", token.line, ":", token.pos, "\t", token.toString); 2858 } 2859 } catch (Exception e) { 2860 writeln("Exception " ~ e.toString); 2861 } 2862 } catch (Exception e) { 2863 writeln("Exception " ~ e.toString); 2864 } 2865 } 2866 } 2867 2868 /// converts named entity to character, returns 0 if not found 2869 dchar entityToChar(string name) { 2870 if (auto ch = name in entityToCharMap) { 2871 return *ch; 2872 } 2873 return 0; 2874 } 2875 2876 /// fings entity name for character, returns null if not found 2877 string charToEntity(dchar ch) { 2878 if (auto name = ch in charToEntityMap) { 2879 return *name; 2880 } 2881 return null; 2882 } 2883 2884 private __gshared dchar[string]entityToCharMap; 2885 private __gshared string[dchar]charToEntityMap; 2886 private void addEntity(string name, dchar ch) { 2887 entityToCharMap[name] = ch; 2888 charToEntityMap[ch] = name; 2889 } 2890 __gshared static this() { 2891 addEntity("quot", 34); 2892 addEntity("amp", 38); 2893 addEntity("lt", 60); 2894 addEntity("gt", 62); 2895 addEntity("OElig", 338); 2896 addEntity("oelig", 339); 2897 addEntity("Scaron", 352); 2898 addEntity("scaron", 353); 2899 addEntity("Yuml", 376); 2900 addEntity("circ", 710); 2901 addEntity("tilde", 732); 2902 addEntity("ensp", 8194); 2903 addEntity("emsp", 8195); 2904 addEntity("thinsp", 8201); 2905 addEntity("zwnj", 8204); 2906 addEntity("zwj", 8205); 2907 addEntity("lrm", 8206); 2908 addEntity("rlm", 8207); 2909 addEntity("ndash", 8211); 2910 addEntity("mdash", 8212); 2911 addEntity("lsquo", 8216); 2912 addEntity("rsquo", 8217); 2913 addEntity("sbquo", 8218); 2914 addEntity("ldquo", 8220); 2915 addEntity("rdquo", 8221); 2916 addEntity("bdquo", 8222); 2917 addEntity("dagger", 8224); 2918 addEntity("Dagger", 8225); 2919 addEntity("permil", 8240); 2920 addEntity("lsaquo", 8249); 2921 addEntity("rsaquo", 8250); 2922 addEntity("euro", 8364); 2923 addEntity("nbsp", 160); 2924 addEntity("iexcl", 161); 2925 addEntity("cent", 162); 2926 addEntity("pound", 163); 2927 addEntity("curren", 164); 2928 addEntity("yen", 165); 2929 addEntity("brvbar", 166); 2930 addEntity("sect", 167); 2931 addEntity("uml", 168); 2932 addEntity("copy", 169); 2933 addEntity("ordf", 170); 2934 addEntity("laquo", 171); 2935 addEntity("not", 172); 2936 addEntity("shy", 173); 2937 addEntity("reg", 174); 2938 addEntity("macr", 175); 2939 addEntity("deg", 176); 2940 addEntity("plusmn", 177); 2941 addEntity("sup2", 178); 2942 addEntity("sup3", 179); 2943 addEntity("acute", 180); 2944 addEntity("micro", 181); 2945 addEntity("para", 182); 2946 addEntity("middot", 183); 2947 addEntity("cedil", 184); 2948 addEntity("sup1", 185); 2949 addEntity("ordm", 186); 2950 addEntity("raquo", 187); 2951 addEntity("frac14", 188); 2952 addEntity("frac12", 189); 2953 addEntity("frac34", 190); 2954 addEntity("iquest", 191); 2955 addEntity("Agrave", 192); 2956 addEntity("Aacute", 193); 2957 addEntity("Acirc", 194); 2958 addEntity("Atilde", 195); 2959 addEntity("Auml", 196); 2960 addEntity("Aring", 197); 2961 addEntity("AElig", 198); 2962 addEntity("Ccedil", 199); 2963 addEntity("Egrave", 200); 2964 addEntity("Eacute", 201); 2965 addEntity("Ecirc", 202); 2966 addEntity("Euml", 203); 2967 addEntity("Igrave", 204); 2968 addEntity("Iacute", 205); 2969 addEntity("Icirc", 206); 2970 addEntity("Iuml", 207); 2971 addEntity("ETH", 208); 2972 addEntity("Ntilde", 209); 2973 addEntity("Ograve", 210); 2974 addEntity("Oacute", 211); 2975 addEntity("Ocirc", 212); 2976 addEntity("Otilde", 213); 2977 addEntity("Ouml", 214); 2978 addEntity("times", 215); 2979 addEntity("Oslash", 216); 2980 addEntity("Ugrave", 217); 2981 addEntity("Uacute", 218); 2982 addEntity("Ucirc", 219); 2983 addEntity("Uuml", 220); 2984 addEntity("Yacute", 221); 2985 addEntity("THORN", 222); 2986 addEntity("szlig", 223); 2987 addEntity("agrave", 224); 2988 addEntity("aacute", 225); 2989 addEntity("acirc", 226); 2990 addEntity("atilde", 227); 2991 addEntity("auml", 228); 2992 addEntity("aring", 229); 2993 addEntity("aelig", 230); 2994 addEntity("ccedil", 231); 2995 addEntity("egrave", 232); 2996 addEntity("eacute", 233); 2997 addEntity("ecirc", 234); 2998 addEntity("euml", 235); 2999 addEntity("igrave", 236); 3000 addEntity("iacute", 237); 3001 addEntity("icirc", 238); 3002 addEntity("iuml", 239); 3003 addEntity("eth", 240); 3004 addEntity("ntilde", 241); 3005 addEntity("ograve", 242); 3006 addEntity("oacute", 243); 3007 addEntity("ocirc", 244); 3008 addEntity("otilde", 245); 3009 addEntity("ouml", 246); 3010 addEntity("divide", 247); 3011 addEntity("oslash", 248); 3012 addEntity("ugrave", 249); 3013 addEntity("uacute", 250); 3014 addEntity("ucirc", 251); 3015 addEntity("uuml", 252); 3016 addEntity("yacute", 253); 3017 addEntity("thorn", 254); 3018 addEntity("yuml", 255); 3019 addEntity("fnof", 402); 3020 addEntity("Alpha", 913); 3021 addEntity("Beta", 914); 3022 addEntity("Gamma", 915); 3023 addEntity("Delta", 916); 3024 addEntity("Epsilon", 917); 3025 addEntity("Zeta", 918); 3026 addEntity("Eta", 919); 3027 addEntity("Theta", 920); 3028 addEntity("Iota", 921); 3029 addEntity("Kappa", 922); 3030 addEntity("Lambda", 923); 3031 addEntity("Mu", 924); 3032 addEntity("Nu", 925); 3033 addEntity("Xi", 926); 3034 addEntity("Omicron", 927); 3035 addEntity("Pi", 928); 3036 addEntity("Rho", 929); 3037 addEntity("Sigma", 931); 3038 addEntity("Tau", 932); 3039 addEntity("Upsilon", 933); 3040 addEntity("Phi", 934); 3041 addEntity("Chi", 935); 3042 addEntity("Psi", 936); 3043 addEntity("Omega", 937); 3044 addEntity("alpha", 945); 3045 addEntity("beta", 946); 3046 addEntity("gamma", 947); 3047 addEntity("delta", 948); 3048 addEntity("epsilon", 949); 3049 addEntity("zeta", 950); 3050 addEntity("eta", 951); 3051 addEntity("theta", 952); 3052 addEntity("iota", 953); 3053 addEntity("kappa", 954); 3054 addEntity("lambda", 955); 3055 addEntity("mu", 956); 3056 addEntity("nu", 957); 3057 addEntity("xi", 958); 3058 addEntity("omicron", 959); 3059 addEntity("pi", 960); 3060 addEntity("rho", 961); 3061 addEntity("sigmaf", 962); 3062 addEntity("sigma", 963); 3063 addEntity("tau", 964); 3064 addEntity("upsilon", 965); 3065 addEntity("phi", 966); 3066 addEntity("chi", 967); 3067 addEntity("psi", 968); 3068 addEntity("omega", 969); 3069 addEntity("thetasym", 977); 3070 addEntity("upsih", 978); 3071 addEntity("piv", 982); 3072 addEntity("bull", 8226); 3073 addEntity("hellip", 8230); 3074 addEntity("prime", 8242); 3075 addEntity("Prime", 8243); 3076 addEntity("oline", 8254); 3077 addEntity("frasl", 8260); 3078 addEntity("weierp", 8472); 3079 addEntity("image", 8465); 3080 addEntity("real", 8476); 3081 addEntity("trade", 8482); 3082 addEntity("alefsym", 8501); 3083 addEntity("larr", 8592); 3084 addEntity("uarr", 8593); 3085 addEntity("rarr", 8594); 3086 addEntity("darr", 8595); 3087 addEntity("harr", 8596); 3088 addEntity("crarr", 8629); 3089 addEntity("lArr", 8656); 3090 addEntity("uArr", 8657); 3091 addEntity("rArr", 8658); 3092 addEntity("dArr", 8659); 3093 addEntity("hArr", 8660); 3094 addEntity("forall", 8704); 3095 addEntity("part", 8706); 3096 addEntity("exist", 8707); 3097 addEntity("empty", 8709); 3098 addEntity("nabla", 8711); 3099 addEntity("isin", 8712); 3100 addEntity("notin", 8713); 3101 addEntity("ni", 8715); 3102 addEntity("prod", 8719); 3103 addEntity("sum", 8721); 3104 addEntity("minus", 8722); 3105 addEntity("lowast", 8727); 3106 addEntity("radic", 8730); 3107 addEntity("prop", 8733); 3108 addEntity("infin", 8734); 3109 addEntity("ang", 8736); 3110 addEntity("and", 8743); 3111 addEntity("or", 8744); 3112 addEntity("cap", 8745); 3113 addEntity("cup", 8746); 3114 addEntity("int", 8747); 3115 addEntity("there4", 8756); 3116 addEntity("sim", 8764); 3117 addEntity("cong", 8773); 3118 addEntity("asymp", 8776); 3119 addEntity("ne", 8800); 3120 addEntity("equiv", 8801); 3121 addEntity("le", 8804); 3122 addEntity("ge", 8805); 3123 addEntity("sub", 8834); 3124 addEntity("sup", 8835); 3125 addEntity("nsub", 8836); 3126 addEntity("sube", 8838); 3127 addEntity("supe", 8839); 3128 addEntity("oplus", 8853); 3129 addEntity("otimes", 8855); 3130 addEntity("perp", 8869); 3131 addEntity("sdot", 8901); 3132 addEntity("lceil", 8968); 3133 addEntity("rceil", 8969); 3134 addEntity("lfloor", 8970); 3135 addEntity("rfloor", 8971); 3136 addEntity("loz", 9674); 3137 addEntity("spades", 9824); 3138 addEntity("clubs", 9827); 3139 addEntity("hearts", 9829); 3140 addEntity("diams", 9830); 3141 addEntity("lang", 10216); 3142 addEntity("rang", 10217); 3143 } 3144 3145 3146 3147 //void runTokenizerTest() 3148 unittest 3149 { 3150 import std.algorithm; 3151 class TokenTest { 3152 int _line; 3153 string _file; 3154 this(string file, int line) { 3155 _file = file; 3156 _line = line; 3157 } 3158 bool doTest(Token token) { 3159 return true; 3160 } 3161 void execute(Tokenizer tokenizer) { 3162 Token token = tokenizer.nextToken(); 3163 if (!doTest(token)) { 3164 assert(false, " token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString); 3165 } 3166 } 3167 public override @property string toString() { 3168 return "TokenTest"; 3169 } 3170 } 3171 void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) { 3172 Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line)); 3173 for (int i = 0; i < tokens.length; i++) { 3174 tokens[i].execute(tokenizer); 3175 } 3176 } 3177 class KeywordTest : TokenTest { 3178 Keyword _code; 3179 this(Keyword code, string file = __FILE__, uint line = __LINE__) { 3180 super(file, line); 3181 _code = code; 3182 } 3183 override bool doTest(Token token) { 3184 if (token.type != TokenType.KEYWORD) 3185 return false; 3186 if (token.keyword != _code) 3187 return false; 3188 return true; 3189 } 3190 public override @property string toString() { 3191 return "Keyword:" ~ to!string(_code); 3192 } 3193 } 3194 class OpTest : TokenTest { 3195 OpCode _code; 3196 this(OpCode code, string file = __FILE__, uint line = __LINE__) { 3197 super(file, line); 3198 _code = code; 3199 } 3200 override bool doTest(Token token) { 3201 if (token.type != TokenType.OP) 3202 return false; 3203 if (token.opCode != _code) 3204 return false; 3205 return true; 3206 } 3207 public override @property string toString() { 3208 return "Op:" ~ to!string(_code); 3209 } 3210 } 3211 class StringTest : TokenTest { 3212 dstring _value; 3213 dchar _literalType; 3214 this(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) { 3215 super(file, line); 3216 _value = value; 3217 _literalType = literalType; 3218 } 3219 override bool doTest(Token token) { 3220 if (token.type != TokenType.STRING) 3221 return false; 3222 if (!token.text.equal(_value)) 3223 return false; 3224 if (token.literalType != _literalType) 3225 return false; 3226 return true; 3227 } 3228 public override @property string toString() { 3229 return toUTF8("String:\"" ~ _value ~ "\"" ~ (_literalType ? _literalType : ' ')); 3230 } 3231 } 3232 class IntegerTest : TokenTest { 3233 ulong _value; 3234 bool _unsigned; 3235 bool _long; 3236 this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 3237 super(file, line); 3238 _value = value; 3239 _unsigned = unsignedFlag; 3240 _long = longFlag; 3241 } 3242 override bool doTest(Token token) { 3243 if (token.type != TokenType.INTEGER) 3244 return false; 3245 if (token.intValue != _value) 3246 return false; 3247 if (token.isUnsigned != _unsigned) 3248 return false; 3249 if (token.isLong != _long) 3250 return false; 3251 return true; 3252 } 3253 public override @property string toString() { 3254 return "Integer:" ~ to!string(_value); 3255 } 3256 } 3257 class RealTest : TokenTest { 3258 real _value; 3259 ubyte _precision; 3260 bool _imaginary; 3261 this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 3262 super(file, line); 3263 _value = value; 3264 _precision = precision; 3265 _imaginary = imaginary; 3266 } 3267 override bool doTest(Token token) { 3268 if (token.type != TokenType.FLOAT) 3269 return false; 3270 real diff = token.realValue - _value; 3271 real maxerr = _value / 1000000; 3272 if (diff < 0) diff = -diff; 3273 if (maxerr < 0) maxerr = -maxerr; 3274 if (diff > maxerr) 3275 return false; 3276 if (token.precision != _precision) 3277 return false; 3278 if (token.isImaginary != _imaginary) 3279 return false; 3280 return true; 3281 } 3282 public override @property string toString() { 3283 return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : ""); 3284 } 3285 } 3286 class IdentTest : TokenTest { 3287 string _value; 3288 this(string value, string file = __FILE__, uint line = __LINE__) { 3289 super(file, line); 3290 _value = value; 3291 } 3292 override bool doTest(Token token) { 3293 if (token.type != TokenType.IDENTIFIER) 3294 return false; 3295 if (! to!string(token.text).equal(_value)) 3296 return false; 3297 return true; 3298 } 3299 public override @property string toString() { 3300 return "Ident:" ~ _value; 3301 } 3302 } 3303 class CommentTest : TokenTest { 3304 this(string file = __FILE__, uint line = __LINE__) { 3305 super(file, line); 3306 } 3307 override bool doTest(Token token) { 3308 if (token.type != TokenType.COMMENT) 3309 return false; 3310 return true; 3311 } 3312 public override @property string toString() { 3313 return "Comment"; 3314 } 3315 } 3316 class EOFTest : TokenTest { 3317 this(string file = __FILE__, uint line = __LINE__) { 3318 super(file, line); 3319 } 3320 override bool doTest(Token token) { 3321 if (token.type != TokenType.EOF) 3322 return false; 3323 return true; 3324 } 3325 public override @property string toString() { 3326 return "EOF"; 3327 } 3328 } 3329 class WhiteSpaceTest : TokenTest { 3330 this(string file = __FILE__, uint line = __LINE__) { 3331 super(file, line); 3332 } 3333 override bool doTest(Token token) { 3334 if (token.type != TokenType.WHITESPACE) 3335 return false; 3336 return true; 3337 } 3338 public override @property string toString() { 3339 return "whiteSpace"; 3340 } 3341 } 3342 TokenTest checkString(dstring value, dchar literalType = 0, string file = __FILE__, uint line = __LINE__) { 3343 return new StringTest(value, literalType, file, line); 3344 } 3345 TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) { 3346 return new IntegerTest(value, unsignedFlag, longFlag, file, line); 3347 } 3348 TokenTest checkReal(real value, byte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) { 3349 return new RealTest(value, precision, imaginary, file, line); 3350 } 3351 TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) { 3352 return new IdentTest(value, file, line); 3353 } 3354 TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) { 3355 return new KeywordTest(value, file, line); 3356 } 3357 TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) { 3358 return new OpTest(value, file, line); 3359 } 3360 TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) { 3361 return new WhiteSpaceTest(file, line); 3362 } 3363 TokenTest checkComment(string file = __FILE__, uint line = __LINE__) { 3364 return new CommentTest(file, line); 3365 } 3366 TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) { 3367 return new EOFTest(file, line); 3368 } 3369 3370 // test strings 3371 testTokenizer("r\"simple\\nstring\"", [checkString( r"simple\nstring" )]); 3372 3373 // test strings 3374 testTokenizer(q"TEST 3375 "simple string" 3376 "simple\nstring" 3377 `simple string` 3378 "simple string"d 3379 "simple string"c 3380 "simple string"w 3381 "simple\"string" 3382 "\r\n\f\t\\\"\'&" 3383 TEST" 3384 , [ 3385 checkString("simple string"), 3386 checkSpace(), 3387 checkString("simple\nstring"), 3388 checkSpace(), 3389 checkString("simple string"), 3390 checkSpace(), 3391 checkString("simple string", 'd'), 3392 checkSpace(), 3393 checkString("simple string", 'c'), 3394 checkSpace(), 3395 checkString("simple string", 'w'), 3396 checkSpace(), 3397 checkString("simple\"string"), 3398 checkSpace(), 3399 checkString("\r\n\f\t\\\"\'&"), 3400 ]); 3401 // basic test 3402 testTokenizer(q"TEST 3403 int i; 3404 TEST" 3405 , [ 3406 checkKeyword(Keyword.INT), 3407 checkSpace(), 3408 checkIdent("i"), 3409 checkOp(OpCode.SEMICOLON), 3410 checkEOF() 3411 ]); 3412 // test numbers 3413 testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25 12.3f 54.1L 67.1i 3e3 25.67e-5f" 3414 , [ 3415 checkInteger(13), 3416 checkSpace(), 3417 checkInteger(0x123abcd, true, false), 3418 checkSpace(), 3419 checkInteger(0xabc, false, true), 3420 checkSpace(), 3421 checkInteger(std.conv.octal!743), 3422 checkSpace(), 3423 checkInteger(192_837_465), 3424 checkSpace(), 3425 checkInteger(0), 3426 checkSpace(), 3427 checkInteger(192837465), 3428 checkSpace(), 3429 checkReal(5.25), 3430 checkSpace(), 3431 checkReal(12.3f, 0), 3432 checkSpace(), 3433 checkReal(54.1L, 2), 3434 checkSpace(), 3435 checkReal(67.1, 1, true), 3436 checkSpace(), 3437 checkReal(3e3), 3438 checkSpace(), 3439 checkReal(25.67e-5f, 0), 3440 checkEOF() 3441 ]); 3442 } 3443