/// Author: Aziz Köksal
/// License: GPL3
/// $(Maturity high)
module dil.lexer.Token;

import dil.lexer.Identifier,
       dil.lexer.Funcs;
import dil.Location;
import dil.Float;
import dil.Array;
import common;

public import dil.lexer.TokensEnum;

/// A Token is a sequence of characters recognized by the lexical analyzer.
///
/// Example:
/// $(PRE ‘    StringValue’
////   ^$(Token ws, ws) ^$(Token start, start)   ^$(Token end, end)
///
///$(Token kind, kind) = TOK.Identifier
///$(Token flags, flags) = Flags.None
///$(Token union.ident, ident) = $(Identifier)("StringValue", kind))
/// Macros:
///   Token = $(SYMLINK Token.$1, $2)
///   Identifier = $(SYMLINK2 dil.lexer.Identifier, Identifier)
struct Token
{
  TOK kind;     /// The token kind.
  cchar* ws;    /// Points to the preceding whitespace characters if present.
  cchar* start; /// Points to the first character of the token.
  cchar* end;   /// Points one character past the end of the token.

  /// Represents the string value of a single string literal,
  /// where possible escape sequences have been converted to their values.
  struct StringValue
  {
    cbinstr str; /// The typeless string value.
    char pf = 0; /// Postfix: 'c', 'w', 'd'. '\0' for none.
  version(D2)
    Token* tokens; /// Points to the contents of a token string stored
                   /// as a zero-terminated array.
  }

  /// Represents the long/ulong value of a number literal.
  union IntegerValue
  {
    long  long_;  /// A long integer value.
    ulong ulong_; /// An unsigned long integer value.
  }

  /// Represents the data of a newline token.
  struct NewlineValue
  {
    size_t lineNum;       /// The line number in the source text.
    HashLineInfo* hlinfo; /// Info from a "#line" token.
  }

  /// Represents the value of a "#line Number Filespec?" token.
  struct HashLineValue
  {
    Token* lineNum;  /// The Number.
    Token* filespec; /// The optional Filespec.
  }

  /// Represents the info of a #line token. Used for error messages.
  struct HashLineInfo
  {
    size_t lineNum; /// Delta line number calculated from #line Number.
    cstring path;   /// File path set by #line num Filespec.
    /// Calculates and returns the line number.
    /// Params:
    ///   realnum = the actual line number in the source text.
    size_t getLineNum(size_t realnum)
    {
      return realnum - lineNum;
    }
    /// Calculates a delta value and sets 'lineNum'.
    /// Params:
    ///   realnum = the actual line number in the source text.
    ///   hlnum = the number given in the "#line" token.
    void setLineNum(size_t realnum, size_t hlnum)
    {
      lineNum = realnum - hlnum + 1;
    }
  }

  /// Data associated with this token.
  union /+TokenValue+/
  {
    NewlineValue* nlval;  /// Value of a newline token.
    HashLineValue* hlval; /// Value of a #line token.
    StringValue* strval;  /// The value of a string token.
    Identifier* ident;    /// For keywords and identifiers.
    dchar  dchar_;        /// Value of a character literal.
    size_t sizet_;        /// An integer that fits into the address space.
    int    int_;          /// Value of an Int32 token.
    uint   uint_;         /// Value of a UInt32 token.
  version(X86_64)
    IntegerValue intval;  /// Value of a number literal.
  else
    IntegerValue* intval; /// Value of a number literal.
    Float mpfloat;        /// A multiple precision float value.
    void* pvoid;          /// Associate arbitrary data with this token.
  }
  // static assert(TokenValue.sizeof == (void*).sizeof);

  /// Returns the text of the token, i.e. the slice start[0 .. end - start].
  cstring text()
  {
    assert(start <= end);
    return start[0 .. end - start];
  }

  /// Sets the text of the token by pointing start and end at s.
  void text(cstring s)
  {
    start = s.ptr;
    end = s.ptr + s.length;
  }

  /// Returns the preceding whitespace of the token.
  /// Both ws and start must be non-null.
  cstring wsChars()
  {
    assert(ws && start);
    return ws[0 .. start - ws];
  }

  /// Returns the next token (the one stored directly after this one).
  Token* next()
  {
    assert(kind != TOK.Invalid);
    return &this + 1;
  }

  /// Returns the previous token (the one stored directly before this one).
  Token* prev()
  {
    assert(kind != TOK.Invalid);
    return &this - 1;
  }

  /// Finds the next non-whitespace token. Does not go past TOK.EOF.
  Token* nextNWS()
  {
    assert(kind != TOK.Invalid);
    auto token = &this;
    if (kind != TOK.EOF)
      while ((++token).isWhitespace)
      {}
    return token;
  }

  /// Finds the previous non-whitespace token. Does not go past TOK.HEAD.
  Token* prevNWS()
  {
    assert(kind != TOK.Invalid);
    auto token = &this;
    if (kind != TOK.HEAD)
      while ((--token).isWhitespace)
      {}
    return token;
  }

  /// Returns the text of this token.
  cstring toString()
  {
    return text();
  }

  /// Returns true if this is a token that can have newlines in it.
  ///
  /// These can be block and nested comments and any string literal
  /// except for escape string literals.
  bool isMultiline()
  {
    return kind == TOK.String && start[0] != '\\' ||
           kind == TOK.Comment && start[1] != '/';
  }

  /// Returns true if this is a keyword token.
  bool isKeyword()
  {
    return KeywordsBegin <= kind && kind <= KeywordsEnd;
  }

  /// Returns true if this is an integral type token.
  bool isIntegralType()
  {
    return IntegralTypeBegin <= kind && kind <= IntegralTypeEnd;
  }

  /// Returns true if this is a whitespace token.
  bool isWhitespace()
  { // Tokens from TOK.init to TOK.LastWhitespace are whitespace.
    return kind <= TOK.LastWhitespace;
  }

  /// Returns true if this is a special token.
  bool isSpecialToken()
  {
    return kind == TOK.SpecialID;
  }

version(D2)
{
  /// Returns true if this is a token string literal.
  bool isTokenStringLiteral()
  { // A token string starts with "q{"; see also StringValue.tokens.
    return kind == TOK.String && *start == 'q' && start[1] == '{';
  }
}

  /// Returns true if this token starts a DeclarationDefinition.
  bool isDeclDefStart()
  {
    return isDeclDefStartToken(kind);
  }

  /// Returns true if this token starts a Statement.
  bool isStatementStart()
  {
    return isStatementStartToken(kind);
  }

  /// Returns true if this token starts an AsmStatement.
  bool isAsmStatementStart()
  {
    return isAsmStatementStartToken(kind);
  }

  /// Compares a token's kind to kind2.
  int opEquals(TOK kind2)
  {
    return kind == kind2;
  }

  /// Compares the position of two tokens.
  /// Assumes they are from the same source text.
  int opCmp(Token* rhs)
  { // Returns: (lower, equal, greater) = (-1, 0, 1)
    return start < rhs.start ? -1 : start !is rhs.start;
  }

  /// Computes a Location for the start or the end of this token.
  ///
  /// Walks back to the previous newline token to get the base line number,
  /// then rescans every multiline token in between for embedded newlines.
  /// Params:
  ///   realLocation = if false, the info of a "#line" token (if any)
  ///                  replaces the file path and the line number.
  ///   atEnd = if true, this token is rescanned as well and the location
  ///           points one character past its end instead of at its start.
  ///   filePath = the path of the source file.
  private Location locationOf(bool realLocation, bool atEnd)(cstring filePath)
  {
    auto search_t = &this;
    // Find previous newline token.
    while ((--search_t).kind != TOK.Newline)
    {}
    auto newline = search_t.nlval;
    auto lineNum = newline.lineNum;
    static if (!realLocation)
      if (auto hlinfo = newline.hlinfo)
      { // Change file path and line number.
        filePath = hlinfo.path;
        lineNum = hlinfo.getLineNum(newline.lineNum);
      }
    auto lineBegin = search_t.end;
    // static if does not open a scope: 'stop' and 'to' stay visible below.
    static if (atEnd)
    { // Newlines inside this token count towards the end location.
      auto stop = &this + 1;
      auto to = this.end;
    }
    else
    {
      auto stop = &this;
      auto to = this.start;
    }
    // Determine actual line begin and line number.
    while (++search_t < stop)
      // Multiline tokens must be rescanned for newlines.
      if (search_t.isMultiline)
        for (auto p = search_t.start, end_ = search_t.end; p < end_;)
          if (scanNewline(p))
          { // scanNewline() has advanced p past the newline.
            ++lineNum;
            lineBegin = p;
          }
          else
            ++p;
    return new Location(filePath, lineNum, lineBegin, to);
  }

  /// Returns the Location of this token.
  /// Params:
  ///   realLocation = if false, the info of a "#line" token (if any)
  ///                  replaces the file path and the line number.
  ///   filePath = the path of the source file.
  Location getLocation(bool realLocation)(cstring filePath)
  {
    return locationOf!(realLocation, false)(filePath);
  }

  alias getRealLocation = getLocation!(true);
  alias getErrorLocation = getLocation!(false);

  /// Returns the location of the character past the end of this token.
  ///
  /// For a multiline token both the line begin and the line number refer
  /// to the last line of the token.
  Location errorLocationOfEnd(cstring filePath)
  {
    return locationOf!(false, true)(filePath);
  }

  /// Counts the newlines in this token.
  uint lineCount()
  {
    uint count;
    if (this.isMultiline)
      for (auto p = start, end_ = end; p < end_;)
        if (scanNewline(p, end_))
          count++;
        else
          p++;
    return count;
  }

  /// Return the source text enclosed by the left and right token.
  static cstring textSpan(Token* left, Token* right)
  {
    assert(left.end <= right.start || left is right );
    return left.start[0 .. right.end - left.start];
  }

  /// ditto
  cstring textSpan(Token* right)
  {
    return textSpan(&this, right);
  }
}

alias TokenArray = DArray!Token;

/// Returns true if this token starts a DeclarationDefinition.
bool isDeclDefStartToken(TOK tok)
{
  switch (tok)
  {
  alias T = TOK;
  case  T.Align, T.Pragma, T.Export, T.Private, T.Package, T.Protected,
        T.Public, T.Extern, T.Deprecated, T.Override, T.Abstract,
        T.Synchronized, T.Static, T.Final, T.Const,
        T.Auto, T.Scope, T.Alias, T.Typedef, T.Import, T.Enum, T.Class,
        T.Interface, T.Struct, T.Union, T.This, T.Tilde, T.Unittest, T.Debug,
        T.Version, T.Template, T.New, T.Delete, T.Mixin, T.Semicolon,
        T.Identifier, T.Dot, T.Typeof:
    return true;
  version(D2)
  {
  case T.Immutable, T.Pure, T.Shared, T.Gshared,
       T.Ref, T.Nothrow, T.At:
    return true;
  }
  default:
    // Integral type keywords can start a declaration as well.
    if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd)
      return true;
  }
  return false;
}

/// Returns true if this token starts a Statement.
bool isStatementStartToken(TOK tok)
{
  alias T = TOK;
  // Integral type keywords and special identifiers are checked up front;
  // everything else is decided by the explicit list below.
  if (tok == T.SpecialID ||
      (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd))
    return true;
  switch (tok)
  {
  case  T.Align, T.Extern, T.Final, T.Const, T.Auto, T.Identifier, T.Dot,
        T.Typeof, T.If, T.While, T.Do, T.For, T.Foreach, T.ForeachReverse,
        T.Switch, T.Case, T.Default, T.Continue, T.Break, T.Return, T.Goto,
        T.With, T.Synchronized, T.Try, T.Throw, T.Scope, T.Volatile, T.Asm,
        T.Pragma, T.Mixin, T.Static, T.Debug, T.Version, T.Alias, T.Semicolon,
        T.Enum, T.Class, T.Interface, T.Struct, T.Union, T.LBrace, T.Typedef,
        T.This, T.Super, T.Null, T.True, T.False, T.Int32, T.Int64, T.UInt32,
        T.UInt64, T.Float32, T.Float64, T.Float80, T.IFloat32,
        T.IFloat64, T.IFloat80, T.Character, T.String, T.LBracket,
        T.Function, T.Delegate, T.Assert, T.Import, T.Typeid, T.Is, T.LParen,
        T.Amp, T.Plus2, T.Minus2, T.Star,
        T.Minus, T.Plus, T.Exclaim, T.Tilde, T.New, T.Delete, T.Cast:
    return true;
  version(D2)
  {
  case T.Traits, T.Immutable, T.Pure, T.Shared, T.Gshared,
       T.Ref, T.Nothrow, T.At:
    return true;
  }
  default:
    return false;
  }
}

/// Returns true if this token starts an AsmStatement.
bool isAsmStatementStartToken(TOK tok)
{
  alias T = TOK;
  // TODO: need to add all opcodes.
  return tok == T.In || tok == T.Int || tok == T.Out ||
         tok == T.Identifier || tok == T.Align || tok == T.Semicolon;
}

/// One token per token kind, indexed by kind.
/// The module constructor sets each token's kind and text.
static Token[TOK.MAX] staticTokens;

/// Returns a pointer to the static token that belongs to the given kind.
Token* toToken(TOK kind)
{
  return &staticTokens[kind];
}

/// Initializes staticTokens.
static this()
{
  import dil.lexer.IDs;

  // Give every static token its kind and let its text span the string
  // returned by toString() for that kind.
  foreach (index, ref tok; staticTokens)
  {
    auto tokKind = cast(TOK)index;
    tok.kind = tokKind;
    tok.text(tokKind.toString()); // Sets tok.start and tok.end.
  }

  // Set the ident member of the keyword tokens and the one Identifier token.
  foreach (ref keyword; IDs.getKeywordIDs())
    toToken(keyword.kind).ident = &keyword;
  toToken(TOK.Identifier).ident = &IDs.Identifier_;
  toToken(TOK.SpecialID).ident = &IDs.SpecialID;
}