/// Author: Aziz Köksal
/// License: GPL3
/// $(Maturity average)
module dil.Highlighter;

import dil.ast.DefaultVisitor,
       dil.ast.Node,
       dil.ast.Declaration,
       dil.ast.Statement,
       dil.ast.Expression,
       dil.ast.Types;
import dil.lexer.Lexer,
       dil.lexer.Funcs;
import dil.parser.Parser;
import dil.semantic.Module;
import dil.Compilation;
import dil.SourceText;
import dil.String,
       dil.Array;
import util.Path;
import common;

/// A token and syntax highlighter.
///
/// All output is appended to an internal buffer. The format strings used
/// for the output are looked up in a TagMap.
class Highlighter
{
  TagMap tags; /// Which tag map to use.
  CharArray buffer; /// Buffer that receives the text.
  CompilationContext cc; /// The compilation context.

  /// Constructs a Highlighter object.
  /// Params:
  ///   tags = The map providing the format strings.
  ///   cc = The compilation context (supplies lexer tables and diagnostics).
  this(TagMap tags, CompilationContext cc)
  {
    this.tags = tags;
    this.cc = cc;
  }

  /// Empties the buffer and returns its contents.
  char[] takeText()
  {
    return buffer.take();
  }

  /// Writes arguments formatted to the buffer.
  void printf(cstring format, ...)
  {
    buffer ~= Format(_arguments, _argptr, format);
  }

  /// Writes s to the buffer.
  void print(cstring s)
  {
    buffer ~= s;
  }

  /// Writes c to the buffer.
  void print(char c)
  {
    buffer ~= c;
  }

  /// Highlights tokens in a string.
  /// Params:
  ///   text = The source text to be scanned.
  ///   filePath = The path passed on to the SourceText object.
  ///   lines = Receives the lexer's line number after scanning.
  /// Returns: A string with the highlighted tokens.
  cstring highlightTokens(cstring text, cstring filePath, out uint lines)
  {
    auto src = new SourceText(filePath, text);
    auto lx = new Lexer(src, cc.tables.lxtables, cc.diag);
    lx.scanAll();
    lines = lx.lineNum;
    highlightTokens(lx.tokenList);
    return takeText();
  }

  /// Highlights the given tokens and appends the result to the buffer.
  /// Params:
  ///   tokens = The tokens to be printed, in order.
  ///   skipWS = Skips whitespace tokens (e.g. comments) if true.
  void highlightTokens(Token[] tokens, bool skipWS = false)
  {
    // Iterate by reference: printToken() takes a pointer, so there is
    // no need to copy every Token just to take the address of the copy.
    foreach (ref token; tokens)
    {
      if (skipWS && token.isWhitespace)
        continue;
      token.ws && print(token.wsChars); // Print preceding whitespace.
      printToken(&token);
    }
  }

  /// ditto
  /// This overload writes to the caller's buffer instead of this.buffer.
  void highlightTokens(ref CharArray buffer, Token[] tokens,
                       bool skipWS = false)
  {
    auto buffer_saved = this.buffer;
    this.buffer = buffer; // Temporarily write to the caller's buffer.
    highlightTokens(tokens, skipWS);
    buffer = this.buffer; // Update callers instance.
    this.buffer = buffer_saved;
  }

  /// Highlights all tokens of a source file.
  /// Params:
  ///   filePath = The path to the source file.
  ///   opt_printLines = Prints the line numbers block if true.
  void highlightTokens(cstring filePath, bool opt_printLines)
  {
    auto src = new SourceText(filePath, true);
    auto lx = new Lexer(src, cc.tables.lxtables, cc.diag);
    lx.scanAll();

    printf(tags["DocHead"], Path(filePath).name());
    if (lx.errors.length)
    {
      print(tags["CompBegin"]);
      printErrors(lx);
      print(tags["CompEnd"]);
    }

    if (opt_printLines)
    {
      print(tags["LineNumberBegin"]);
      printLines(lx.lineNum);
      print(tags["LineNumberEnd"]);
    }

    print(tags["SourceBegin"]);
    // Print every token, including whitespace tokens.
    highlightTokens(lx.tokenList);
    print(tags["SourceEnd"]);
    print(tags["DocEnd"]);
  }

  /// Highlights the syntax in a source file.
  /// The file is parsed into a new Module first.
  void highlightSyntax(cstring filePath, bool printHTML, bool opt_printLines)
  {
    auto modul = new Module(filePath, cc);
    modul.parse();
    highlightSyntax(modul, printHTML, opt_printLines);
  }

  /// ditto
  /// Params:
  ///   modul = The module whose parser, lexer and root node are used.
  ///   printHTML = If true, the "NodeEnd" tag is printed verbatim;
  ///     otherwise it is used as a format string that receives the
  ///     category tag of the node being closed.
  ///   opt_printLines = Prints the line numbers block if true.
  void highlightSyntax(Module modul, bool printHTML, bool opt_printLines)
  {
    auto parser = modul.parser;
    auto lx = parser.lexer;
    auto tokens = lx.tokenList;
    auto tokenExList = new TokenExBuilder().build(modul.root, tokens);

    printf(tags["DocHead"], modul.getFQN());
    if (lx.errors.length || parser.errors.length)
    { // Output error messages.
      print(tags["CompBegin"]);
      printErrors(lx);
      printErrors(parser);
      print(tags["CompEnd"]);
    }

    if (opt_printLines)
    {
      print(tags["LineNumberBegin"]);
      printLines(lx.lineNum);
      print(tags["LineNumberEnd"]);
    }

    print(tags["SourceBegin"]);

    auto tagNodeBegin = tags["NodeBegin"];
    auto tagNodeEnd = tags["NodeEnd"];

    // Iterate over list of tokens.
    // tokenExList[i] holds the nodes that begin/end at tokens[i].
    foreach (i, ref tokenEx; tokenExList)
    {
      auto token = &tokens[i];
      token.ws && print(token.wsChars); // Print preceding whitespace.
      if (token.isWhitespace) {
        printToken(token);
        continue;
      }
      // <node>
      foreach (node; tokenEx.beginNodes)
        printf(tagNodeBegin, tags.getTag(node.kind),
               node.getShortClassName());
      // Token text.
      printToken(token);
      // </node>
      if (printHTML)
        foreach_reverse (node; tokenEx.endNodes)
          print(tagNodeEnd);
      else
        foreach_reverse (node; tokenEx.endNodes)
          printf(tagNodeEnd, tags.getTag(node.kind));
    }
    print(tags["SourceEnd"]);
    print(tags["DocEnd"]);
  }

  /// Prints the errors of the lexer using the "LexerError" format string.
  void printErrors(Lexer lx)
  {
    foreach (e; lx.errors)
      printf(tags["LexerError"], e.filePath,
             e.loc, e.col, xml_escape(e.getMsg));
  }

  /// Prints the errors of the parser using the "ParserError" format string.
  void printErrors(Parser parser)
  {
    foreach (e; parser.errors)
      printf(tags["ParserError"], e.filePath,
             e.loc, e.col, xml_escape(e.getMsg));
  }

  /// Prints the numbers 1 to lines (inclusive) using the "LineNumber"
  /// format string.
  void printLines(uint lines)
  {
    auto lineNumberFormat = tags["LineNumber"];
    for (auto lineNum = 1; lineNum <= lines; lineNum++)
      printf(lineNumberFormat, lineNum);
  }

  /// Prints a highlighted token to the buffer.
  /// The format string is chosen from the tag map according to token.kind.
  void printToken(Token* token)
  {
    switch (token.kind)
    {
    case TOK.Identifier:
      printf(tags.Identifier, token.text);
      break;
    case TOK.Comment:
      cstring formatStr;
      // The second character tells the comment types apart.
      switch (token.start[1])
      {
      case '/': formatStr = tags.LineC; break;
      case '*': formatStr = tags.BlockC; break;
      case '+': formatStr = tags.NestedC; break;
      default: assert(0);
      }
      printf(formatStr, xml_escape(token.text));
      break;
    case TOK.String:
      cstring text = token.text;
      assert(text.length);
      if (text.length > 1 && text[0] == 'q' && text[1] == '{')
      {
        version(D2)
        {
        auto buffer_saved = this.buffer; // Save;
        this.buffer = CharArray(text.length);
        print("q{");
        // Traverse and print inner tokens.
        Token* last; // Remember last token.
        for (auto t = token.strval.tokens; t.kind; t++)
        {
          t.ws && print(t.wsChars); // Print preceding whitespace.
          printToken(t);
          last = t;
        }
        // Print: Whitespace? "}" Postfix?
        // When there are no inner tokens (e.g. "q{}"), the tail starts
        // right after the leading "q{". Otherwise the closing brace
        // would be missing from the output.
        print(slice(last ? last.end : token.start + 2, token.end));
        text = takeText();
        this.buffer = buffer_saved; // Restore
        }
      }
      else
        text = (text[0] == '"') ?
          scanEscapeSequences(text, tags.Escape) :
          xml_escape(text);
      printf(tags.String, text);
      break;
    case TOK.Character:
      cstring text = token.text;
      text = (text.length > 1 && text[1] == '\\') ?
        scanEscapeSequences(text, tags.Escape) :
        xml_escape(text);
      printf(tags.Char, text);
      break;
    case TOK.Int32, TOK.Int64, TOK.UInt32, TOK.UInt64,
         TOK.Float32, TOK.Float64, TOK.Float80,
         TOK.IFloat32, TOK.IFloat64, TOK.IFloat80:
      printf(tags.Number, token.text);
      break;
    case TOK.Shebang:
      printf(tags.Shebang, xml_escape(token.text));
      break;
    case TOK.HashLine:
      // The text to be inserted into formatStr.
      char[] lineText;

      // Appends the text between start and end to lineText.
      void printWS(cchar* start, cchar* end)
      {
        if (start != end) lineText ~= start[0 .. end - start];
      }

      auto num = token.hlval.lineNum;
      if (num is null) // Malformed #line
        // The text is free-form here, so it has to be escaped like the
        // text of comments, strings and the filespec.
        lineText = xml_escape(token.text).dup;
      else
      {
        // Print whitespace between #line and number.
        printWS(token.start, num.start); // Prints "#line" as well.
        lineText ~= Format(tags.Number, num.text); // Print the number.

        if (auto filespec = token.hlval.filespec)
        { // Print whitespace between number and filespec.
          printWS(num.end, filespec.start);
          lineText ~= Format(tags.Filespec, xml_escape(filespec.text));
        }
      }
      // Finally print the whole token.
      printf(tags.HLine, lineText);
      break;
    case TOK.Illegal:
      printf(tags.Illegal, token.text);
      break;
    case TOK.Newline:
      printf(tags.Newline, token.text);
      break;
    case TOK.EOF:
      print(tags.EOF);
      break;
    default:
      if (token.isKeyword())
        printf(tags.Keyword, token.text);
      else if (token.isSpecialToken)
        printf(tags.SpecialToken, token.text);
      else
        print(tags[token.kind]); // Look up by token kind.
    }
  }

  /// Highlights escape sequences inside a text. Also escapes XML characters.
  /// Params:
  ///   text = The text to search in.
  ///   fmt = The format string passed to the function Format().
  /// Returns: The processed text, or the original text if nothing
  ///   had to be escaped or highlighted.
  static cstring scanEscapeSequences(cstring text, cstring fmt)
  {
    auto p = text.ptr, end = p + text.length;
    auto prev = p; // Remembers the end of the previous escape sequence.
    CharArray result;
    cstring escape_str;

    while (p < end)
    {
      string xml_entity = void;
      switch (*p)
      {
      case '\\': break; // Found beginning of an escape sequence.
      // Code to escape XML chars:
      case '<': xml_entity = "&lt;";  goto Lxml;
      case '>': xml_entity = "&gt;";  goto Lxml;
      case '&': xml_entity = "&amp;"; goto Lxml;
      Lxml:
        if (prev < p) result ~= slice(prev, p); // Append previous string.
        result ~= xml_entity; // Append entity.
        prev = ++p;
        continue; // End of "XML" code.
      default:
        p++;
        continue; // Nothing to escape. Continue.
      }

      auto escape_str_begin = p;
      assert(*p == '\\');
      p++;
      if (p >= end)
        break; // Lone backslash at the end of the text.

      uint digits = void;
      switch (*p)
      {
      case 'x':
        digits = 2+1;
      case_Unicode:
        assert(digits == 2+1 || digits == 4+1 || digits == 8+1);
        if (p+digits >= end)
          p++; // Broken sequence. Only skip the letter.
        else // +1 was added everywhere else, so that the digits are skipped.
          p += digits;
        break;
      case 'u': digits = 4+1; goto case_Unicode;
      case 'U': digits = 8+1; goto case_Unicode;
      default:
        if (char2ev(*p)) // Table lookup.
          p++;
        else if (isoctal(*p))
        { // Skip up to three octal digits.
          if (++p < end && isoctal(*p))
            if (++p < end && isoctal(*p))
              p++;
        }
        else if (*p == '&')
        { // Skip to ";". Assume valid sequence.
          auto entity_name_begin = p+1;
          while (++p < end && isalnum(*p))
          {}
          if (p < end && *p == ';')
            p++; // Skip ';'.
          // The ampersand itself must be escaped in the output.
          escape_str = "\\&amp;" ~ slice(entity_name_begin, p);
          goto Lescape_str_assigned;
        }
        // else
        //   continue; // Broken escape sequence.
      }

      escape_str = slice(escape_str_begin, p);
    Lescape_str_assigned:
      if (prev < p) // Append previous string.
        result ~= slice(prev, escape_str_begin);
      result ~= Format(fmt, escape_str); // Finally format the escape sequence.
      prev = p; // Update prev pointer.
    }
    assert(p <= end && prev <= end);

    if (prev is text.ptr)
      return text; // Nothing escaped. Return original, unchanged text.
    if (prev < end)
      result ~= slice(prev, end);
    return result[];
  }
}

/// Escapes '<', '>' and '&' with named HTML entities.
/// Returns: The escaped text, or the original if no entities were found.
cstring xml_escape(cstring text)
{
  auto p = text.ptr, end = p + text.length;
  auto prev = p; // Points to the end of the previous escape char.
  string entity; // Current entity to be appended.
  CharArray result;
  while (p < end)
    switch (*p)
    {
    case '<': entity = "&lt;";  goto Lcommon;
    case '>': entity = "&gt;";  goto Lcommon;
    case '&': entity = "&amp;"; goto Lcommon;
    Lcommon:
      if (!result.ptr) // First character to be escaped: set the capacity.
        result.cap = text.length;
      prev != p && (result ~= slice(prev, p)); // Append previous string.
      result ~= entity; // Append entity.
      p++; // Skip '<', '>' or '&'.
      prev = p;
      break;
    default:
      p++;
    }
  if (prev is text.ptr)
    return text; // Nothing escaped. Return original, unchanged text.
  if (prev < end)
    result ~= slice(prev, end);
  return result[];
}

/// Maps tokens to (format) strings.
class TagMap
{
  cstring[hash_t] table; /// The strings, keyed by the hash of their names.
  cstring[TOK.MAX] tokenTable; /// The strings for tokens, indexed by TOK.

  /// Constructs a TagMap object.
  /// Initializes the shortcut members from the table (with fallbacks),
  /// and fills tokenTable with the entries found for the token strings.
  this(cstring[hash_t] table)
  {
    this.table = table;
    Identifier   = this["Identifier", "{0}"];
    String       = this["String", "{0}"];
    Char         = this["Char", "{0}"];
    Number       = this["Number", "{0}"];
    Keyword      = this["Keyword", "{0}"];
    LineC        = this["LineC", "{0}"];
    BlockC       = this["BlockC", "{0}"];
    NestedC      = this["NestedC", "{0}"];
    Escape       = this["Escape", "{0}"];
    Shebang      = this["Shebang", "{0}"];
    HLine        = this["HLine", "{0}"];
    Filespec     = this["Filespec", "{0}"];
    Illegal      = this["Illegal", "{0}"];
    Newline      = this["Newline", "{0}"];
    SpecialToken = this["SpecialToken", "{0}"];
    Declaration  = this["Declaration", "d"];
    Statement    = this["Statement", "s"];
    Expression   = this["Expression", "e"];
    Type         = this["Type", "t"];
    Other        = this["Other", "o"];
    EOF          = this["EOF", ""];

    foreach (i, tokStr; tokToString)
      if (auto pStr = hashOf(tokStr) in this.table)
        tokenTable[i] = *pStr;
  }

  /// Returns the value for str, or 'fallback' if str is not in the table.
  cstring opIndex(cstring str, cstring fallback = "")
  {
    if (auto p = hashOf(str) in table)
      return *p;
    return fallback;
  }

  /// Returns the value for tok in O(1) time.
  cstring opIndex(TOK tok)
  {
    return tokenTable[tok];
  }

  /// Assigns str to tokenTable[tok].
  void opIndexAssign(cstring str, TOK tok)
  {
    tokenTable[tok] = str;
  }

  /// Shortcuts for quick access.
  cstring Identifier, String, Char, Number, Keyword, LineC, BlockC, Escape,
    NestedC, Shebang, HLine, Filespec, Illegal, Newline, SpecialToken,
    Declaration, Statement, Expression, Type, Other, EOF;

  /// Returns the tag for the category 'k'.
  /// The result is left at its initial value if k belongs to none of
  /// the tested categories.
  cstring getTag(NodeKind k)
  {
    cstring tag;
    if (k.isDeclaration)
      tag = Declaration;
    else if (k.isStatement)
      tag = Statement;
    else if (k.isExpression)
      tag = Expression;
    else if (k.isType)
      tag = Type;
    else if (k.isParameter)
      tag = Other;
    return tag;
  }
}

/// Returns the short class name of a class descending from Node.$(BR)
/// E.g.: dil.ast.Declarations.ClassDecl -> Class
/// The names are computed once per node kind and cached in a table.
string getShortClassName(Node node)
{
  static string[] name_table;
  if (name_table is null)
    name_table = new string[NodeKind.max+1]; // Create a new table.
  // Look up in table.
  auto pname = &name_table[node.kind];
  if (!pname.ptr)
  { // Get fully qualified name of the class and extract just the name.
    auto name = IString(typeid(node).name).rpartition('.')[1];
    // Decl, Stmt, Expr, Type have length 4.
    size_t suffixLength = node.isParameter ? 0 : 4;
    // Remove common suffix and store.
    *pname = name[0..Neg(suffixLength)][];
  }
  return *pname;
}

/// Extended token structure.
/// Holds the nodes that begin and end at a particular token.
struct TokenEx
{
  //Token* token; /// The lexer token.
  Node[] beginNodes; /// beginNodes[n].begin == token
  Node[] endNodes; /// endNodes[n].end == token
}

/// Builds an array of TokenEx items.
class TokenExBuilder : DefaultVisitor
{
  TokenEx[] tokenExs; /// Extended tokens.
  Token[] tokens; /// Original tokens.

  /// Visits the tree starting at root and returns one TokenEx per token.
  /// The returned array is parallel to tokens (same length and indices).
  TokenEx[] build(Node root, Token[] tokens)
  { // Create the exact number of TokenEx instances.
    this.tokens = tokens;
    tokenExs = new TokenEx[tokens.length];
    super.visitN(root);
    return tokenExs;
  }

  /// Returns the TokenEx that corresponds to t.
  /// t must point into the tokens array.
  TokenEx* getTokenEx(Token* t)
  {
    assert(tokens.ptr <= t && t < tokens.ptr+tokens.length);
    return &tokenExs[t - tokens.ptr];
  }

  /// Override dispatch function.
  /// Registers n at its first and its last token before visiting it.
  override Node dispatch(Node n)
  {
    assert(n && n.begin && n.end);
    getTokenEx(n.begin).beginNodes ~= n;
    getTokenEx(n.end).endNodes ~= n;
    return super.dispatch(n);
  }
}