/// Author: Aziz Köksal
/// License: GPL3
/// $(Maturity high)
module dil.doc.Doc;

import dil.doc.Parser;
import dil.ast.Node;
import dil.lexer.Funcs;
import dil.Unicode,
       dil.String;
import common;

alias textBody = dil.doc.Parser.IdentValueParser.textBody;

/// Represents a sanitized and parsed DDoc comment.
class DDocComment
{
  Section[] sections; /// The sections of this comment.
  Section summary; /// Optional summary section.
  Section description; /// Optional description section.

  /// Constructs a DDocComment object.
  /// Params:
  ///   sections = All sections of the comment.
  ///   summary = The summary section, or null.
  ///   description = The description section, or null.
  this(Section[] sections, Section summary, Section description)
  {
    this.sections = sections;
    this.summary = summary;
    this.description = description;
  }

  /// Removes the first copyright section and returns it.
  /// Returns: the removed section, or null if there is none.
  Section takeCopyright()
  {
    foreach (i, section; sections)
      if (section.Is("copyright"))
      { // Rebuild the array without the i-th element.
        sections = sections[0..i] ~ sections[i+1..$];
        return section;
      }
    return null;
  }

  /// Returns true if "ditto" is the only text in this comment.
  /// The comparison is case-insensitive.
  bool isDitto()
  {
    return summary && sections.length == 1 &&
      String(summary.text).ieql("ditto");
  }

  /// Returns true when this comment has no text, i.e. when there are
  /// no sections or the first section's text is empty.
  bool isEmpty()
  {
    return sections.length == 0 || sections[0].text.length == 0;
  }
}

/// A namespace for some utility functions.
struct DDocUtils
{
static:
  /// Returns a node's DDocComment.
  /// Returns: null if the node has no documentation comment tokens.
  DDocComment getDDocComment(Node node)
  {
    DDocParser p;
    auto docTokens = getDocTokens(node);
    if (!docTokens.length)
      return null;
    p.parse(getDDocText(docTokens));
    return new DDocComment(p.sections, p.summary, p.description);
  }

  /// Returns a DDocComment created from a text.
  DDocComment getDDocComment(cstring text)
  {
    text = sanitize(text, '\0'); // May be unnecessary.
    DDocParser p;
    p.parse(text);
    return new DDocComment(p.sections, p.summary, p.description);
  }

  /// Returns true if token is a Doxygen comment.
  bool isDoxygenComment(Token* token)
  { // Doxygen: '/+!' '/*!' '//!'
    return token.kind == TOK.Comment && token.start[2] == '!';
  }

  /// Returns true if token is a DDoc comment.
  bool isDDocComment(Token* token)
  { // Ddoc: '/++' '/**' '///'
    return token.kind == TOK.Comment && token.start[1] == token.start[2] &&
      // Exclude special cases: '/++/' and '/**/'
      (isLineComment(token) ? 1 : token.text.length > 4);
  }

  /// Returns the surrounding documentation comment tokens.
  /// Params:
  ///   node = The node to find doc comments for.
  ///   isDocComment = A function predicate that checks for doc comment tokens.
  /// Note: This function works correctly only if
  ///       the source text is syntactically correct.
  Token*[] getDocTokens(Node node,
    bool function(Token*) isDocComment = &isDDocComment)
  {
    Token*[] comments;
    auto isEnumMember = node.kind == NodeKind.EnumMemberDecl;
    // Get preceding comments.
    auto token = node.begin;
    // Scan backwards until we hit another declaration.
    while ((--token).kind)
    {
      if (token.kind.In(TOK.LBrace, TOK.RBrace, TOK.Semicolon) ||
          (isEnumMember && token.kind == TOK.Comma))
        break;
      else if (token.kind == TOK.Comment)
      { // Braces make the binding of the inner else explicit.
        // Check that this comment doesn't belong to the previous declaration.
        if (token.prev.kind.In(TOK.Semicolon, TOK.RBrace, TOK.Comma))
          break;
        else if (isDocComment(token))
          comments ~= token; // Comments are appended in reverse order.
      }
    }
    comments.reverse; // Reverse the list when finished.
    // Get single comment to the right.
    token = node.end.next;
    if (token.kind == TOK.Comment && isDocComment(token))
      comments ~= token;
    else if (isEnumMember)
    { // For enum members the comment may follow the separating comma.
      token = node.end.nextNWS;
      if (token.kind == TOK.Comma)
        if ((++token).kind == TOK.Comment && isDocComment(token))
          comments ~= token;
    }
    return comments;
  }

  /// Returns true if t is a line comment, i.e. its second character is '/'.
  /// The token must be a comment token.
  bool isLineComment(Token* t)
  {
    assert(t.kind == TOK.Comment);
    return t.start[1] == '/';
  }

  /// Extracts the text body of the comment tokens.
  ///
  /// The first three characters of every comment are removed, as well as
  /// the closing delimiter of block comments. The sanitized bodies are
  /// joined with '\n' if a newline token follows the comment, else with ' '.
  /// Returns: the joined text, or null if tokens is empty.
  cstring getDDocText(Token*[] tokens)
  {
    if (tokens.length == 0)
      return null;
    char[] result;
    foreach (token; tokens)
    { // Determine how many characters to slice off from the end of the comment.
      // 0 for "//", 2 for "+/" and "*/".
      auto n = isLineComment(token) ? 0 : 2;
      // The second character of the comment is the padding character.
      result ~= sanitize(token.text[3 .. $-n], token.start[1]);
      assert(token.next);
      result ~= (token.next.kind == TOK.Newline) ? '\n' : ' ';
    }
    return result[0..$-1]; // Slice off last '\n' or ' '.
  }

  /// Sanitizes a DDoc comment string.
  ///
  /// Leading padding characters are removed from the lines.
  /// The various newline types are converted to '\n'.
  /// Trailing padding characters at the end of the string are removed.
  /// Note: This overload works in place on the passed array.
  /// Params:
  ///   comment = The string to be sanitized.
  ///   padding = '/', '+' or '*'
  /// Returns: the sanitized string, or null if nothing was copied.
  cstring sanitize(char[] comment, char padding)
  {
    bool isNewline = true; // True when at the beginning of a new line.
    auto q = comment.ptr; // Writer.
    cchar* p = q; // Reader.
    auto end = p + comment.length;

    while (p < end)
    {
      if (isNewline)
      { // Ignore padding at the beginning of each new line.
        isNewline = false;
        auto begin = p;
        while (p < end && isspace(*p)) // Skip spaces.
          p++;
        if (p < end && *p == padding)
          while (++p < end && *p == padding) // Skip padding.
          {}
        else
          p = begin; // Reset. No padding found.
      }
      else
      {
        isNewline = scanNewline(p, end);
        if (isNewline)
          *q++ = '\n'; // Copy newlines as '\n'.
        else
          *q++ = *p++; // Copy character.
      }
    }
    comment.length = q - comment.ptr; // Adjust length.
    if (!comment.length)
      return null;
    // Lastly, strip trailing padding.
    p = q - 1; // q points to the end of the string.
    q = comment.ptr - 1; // Now let q point to the start.
    while (p > q && *p == padding)
      p--; // Go back until no padding characters are left.
    assert(p == q || p >= comment.ptr);
    comment.length = p - comment.ptr + 1;
    return comment;
  }
  /// ditto
  /// Note: This overload sanitizes a copy and leaves the argument untouched.
  cstring sanitize(cstring comment, char padding)
  {
    return sanitize(comment.dup, padding);
  }

  /// Unindents all lines in text by the maximum amount possible.
  ///
  /// Blank lines (lines consisting only of whitespace) don't take part
  /// in determining the indentation; their whitespace is removed entirely.
  /// This includes a whitespace-only last line without a terminating '\n'.
  /// Note: counts tabulators the same as single spaces.
  /// Returns: the unindented text or the original text.
  cstring unindentText(cstring text)
  {
    auto p = text.ptr, end = p + text.length;
    auto indent = size_t.max; // Start with the largest number.
    auto lbegin = p; // The beginning of a line.
    // First determine the maximum amount we may remove.
    while (p < end)
    {
      while (p < end && isspace(*p)) // Skip leading whitespace.
        p++;
      if (p < end && *p != '\n') // Don't count blank lines.
        if (p - lbegin < indent)
        {
          indent = p - lbegin;
          if (indent == 0)
            return text; // Nothing to unindent;
        }
      // Skip to the end of the line.
      while (p < end && *p != '\n')
        p++;
      while (p < end && *p == '\n')
        p++;
      lbegin = p;
    }

    p = text.ptr;
    end = p + text.length;
    lbegin = p;
    auto newText = text.dup;
    auto q = newText.ptr; // Writer.
    // Remove the determined amount.
    while (p < end)
    {
      while (p < end && isspace(*p)) // Copy leading whitespace.
        *q++ = *p++;
      // A line is blank if only whitespace precedes the newline or the end
      // of the text. The latter case must be handled here as well, because
      // the first pass didn't consider such a line when computing indent:
      // the line may be shorter than indent, or indent may still be
      // size_t.max if the text has no non-blank line at all.
      if (p == end || *p == '\n') // Strip empty lines.
        q -= p - lbegin; // Back up q by the amount of spaces on this line.
      else
      { // Every non-blank line was measured in the first pass.
        assert(indent <= p - lbegin);
        q -= indent; // Back up q by the indent amount.
      }
      // Skip to the end of the line.
      while (p < end && *p != '\n')
        *q++ = *p++;
      // Skip multiple newlines.
      while (p < end && *p == '\n')
        *q++ = *p++;
      lbegin = p;
    }
    newText.length = q - newText.ptr;
    return newText;
  }
}

/// Parses a DDoc comment string.
struct DDocParser
{
  cchar* p; /// Current character pointer.
  cchar* textEnd; /// Points one character past the end of the text.
  Section[] sections; /// Parsed sections.
  Section summary; /// Optional summary section.
  Section description; /// Optional description section.

  /// Parses the DDoc text into sections.
  /// All newlines in the text must be converted to '\n'.
  /// Returns: the parsed sections, or null if text is empty.
  Section[] parse(cstring text)
  {
    if (!text.length)
      return null;
    p = text.ptr;
    textEnd = p + text.length;

    cchar* summaryBegin;
    cstring ident, nextIdent;
    cchar* bodyBegin, nextBodyBegin;

    // Skip leading whitespace and newlines.
    while (p < textEnd && (isspace(*p) || *p == '\n'))
      p++;
    summaryBegin = p;

    if (findNextIdColon(ident, bodyBegin))
    { // Check if there's text before the explicit section.
      if (summaryBegin != ident.ptr)
        scanSummaryAndDescription(summaryBegin, ident.ptr);
      // Continue parsing.
      while (findNextIdColon(nextIdent, nextBodyBegin))
      { // The body of a section ends where the next identifier begins.
        sections ~= new Section(ident, textBody(bodyBegin, nextIdent.ptr));
        ident = nextIdent;
        bodyBegin = nextBodyBegin;
      }
      // Add last section.
      sections ~= new Section(ident, textBody(bodyBegin, textEnd));
    }
    else // There are no explicit sections.
      scanSummaryAndDescription(summaryBegin, textEnd);
    return sections;
  }

  /// Separates the text between p and end
  /// into a summary and an optional description section.
  ///
  /// The summary ends at the first empty line ("\n\n") that is not
  /// inside a code section. Both sections are appended to sections.
  void scanSummaryAndDescription(cchar* p, cchar* end)
  {
    assert(p <= end);
    auto sectionBegin = p;
    // Search for the end of the first paragraph.
    while (p < end && !(*p == '\n' && p+1 < end && p[1] == '\n'))
      if (skipCodeSection(p, end) == false)
        p++;
    assert(p == end || (*p == '\n' && p[1] == '\n'));
    // The first paragraph is the summary.
    summary = new Section("", textBody(sectionBegin, p));
    sections ~= summary;
    // The rest is the description section.
    if (auto descText = textBody(p, end))
      sections ~= (description = new Section("", descText));
    assert(description ? description.text !is null : true);
  }

  /// Returns true if p points to "$(DDD)",
  /// i.e. to three consecutive '-' characters.
  static bool isCodeSection(cchar* p, cchar* end)
  {
    return p < end && *p == '-' && p+2 < end && p[1] == '-' && p[2] == '-';
  }

  /// Skips over a code section and sets p one character past it.
  ///
  /// If the code section isn't terminated, p is set to end.
  /// Note: apparently DMD doesn't skip over code sections when
  ///   parsing DDoc sections. However, from experience it seems
  ///   to be a good idea to do that.
  /// Returns: true if a code section was skipped.
  static bool skipCodeSection(ref cchar* p, cchar* end)
  {
    if (!isCodeSection(p, end))
      return false;
    p += 3; // Skip "---".
    while (p < end && *p == '-') // Skip the rest of the opening dashes.
      p++;
    // Search for the closing dashes.
    while (p < end && !(*p == '-' && p+2 < end && p[1] == '-' && p[2] == '-'))
      p++;
    while (p < end && *p == '-') // Skip the closing dashes.
      p++;
    assert(p is end || p[-1] == '-');
    return true;
  }

  /// Find next "Identifier:".
  ///
  /// Only identifiers at the beginning of a line (after optional
  /// whitespace) are considered. Code sections are skipped.
  /// Params:
  ///   ident = Set to the Identifier.
  ///   bodyBegin = Set to the beginning of the text body (whitespace skipped.)
  /// Returns: true if found.
  bool findNextIdColon(out cstring ident, out cchar* bodyBegin)
  {
    while (p < textEnd)
    {
      skipWhitespace();
      if (p is textEnd)
        break;
      if (skipCodeSection(p, textEnd))
        continue;
      assert(p < textEnd && (isascii(*p) || isLeadByte(*p)));
      auto id = scanIdentifier(p, textEnd);
      if (id && p < textEnd && *p == ':')
        if (!(++p < textEnd && *p == '/')) // Ignore links: http:// ftp:// etc.
        {
          ident = id;
          bodyBegin = p;
          skipLine();
          return true;
        }
      skipLine();
    }
    assert(p is textEnd);
    return false;
  }

  /// Skips $(SYMLINK3 dil.lexer.Funcs, CProperty.Whitespace, whitespace).
  void skipWhitespace()
  {
    while (p < textEnd && isspace(*p))
      p++;
  }

  /// Skips to the beginning of the next non-blank line.
  void skipLine()
  {
    while (p < textEnd && *p != '\n')
      p++;
    while (p < textEnd && *p == '\n')
      p++;
  }
}

/// Represents a DDoc section.
class Section
{
  cstring name; /// The name of the section.
  cstring text; /// The text of the section.
  /// Constructs a Section object.
  this(cstring name, cstring text)
  {
    this.name = name;
    this.text = text;
  }

  /// Case-insensitively compares the section's name with name2.
  bool Is(cstring name2)
  {
    return String(name).ieql(name2);
  }

  /// Returns the section's text including its name.
  /// The name is left out if it is empty.
  cstring wholeText()
  {
    if (name.length == 0)
      return text;
    return name ~ ": " ~ text;
  }
}

/// Represents a params section.
class ParamsSection : Section
{
  cstring[] paramNames; /// Parameter names.
  cstring[] paramDescs; /// Parameter descriptions.
  /// Constructs a ParamsSection object.
  /// The text is parsed into pairs of parameter names and descriptions.
  this(cstring name, cstring text)
  {
    super(name, text);
    IdentValueParser parser;
    auto idvalues = parser.parse(text);
    this.paramNames = new cstring[idvalues.length];
    this.paramDescs = new cstring[idvalues.length];
    foreach (i, idvalue; idvalues)
    {
      this.paramNames[i] = idvalue.ident;
      this.paramDescs[i] = idvalue.value;
    }
  }
}

/// Represents a macros section.
class MacrosSection : Section
{
  cstring[] macroNames; /// Macro names.
  cstring[] macroTexts; /// Macro texts.
  /// Constructs a MacrosSection object.
  /// The text is parsed into pairs of macro names and macro texts.
  this(cstring name, cstring text)
  {
    super(name, text);
    IdentValueParser parser;
    auto idvalues = parser.parse(text);
    this.macroNames = new cstring[idvalues.length];
    this.macroTexts = new cstring[idvalues.length];
    foreach (i, idvalue; idvalues)
    {
      this.macroNames[i] = idvalue.ident;
      this.macroTexts[i] = idvalue.value;
    }
  }
}