1 /// Author: Aziz Köksal
2 /// License: GPL3
3 /// $(Maturity high)
4 module dil.doc.Doc;
5 
6 import dil.doc.Parser;
7 import dil.ast.Node;
8 import dil.lexer.Funcs;
9 import dil.Unicode,
10        dil.String;
11 import common;
12 
// Module-level shorthand for IdentValueParser.textBody. In this module it is
// called with a begin pointer and an end pointer to obtain a section's text.
alias textBody = dil.doc.Parser.IdentValueParser.textBody;
14 
/// A DDoc comment after it has been sanitized and split into sections.
class DDocComment
{
  Section[] sections; /// Every section found in the comment.
  Section summary; /// The summary section; may be null.
  Section description; /// The description section; may be null.

  /// Constructs a DDocComment object from already parsed sections.
  this(Section[] sections, Section summary, Section description)
  {
    this.summary = summary;
    this.description = description;
    this.sections = sections;
  }

  /// Takes the first section named "copyright" out of the section list.
  /// Returns: the removed section, or null if there is no such section.
  Section takeCopyright()
  {
    for (size_t idx = 0; idx < sections.length; idx++)
    {
      auto candidate = sections[idx];
      if (!candidate.Is("copyright"))
        continue;
      // Rebuild the list without the element at idx.
      sections = sections[0..idx] ~ sections[idx+1..$];
      return candidate;
    }
    return null;
  }

  /// Returns true if the comment consists of a single summary section
  /// whose text is "ditto" (compared case-insensitively).
  bool isDitto()
  {
    if (summary is null || sections.length != 1)
      return false;
    return String(summary.text).ieql("ditto");
  }

  /// Returns true if there are no sections or the first one has no text.
  bool isEmpty()
  {
    if (sections.length == 0)
      return true;
    return sections[0].text.length == 0;
  }
}
55 
/// A namespace for some utility functions.
struct DDocUtils
{
static:
  /// Returns a node's DDocComment.
  ///
  /// The doc comment tokens surrounding the node are collected,
  /// their text is extracted and then parsed.
  /// Returns: null if the node has no DDoc comment tokens.
  DDocComment getDDocComment(Node node)
  {
    DDocParser p;
    auto docTokens = getDocTokens(node);
    if (!docTokens.length)
      return null;
    p.parse(getDDocText(docTokens));
    return new DDocComment(p.sections, p.summary, p.description);
  }

  /// Returns a DDocComment created from a text.
  ///
  /// The text is sanitized with '\0' as the padding character
  /// before it is parsed.
  DDocComment getDDocComment(cstring text)
  {
    text = sanitize(text, '\0'); // May be unnecessary.
    DDocParser p;
    p.parse(text);
    return new DDocComment(p.sections, p.summary, p.description);
  }

  /// Returns true if token is a Doxygen comment.
  bool isDoxygenComment(Token* token)
  { // Doxygen: '/+!' '/*!' '//!'
    return token.kind == TOK.Comment && token.start[2] == '!';
  }

  /// Returns true if token is a DDoc comment.
  bool isDDocComment(Token* token)
  { // Ddoc: '/++' '/**' '///'
    return token.kind == TOK.Comment && token.start[1] == token.start[2] &&
      // Exclude special cases: '/++/' and '/**/'
      (isLineComment(token) || token.text.length > 4);
  }

  /// Returns the surrounding documentation comment tokens.
  ///
  /// Comments preceding the node are returned in source order,
  /// followed by at most one comment to the right of the node.
  /// Params:
  ///   node = The node to find doc comments for.
  ///   isDocComment = A function predicate that checks for doc comment tokens.
  /// Note: This function works correctly only if
  ///       the source text is syntactically correct.
  Token*[] getDocTokens(Node node,
    bool function(Token*) isDocComment = &isDDocComment)
  {
    Token*[] comments;
    auto isEnumMember = node.kind == NodeKind.EnumMemberDecl;
    // Get preceding comments.
    auto token = node.begin;
    // Scan backwards until we hit another declaration.
    while ((--token).kind)
    {
      if (token.kind.In(TOK.LBrace, TOK.RBrace, TOK.Semicolon) ||
          (isEnumMember && token.kind == TOK.Comma))
        break;
      if (token.kind != TOK.Comment)
        continue;
      // Check that this comment doesn't belong to the previous declaration.
      if (token.prev.kind.In(TOK.Semicolon, TOK.RBrace, TOK.Comma))
        break;
      if (isDocComment(token))
        comments ~= token; // Comments are appended in reverse order.
    }
    comments.reverse; // Reverse the list when finished.
    // Get single comment to the right.
    token = node.end.next;
    if (token.kind == TOK.Comment && isDocComment(token))
      comments ~= token;
    else if (isEnumMember)
    { // The comment of an enum member may come after the separating comma.
      token = node.end.nextNWS;
      if (token.kind == TOK.Comma)
        if ((++token).kind == TOK.Comment && isDocComment(token))
          comments ~= token;
    }
    return comments;
  }

  /// Returns true if t is a line comment, i.e. it starts with "//".
  bool isLineComment(Token* t)
  {
    assert(t.kind == TOK.Comment);
    return t.start[1] == '/';
  }

  /// Extracts the text body of the comment tokens.
  ///
  /// The sanitized bodies of the tokens are concatenated. They are
  /// separated by '\n' if a newline token follows the comment, or by ' '.
  /// Returns: the combined text, or null if tokens is empty.
  cstring getDDocText(Token*[] tokens)
  {
    if (tokens.length == 0)
      return null;
    char[] result;
    foreach (token; tokens)
    { // Determine how many characters to slice off from the end of the comment.
      // 0 for "//", 2 for "+/" and "*/".
      auto n = isLineComment(token) ? 0 : 2;
      // The first three characters are the comment opener, e.g. "/**".
      result ~= sanitize(token.text[3 .. $-n], token.start[1]);
      assert(token.next);
      result ~= (token.next.kind == TOK.Newline) ? '\n' : ' ';
    }
    return result[0..$-1]; // Slice off last '\n' or ' '.
  }

  /// Sanitizes a DDoc comment string.
  ///
  /// Leading padding characters are removed from the lines.
  /// The various newline types are converted to '\n'.
  /// Trailing padding characters are removed from the end of the string.
  /// Note: The passed buffer is modified in place and
  ///       the returned string is a slice of it.
  /// Params:
  ///   comment = The string to be sanitized.
  ///   padding = '/', '+' or '*'
  /// Returns: the sanitized string, or null if nothing was copied.
  cstring sanitize(char[] comment, char padding)
  {
    bool isNewline = true; // True when at the beginning of a new line.
    auto q = comment.ptr; // Writer.
    cchar* p = q; // Reader.
    auto end = p + comment.length;

    while (p < end)
    {
      if (isNewline)
      { // Ignore padding at the beginning of each new line.
        isNewline = false;
        auto begin = p;
        while (p < end && isspace(*p)) // Skip spaces.
          p++;
        if (p < end && *p == padding)
          while (++p < end && *p == padding) // Skip padding.
          {}
        else
          p = begin; // Reset. No padding found.
      }
      else
      {
        isNewline = scanNewline(p, end);
        if (isNewline)
          *q++ = '\n'; // Copy newlines as '\n'.
        else
          *q++ = *p++; // Copy character.
      }
    }
    auto len = cast(size_t)(q - comment.ptr); // Number of characters written.
    if (!len)
      return null;
    // Lastly, strip trailing padding.
    // The length is counted down instead of walking a pointer backwards,
    // so that no pointer in front of the buffer is ever formed.
    while (len && comment[len-1] == padding)
      len--;
    return comment[0 .. len];
  }
  /// ditto
  cstring sanitize(cstring comment, char padding)
  {
    // Work on a copy, because the other overload writes into its argument.
    return sanitize(comment.dup, padding);
  }

  /// Unindents all lines in text by the maximum amount possible.
  ///
  /// Lines consisting only of whitespace are considered blank. They don't
  /// influence the indent amount and their whitespace is removed entirely.
  /// Note: counts tabulators the same as single spaces.
  /// Returns: the unindented text or the original text.
  cstring unindentText(cstring text)
  {
    auto p = text.ptr, end = p + text.length;
    auto indent = size_t.max; // Start with the largest number.
    auto lbegin = p; // The beginning of a line.
    // First determine the maximum amount we may remove.
    while (p < end)
    {
      while (p < end && isspace(*p)) // Skip leading whitespace.
        p++;
      if (p < end && *p != '\n') // Don't count blank lines.
        if (p - lbegin < indent)
        {
          indent = p - lbegin;
          if (indent == 0)
            return text; // Nothing to unindent;
        }
      // Skip to the end of the line.
      while (p < end && *p != '\n')
        p++;
      while (p < end && *p == '\n')
        p++;
      lbegin = p;
    }

    p = text.ptr; // Rewind the reader. 'end' is still valid.
    lbegin = p;
    auto newText = text.dup;
    auto q = newText.ptr; // Writer.
    // Remove the determined amount.
    while (p < end)
    {
      while (p < end && isspace(*p)) // Skip leading whitespace.
        *q++ = *p++;
      // A whitespace-only line at the very end of the text (p is end)
      // is blank, too. It wasn't counted in the first pass, so indent
      // may be larger than the width of this line.
      if (p is end || *p == '\n') // Strip empty lines.
        q -= p - lbegin; // Back up q by the amount of spaces on this line.
      else
      {
        assert(indent <= p - lbegin);
        q -= indent; // Back up q by the indent amount.
      }
      // Skip to the end of the line.
      while (p < end && *p != '\n')
        *q++ = *p++;
      // Skip multiple newlines.
      while (p < end && *p == '\n')
        *q++ = *p++;
      lbegin = p;
    }
    newText.length = q - newText.ptr;
    return newText;
  }
}
266 
/// Splits the text of a DDoc comment into sections.
struct DDocParser
{
  cchar* p; /// Points to the character currently looked at.
  cchar* textEnd; /// Points to the position right after the last character.
  Section[] sections; /// The sections parsed so far.
  Section summary; /// The summary section; may be null.
  Section description; /// The description section; may be null.

  /// Parses the DDoc text into sections.
  /// All newlines in the text must be converted to '\n'.
  /// Returns: the parsed sections, or null if text is empty.
  Section[] parse(cstring text)
  {
    if (text.length == 0)
      return null;
    p = text.ptr;
    textEnd = text.ptr + text.length;

    // Leading whitespace and empty lines don't belong to the summary.
    for (; p < textEnd; p++)
      if (!isspace(*p) && *p != '\n')
        break;
    auto summaryBegin = p;

    cstring ident;
    cchar* bodyBegin;
    if (!findNextIdColon(ident, bodyBegin))
    { // There are no explicit sections.
      scanSummaryAndDescription(summaryBegin, textEnd);
      return sections;
    }

    // Is there text in front of the first explicit section?
    if (ident.ptr != summaryBegin)
      scanSummaryAndDescription(summaryBegin, ident.ptr);
    // A section's body extends to the identifier of the following section.
    for (;;)
    {
      cstring nextIdent;
      cchar* nextBodyBegin;
      if (!findNextIdColon(nextIdent, nextBodyBegin))
        break;
      sections ~= new Section(ident, textBody(bodyBegin, nextIdent.ptr));
      ident = nextIdent;
      bodyBegin = nextBodyBegin;
    }
    // The body of the last section extends to the end of the text.
    sections ~= new Section(ident, textBody(bodyBegin, textEnd));
    return sections;
  }

  /// Divides the text between p and end into a summary
  /// and, if there is text left, a description section.
  void scanSummaryAndDescription(cchar* p, cchar* end)
  {
    assert(p <= end);
    auto firstChar = p;
    // The first paragraph ends at two consecutive newlines.
    while (p < end)
    {
      if (*p == '\n' && p+1 < end && p[1] == '\n')
        break;
      if (!skipCodeSection(p, end))
        p++;
    }
    assert(p == end || (*p == '\n' && p[1] == '\n'));
    // The summary is the first paragraph.
    summary = new Section("", textBody(firstChar, p));
    sections ~= summary;
    // Whatever follows is the description.
    if (auto descText = textBody(p, end))
    {
      description = new Section("", descText);
      sections ~= description;
    }
    assert(description is null || description.text !is null);
  }

  /// Returns true if p points to "$(DDD)".
  static bool isCodeSection(cchar* p, cchar* end)
  {
    // At least three characters must be available.
    if (!(p < end && p+2 < end))
      return false;
    return p[0] == '-' && p[1] == '-' && p[2] == '-';
  }

  /// Moves p behind a code section, if p points to the start of one.
  ///
  /// Note: apparently DMD doesn't skip over code sections when
  /// parsing DDoc sections. However, from experience it seems
  /// to be a good idea to do that.
  /// Returns: true if a code section was skipped.
  static bool skipCodeSection(ref cchar* p, cchar* end)
  {
    if (!isCodeSection(p, end))
      return false;
    // Opening delimiter: three or more dashes.
    for (p += 3; p < end && *p == '-'; p++)
    {}
    // Code: everything up to the next three dashes or the end.
    for (; p < end && !isCodeSection(p, end); p++)
    {}
    // Closing delimiter.
    for (; p < end && *p == '-'; p++)
    {}
    assert(p is end || p[-1] == '-');
    return true;
  }

  /// Searches for the next "Identifier:".
  /// Params:
  ///   ident = Set to the Identifier.
  ///   bodyBegin = Set to the position right after the colon.
  /// Returns: true if found.
  bool findNextIdColon(out cstring ident, out cchar* bodyBegin)
  {
    while (p < textEnd)
    {
      skipWhitespace();
      if (p is textEnd)
        break;
      if (skipCodeSection(p, textEnd))
        continue;
      assert(p < textEnd && (isascii(*p) || isLeadByte(*p)));
      auto id = scanIdentifier(p, textEnd);
      bool found;
      if (id && p < textEnd && *p == ':')
      {
        p++; // Move past the colon.
        // Ignore links: http:// ftp:// etc.
        found = p >= textEnd || *p != '/';
        if (found)
        {
          ident = id;
          bodyBegin = p;
        }
      }
      skipLine();
      if (found)
        return true;
    }
    assert(p is textEnd);
    return false;
  }

  /// Skips $(SYMLINK3 dil.lexer.Funcs, CProperty.Whitespace, whitespace).
  void skipWhitespace()
  {
    for (; p < textEnd; p++)
      if (!isspace(*p))
        return;
  }

  /// Moves to the beginning of the next non-blank line.
  void skipLine()
  {
    // The rest of the current line.
    for (; p < textEnd && *p != '\n'; p++)
    {}
    // All newlines that follow.
    for (; p < textEnd && *p == '\n'; p++)
    {}
  }
}
405 
/// A single section of a DDoc comment.
class Section
{
  cstring name; /// The section's name.
  cstring text; /// The section's text.
  /// Constructs a Section object from a name and a text.
  this(cstring name, cstring text)
  {
    this.text = text;
    this.name = name;
  }

  /// Returns true if the section's name equals name2, ignoring case.
  bool Is(cstring name2)
  {
    auto ownName = String(name);
    return ownName.ieql(name2);
  }

  /// Returns the text prefixed with "name: ",
  /// or only the text if the section has no name.
  cstring wholeText()
  {
    return name.length ? name ~ ": " ~ text : text;
  }
}
432 
/// A section that describes parameters.
class ParamsSection : Section
{
  cstring[] paramNames; /// The names of the parameters.
  cstring[] paramDescs; /// The descriptions of the parameters.
  /// Constructs a ParamsSection object.
  ///
  /// The text is parsed with IdentValueParser. The identifiers
  /// become the names and the values become the descriptions.
  this(cstring name, cstring text)
  {
    super(name, text);
    IdentValueParser parser;
    auto idvalues = parser.parse(text);
    auto count = idvalues.length;
    paramNames = new cstring[count];
    paramDescs = new cstring[count];
    for (size_t n = 0; n < count; n++)
    {
      auto pair = idvalues[n];
      paramNames[n] = pair.ident;
      paramDescs[n] = pair.value;
    }
  }
}
453 
/// A section that defines macros.
class MacrosSection : Section
{
  cstring[] macroNames; /// The names of the macros.
  cstring[] macroTexts; /// The texts of the macros.
  /// Constructs a MacrosSection object.
  ///
  /// The text is parsed with IdentValueParser. The identifiers
  /// become the macro names and the values become the macro texts.
  this(cstring name, cstring text)
  {
    super(name, text);
    IdentValueParser parser;
    auto idvalues = parser.parse(text);
    auto count = idvalues.length;
    macroNames = new cstring[count];
    macroTexts = new cstring[count];
    for (size_t n = 0; n < count; n++)
    {
      auto pair = idvalues[n];
      macroNames[n] = pair.ident;
      macroTexts[n] = pair.value;
    }
  }
}