dil.doc.Parser source code

1 /// Author: Aziz Köksal
2 /// License: GPL3
3 /// $(Maturity very high)
4 module dil.doc.Parser;
5 
6 import dil.lexer.Funcs;
7 import dil.Unicode,
8        dil.String;
9 import common;
10 
/// Holds two strings that belong together: an identifier and its value.
class IdentValue
{
  cstring ident; /// The identifier part of the pair.
  cstring value; /// The value part of the pair.

  /// Constructs a pair from the given identifier and value.
  this(cstring ident, cstring value)
  {
    this.value = value;
    this.ident = ident;
  }
}
22 
/// Parses text in the form of:
/// $(PRE
////ident = value
////ident2 = value2
////         more text
////)
/// If the text contains no "ident = value" pair at all, the whole text
/// is returned as the value of a single pair with an empty ident.
struct IdentValueParser
{
  cchar* p; /// Current pointer.
  cchar* textEnd; /// Points one past the last character of the text.

  /// Parses the text into a list of IdentValues.
  /// All newlines in text must be converted to '\n'.
  /// Params:
  ///   text = The text to be parsed.
  /// Returns: the parsed pairs, or null if text is empty.
  IdentValue[] parse(cstring text)
  {
    if (!text.length)
      return null;

    p = text.ptr;
    textEnd = p + text.length;

    IdentValue[] idvalues;

    cstring ident, nextIdent;
    cchar* bodyBegin, nextBodyBegin;

    // Init.
    if (findNextIdent(ident, bodyBegin))
    {
      // Continue. The body of an ident ends where the next ident begins.
      while (findNextIdent(nextIdent, nextBodyBegin))
      {
        idvalues ~= new IdentValue(ident, textBody(bodyBegin, nextIdent.ptr));
        ident = nextIdent;
        bodyBegin = nextBodyBegin;
      }
    }
    else // No "ident = value" pair found.
    {
      // Take the whole text and give it an empty ident.
      // NB: p can't be used here, because the failed search has already
      // advanced it to textEnd, which would yield an empty body.
      bodyBegin = text.ptr;
    }
    // Add last ident value.
    idvalues ~= new IdentValue(ident, textBody(bodyBegin, textEnd));
    return idvalues;
  }

  /// Strips off leading and trailing whitespace characters
  /// (including newlines.)
  /// Params:
  ///   begin = Points to the first character of the body.
  ///   end = Points one past the last character of the body.
  /// Returns: the text body, or null if empty.
  static cstring textBody(cchar* begin, cchar* end)
  {
    while (begin < end && (isspace(*begin) || *begin == '\n'))
      begin++;
    // The body of A is empty when e.g.:
    // A =
    // B = some text
    // ^- begin and end point to B (or to this.textEnd in the 2nd case.)
    // Using ">=" also guards the loop below against an inverted range.
    if (begin >= end)
      return null;
    // Remove trailing whitespace.
    // The loop stops at the latest at begin, which points to a
    // non-whitespace character at this point.
    while (isspace(*--end) || *end == '\n')
    {}
    end++;
    return slice(begin, end);
  }

  /// Finds the next "Identifier =".
  /// Only the start of a line (after optional whitespace) is inspected.
  /// Lines that don't match are skipped.
  /// Params:
  ///   ident = Set to Identifier.
  ///   bodyBegin = Set to the character following the '='.
  ///     Surrounding whitespace is stripped later by textBody().
  /// Returns: true if found. Otherwise false, with p pointing to textEnd.
  bool findNextIdent(out cstring ident, out cchar* bodyBegin)
  {
    while (p < textEnd)
    {
      skipWhitespace();
      if (p is textEnd)
        break;
      assert(p < textEnd && (isascii(*p) || isLeadByte(*p)));
      auto id = scanIdentifier(p, textEnd);
      skipWhitespace();
      if (id && p < textEnd && *p == '=')
      {
        ident = id;
        bodyBegin = ++p; // Skip the '='.
        skipLine(); // Continue the next search on the following line.
        return true;
      }
      skipLine();
    }
    assert(p is textEnd);
    return false;
  }

  /// Advances p past all characters for which isspace() is true.
  void skipWhitespace()
  {
    while (p < textEnd && isspace(*p))
      p++;
  }

  /// Advances p to the end of the current line and then past the
  /// newline and any directly following newline characters.
  void skipLine()
  {
    while (p < textEnd && *p != '\n')
      p++;
    while (p < textEnd && *p == '\n')
      p++;
  }
}
126 
/// Tests IdentValueParser with a text that covers empty values,
/// indented identifiers, multiple '=' on one line and non-ASCII identifiers.
void testDocParser()
{
  scope msg = new UnittestMsg("Testing struct dil.doc.Parser.");
  auto text = "A =
B = text
C =
 <b>text</b>
  D = $(LINK www.dil.com)
E=<
F = G = H
Äş=??
A6İ=µ
End=";

  IdentValue iv(cstring s1, cstring s2)
  {
    return new IdentValue(s1, s2);
  }

  auto results = [
    iv("A", ""),
    iv("B", "text"),
    iv("C", "<b>text</b>"),
    iv("D", "$(LINK www.dil.com)"),
    iv("E", "<"),
    iv("F", "G = H"),
    iv("Äş", "??"),
    iv("A6İ", "µ"),
    iv("End", ""),
  ];

  auto parser = IdentValueParser();
  auto parsedValues = parser.parse(text);
  // Without this check the loop below would pass vacuously when the
  // parser returns too few pairs, or fail with a range error when it
  // returns too many.
  assert(parsedValues.length == results.length,
         Format("Parsed {} pairs, but expected {}.",
                parsedValues.length, results.length));
  foreach (i, parsed; parsedValues)
  {
    auto expected = results[i];
    assert(parsed.ident == expected.ident,
           Format("Parsed ident '{}', but expected '{}'.",
                  parsed.ident, expected.ident));
    assert(parsed.value == expected.value,
           Format("Parsed value '{}', but expected '{}'.",
                  parsed.value, expected.value));
  }
}