1 /// Author: Aziz Köksal
2 /// License: GPL3
3 /// $(Maturity very high)
4 module dil.doc.Parser;
5
6 import dil.lexer.Funcs;
7 import dil.Unicode,
8 dil.String;
9 import common;
10
/// An identifier paired with the text value that belongs to it.
class IdentValue
{
  cstring ident; /// The identifier part of the pair.
  cstring value; /// The text associated with the identifier.

  /// Constructs a pair from the given identifier and value.
  this(cstring ident, cstring value)
  {
    this.value = value;
    this.ident = ident;
  }
}
22
/// Parses text in the form of:
/// $(PRE
////ident = value
////ident2 = value2
//// more text
////)
/// A value extends up to the next line that begins with "ident =",
/// or to the end of the text. Text that contains no such line at all
/// is returned as a single value with an empty ident.
struct IdentValueParser
{
  cchar* p; /// Current pointer.
  cchar* textEnd; /// Points one past the last character of the text.

  /// Parses the text into a list of IdentValues.
  /// All newlines in text must be converted to '\n'.
  /// Params:
  ///   text = The text to be parsed.
  /// Returns: one IdentValue per "ident = value" pair; a single IdentValue
  ///   with an empty ident holding the whole (trimmed) text if no pair
  ///   was found; or null if text is empty.
  IdentValue[] parse(cstring text)
  {
    if (!text.length)
      return null;

    p = text.ptr;
    textEnd = p + text.length;

    IdentValue[] idvalues;

    cstring ident, nextIdent;
    cchar* bodyBegin, nextBodyBegin;

    // Init.
    if (findNextIdent(ident, bodyBegin))
    {
      // Continue. The body of an ident ends where the next ident begins.
      while (findNextIdent(nextIdent, nextBodyBegin))
      {
        idvalues ~= new IdentValue(ident, textBody(bodyBegin, nextIdent.ptr));
        ident = nextIdent;
        bodyBegin = nextBodyBegin;
      }
    }
    else // No "ident = value" pair found.
    {
      // Take the whole text and give it an empty ident.
      // NB: p can't be used here, because the failed search has already
      // advanced it to textEnd, which would yield an empty body.
      bodyBegin = text.ptr;
    }
    // Add last ident value.
    idvalues ~= new IdentValue(ident, textBody(bodyBegin, textEnd));
    return idvalues;
  }

  /// Strips off leading and trailing whitespace characters
  /// (including newlines.)
  /// Params:
  ///   begin = Points to the first character of the body.
  ///   end = Points one past the last character of the body.
  /// Returns: the text body, or null if empty.
  static cstring textBody(cchar* begin, cchar* end)
  {
    while (begin < end && (isspace(*begin) || *begin == '\n'))
      begin++;
    // The body of A is empty when e.g.:
    // A =
    // B = some text
    // ^- begin and end point to B (or to this.textEnd in the 2nd case.)
    if (begin is end)
      return null;
    // Remove trailing whitespace.
    // *begin is not a whitespace character at this point, so the loop
    // stops there at the latest; the bounds check makes that explicit.
    while (end > begin && (isspace(end[-1]) || end[-1] == '\n'))
      end--;
    return slice(begin, end);
  }

  /// Finds the next "Identifier =".
  /// Only the beginning of each line is examined. Lines that don't start
  /// with "Identifier =" are skipped.
  /// Params:
  ///   ident = Set to Identifier.
  ///   bodyBegin = Set to the position right after the '=' character.
  /// Returns: true if found. Otherwise false, in which case p is textEnd.
  bool findNextIdent(out cstring ident, out cchar* bodyBegin)
  {
    while (p < textEnd)
    {
      skipWhitespace();
      if (p is textEnd)
        break;
      assert(p < textEnd && (isascii(*p) || isLeadByte(*p)));
      // NOTE(review): scanIdentifier() presumably advances p past the
      // identifier and returns null if there is none at p.
      auto id = scanIdentifier(p, textEnd);
      skipWhitespace();
      if (id && p < textEnd && *p == '=')
      {
        ident = id;
        bodyBegin = ++p; // Skip the '='.
        skipLine(); // The rest of the line belongs to the body.
        return true;
      }
      skipLine();
    }
    assert(p is textEnd);
    return false;
  }

  /// Advances p as long as isspace() is true for the current character.
  void skipWhitespace()
  {
    while (p < textEnd && isspace(*p))
      p++;
  }

  /// Advances p past the rest of the current line
  /// and past any newline characters that follow it.
  void skipLine()
  {
    while (p < textEnd && *p != '\n')
      p++;
    while (p < textEnd && *p == '\n')
      p++;
  }
}
126
/// Tests that IdentValueParser splits a sample text into the expected
/// list of ident/value pairs.
void testDocParser()
{
  scope msg = new UnittestMsg("Testing struct dil.doc.Parser.");
  auto text = "A =
B = text
C =
<b>text</b>
D = $(LINK www.dil.com)
E=<
F = G = H
Äş=??
A6İ=µ
End=";

  IdentValue iv(cstring s1, cstring s2)
  {
    return new IdentValue(s1, s2);
  }

  auto results = [
    iv("A", ""),
    iv("B", "text"),
    iv("C", "<b>text</b>"),
    iv("D", "$(LINK www.dil.com)"),
    iv("E", "<"),
    iv("F", "G = H"),
    iv("Äş", "??"),
    iv("A6İ", "µ"),
    iv("End", ""),
  ];

  auto parser = IdentValueParser();
  auto idvalues = parser.parse(text);
  // Check the count first. Otherwise missing pairs would go unnoticed,
  // and surplus pairs would only show up as an index error below.
  assert(idvalues.length == results.length,
    Format("Parsed {} ident values, but expected {}.",
      idvalues.length, results.length));
  foreach (i, parsed; idvalues)
  {
    auto expected = results[i];
    assert(parsed.ident == expected.ident,
      Format("Parsed ident '{}', but expected '{}'.",
        parsed.ident, expected.ident));
    assert(parsed.value == expected.value,
      Format("Parsed value '{}', but expected '{}'.",
        parsed.value, expected.value));
  }
}