/// Author: Aziz Köksal
/// License: GPL3
/// $(Maturity very high)
module dil.doc.Parser;

import dil.lexer.Funcs;
import dil.Unicode,
       dil.String;
import common;

/// A pair of strings: an identifier and the text value assigned to it.
class IdentValue
{
  cstring ident; /// The identifier on the left-hand side of the '='.
  cstring value; /// The text body on the right-hand side of the '='.

  /// Constructs a pair from an identifier and its value.
  this(cstring ident, cstring value)
  {
    this.ident = ident;
    this.value = value;
  }
}

/// Parses text in the form of:
/// $(PRE
////ident = value
////ident2 = value2
////  more text
////)
struct IdentValueParser
{
  cchar* p; /// Current pointer.
  cchar* textEnd; /// Points one past the last character of the text.

  /// Parses the text into a list of IdentValues.
  /// All newlines in text must be converted to '\n'.
  ///
  /// If the text contains no "ident = value" pair at all, a single
  /// IdentValue with an empty ident and the whole (stripped) text as its
  /// value is returned.
  /// Returns: the parsed pairs, or null if text is empty.
  IdentValue[] parse(cstring text)
  {
    if (!text.length)
      return null;

    p = text.ptr;
    textEnd = p + text.length;

    IdentValue[] idvalues;

    cstring ident, nextIdent;
    cchar* bodyBegin, nextBodyBegin;

    // Init.
    if (findNextIdent(ident, bodyBegin))
    {
      // Continue. The body of an ident extends up to the next ident.
      while (findNextIdent(nextIdent, nextBodyBegin))
      {
        idvalues ~= new IdentValue(ident, textBody(bodyBegin, nextIdent.ptr));
        ident = nextIdent;
        bodyBegin = nextBodyBegin;
      }
    }
    else // No "ident = value" pair found.
    {
      // Take the whole text and give it an empty ident.
      // NB: p can't be used here. The failed search has advanced it
      // to textEnd, and bodyBegin was reset because it's an out parameter.
      bodyBegin = text.ptr;
    }
    // Add last ident value.
    idvalues ~= new IdentValue(ident, textBody(bodyBegin, textEnd));
    return idvalues;
  }

  /// Strips off leading and trailing whitespace characters
  /// (including newlines.)
  /// Params:
  ///   begin = Points to the first character of the body.
  ///   end = Points one past the last character. Must not be before begin.
  /// Returns: the text body, or null if empty.
  static cstring textBody(cchar* begin, cchar* end)
  {
    while (begin < end && (isspace(*begin) || *begin == '\n'))
      begin++;
    // The body of A is empty when e.g.:
    // A =
    // B = some text
    // ^- begin and end point to B (or to this.textEnd in the 2nd case.)
    if (begin is end)
      return null;
    // Remove trailing whitespace.
    // No bounds check is needed: *begin is a non-whitespace character
    // at this point, which stops the loop at the latest.
    while (isspace(*--end) || *end == '\n')
    {}
    end++; // Move past the last non-whitespace character again.
    return slice(begin, end);
  }

  /// Finds the next "Identifier =".
  /// Only the beginning of a line is inspected. Lines that don't start
  /// with "Identifier =" are skipped.
  /// Params:
  ///   ident = Set to Identifier.
  ///   bodyBegin = Set to the beginning of the text body
  ///     (the character right after the '='.)
  /// Returns: true if found. Otherwise false, and p is left at textEnd.
  bool findNextIdent(out cstring ident, out cchar* bodyBegin)
  {
    while (p < textEnd)
    {
      skipWhitespace();
      if (p is textEnd)
        break;
      assert(p < textEnd && (isascii(*p) || isLeadByte(*p)));
      auto id = scanIdentifier(p, textEnd);
      skipWhitespace();
      if (id && p < textEnd && *p == '=')
      {
        ident = id;
        bodyBegin = ++p; // The body starts after the '='.
        skipLine();
        return true;
      }
      skipLine(); // Not an "Identifier =" line. Try the next one.
    }
    assert(p is textEnd);
    return false;
  }

  /// Advances p past whitespace characters, but not beyond textEnd.
  void skipWhitespace()
  {
    while (p < textEnd && isspace(*p))
      p++;
  }

  /// Advances p to the end of the current line and then past
  /// all newline characters that follow it.
  void skipLine()
  {
    while (p < textEnd && *p != '\n')
      p++;
    while (p < textEnd && *p == '\n')
      p++;
  }
}

/// Tests IdentValueParser.
void testDocParser()
{
  scope msg = new UnittestMsg("Testing struct dil.doc.Parser.");
  auto text = "A =
B = text
C =
 <b>text</b>
D = $(LINK www.dil.com)
E=<
F = G = H
Äş=??
A6İ=µ
End=";

  IdentValue iv(cstring s1, cstring s2)
  {
    return new IdentValue(s1, s2);
  }

  auto results = [
    iv("A", ""),
    iv("B", "text"),
    iv("C", "<b>text</b>"),
    iv("D", "$(LINK www.dil.com)"),
    iv("E", "<"),
    iv("F", "G = H"),
    iv("Äş", "??"),
    iv("A6İ", "µ"),
    iv("End", ""),
  ];

  auto parser = IdentValueParser();
  auto pairs = parser.parse(text);
  // Make sure that missing or surplus pairs don't go unnoticed.
  assert(pairs.length == results.length,
    Format("Parsed {} pairs, but expected {}.",
           pairs.length, results.length));
  foreach (i, parsed; pairs)
  {
    auto expected = results[i];
    assert(parsed.ident == expected.ident,
      Format("Parsed ident '{}', but expected '{}'.",
             parsed.ident, expected.ident));
    assert(parsed.value == expected.value,
      Format("Parsed value '{}', but expected '{}'.",
             parsed.value, expected.value));
  }

  // Text without any "ident = value" pair gets an empty ident.
  pairs = parser.parse("  just some text \n");
  assert(pairs.length == 1,
    Format("Parsed {} pairs, but expected {}.", pairs.length, 1));
  assert(pairs[0].ident == "",
    Format("Parsed ident '{}', but expected '{}'.", pairs[0].ident, ""));
  assert(pairs[0].value == "just some text",
    Format("Parsed value '{}', but expected '{}'.",
           pairs[0].value, "just some text"));
}