1 /// Author: Aziz Köksal 2 /// License: GPL3 3 /// $(Maturity low) 4 module dil.StringSet; 5 6 import dil.Array; 7 import dil.String; 8 import common; 9 10 import std.c..string; 11 12 /// Binary tree implementation of a hash set with strings as elements. 13 struct StringSet 14 { 15 struct Node 16 { 17 size_t left; /// Index of left branch. 18 size_t right; /// Index of right branch. 19 hash_t hash; /// Hash value. 20 cbinstr str; /// Binary string. 21 cstring repr() 22 { 23 return Format("Node(left={},right={},hash={},str={})", 24 left, right, hash, str.length > 4 ? str[0..4] : str); 25 } 26 string toString() 27 { 28 return cast(string)repr(); 29 } 30 } 31 32 size_t[] list; /// List of 1-based indices to Nodes. 33 DArray!Node nodes; /// Instances of Node. 34 35 /// Constructs a StringSet with the desired properties. 36 this(size_t nrOfNodes = 0, size_t size = 0) 37 { 38 nodes.cap = nrOfNodes; 39 resize(size); 40 } 41 42 /// Allocates and returns the 1-based index of a new uninitialized Node. 43 size_t newNode() 44 { 45 if (nodes.cap == 0) 46 nodes.cap = 1; 47 else if (nodes.rem == 0) 48 nodes.growX1_5(); 49 //*nodes.cur = Node.init; 50 nodes.cur++; 51 return nodes.len; 52 } 53 54 /// Returns the Node for the 1-based index. 55 Node* getNode(size_t nindex) 56 { 57 assert(nindex); 58 return nodes.ptr + nindex - 1; 59 } 60 61 /// Resizes the bucket list and reassigns the nodes. 62 void resize(size_t size) 63 { 64 list = new typeof(list[0])[size]; 65 foreach (i, ref n; nodes[]) 66 { 67 n.left = n.right = 0; 68 auto bindex = n.hash % list.length; 69 auto pindex = &list[bindex]; 70 auto hash = n.hash; 71 auto str = n.str; 72 while (*pindex) 73 { 74 auto node = getNode(*pindex); 75 int_t diff; 76 if ((diff = node.hash - hash) == 0 && 77 (diff = node.str.length - str.length) == 0 && 78 (diff = memcmp(node.str.ptr, str.ptr, str.length)) == 0) 79 assert(0, "can't have two equal nodes in the same set"); 80 // Go left or right down the tree. 81 pindex = diff < 0 ? &node.left : &node.right; 82 } 83 *pindex = i + 1; 84 } 85 } 86 87 /// Finds the Node holding str, or a slot where a new Node can be saved. 88 size_t* find(cbinstr str, hash_t hash) 89 { 90 if (!list.length) 91 resize(32); 92 auto pindex = &list[hash % list.length]; 93 while (*pindex) 94 { 95 auto node = getNode(*pindex); 96 int_t diff; 97 if ((diff = node.hash - hash) == 0 && 98 (diff = node.str.length - str.length) == 0 && 99 (diff = memcmp(node.str.ptr, str.ptr, str.length)) == 0) 100 break; // If equal in hash, length and content. 101 // Go left or right down the tree. 102 pindex = diff < 0 ? &node.left : &node.right; 103 } 104 return pindex; 105 } 106 107 /// ditto 108 size_t* find(cbinstr str) 109 { 110 return find(str, hashOf(cast(cstring)str)); 111 } 112 113 /// Returns a reference to str if it exists. 114 const(cbinstr)* get(cbinstr str) 115 { 116 auto p = find(str); 117 return *p ? &getNode(*p).str : null; 118 } 119 120 /// Returns true if str is contained in the set. 121 bool has(cbinstr str) 122 { 123 return list.length && !!*find(str); 124 } 125 126 /// Returns an existing entry or creates a new one. 127 const(cbinstr)* add(cbinstr str) 128 { 129 auto hash = hashOf(cast(cstring)str); 130 auto p = find(str, hash); 131 Node* node; 132 if (*p) 133 node = getNode(*p); 134 else // Empty spot. 135 { 136 node = getNode(*p = newNode()); 137 *node = Node(0, 0, hash, str); 138 } 139 return &node.str; 140 } 141 142 /// Returns a string representation. 143 cstring repr() 144 { 145 return Format("StringSet(list={}, nodes={})", list, nodes[]); 146 } 147 } 148 149 inout(ubyte)[] tobin(inout(char)[] str) 150 { 151 return cast(inout(ubyte)[]) str; 152 } 153 154 void testStringSet() 155 { 156 scope msg = new UnittestMsg("Testing struct StringSet."); 157 StringSet s; 158 auto pstr = s.add("abcd".tobin); 159 assert(pstr is s.get("abcd".tobin)); 160 assert(s.has("abcd".tobin)); 161 s.resize(64); 162 assert(s.has("abcd".tobin)); 163 }