module tooling.Scanner;

import std.algorithm;
import std.conv;
import std.exception;
import std.file;
import std.range;
import std.regex;
import std.stdio;
import std.string;

import tooling.Tokenizer;
import tooling.TreeRange;

/// One scanned source entity: a namespace/class/struct/enum, a function, or
/// plain "code".  Classification is done purely by regex-matching the
/// space-joined token text of the entity's declaration expression.
class Entity
{
    // Matches a type-introducing keyword, optionally followed by a name
    // ("namespace Foo", "struct", ...).
    auto structRe = regex(r"^(namespace|class|struct|enum)( (?P<name>\w+))?");

    // Heuristic match for a (possibly templated) function declaration with
    // named captures for return type, name, argument list and trailing
    // qualifiers.  NOTE(review): the `o?perator` fragment looks like a typo
    // for `operator`, but the greedy name group may already consume the
    // leading 'o' — left untouched; verify against real operator overloads.
    auto functionRe = regex(r"(template <[^>]+> )?" ~
                            r"(?P<return>([a-zA-Z0-9_&<,>* ]+|(::))+ )?" ~
                            r"(?P<name>(((~ )|(:: ))?[a-zA-Z0-9_]+|( :: ))+)" ~
                            r"(o?perator *.[^(]*)?" ~
                            r" \( (?P<args>[a-zA-Z0-9_ :&<,>*]*)\)" ~
                            r"(?P<suffix> [a-zA-Z]+)*"
        );

public:

    /// Classify `expr` (the declaration tokens) and record `tokens` (the
    /// full token span including any body) and nested `content` entities.
    /// Defaults to type "code" with an empty name when nothing matches.
    this(Token[] expr, Token[] tokens, Entity[] content)
    {
        // Normalized one-line text of the declaration, tokens joined by " ".
        auto line = expr.map!(value).joiner(" ").to!string;

        type_ = "code";
        name_ = "";

        auto m = line.matchFirst(structRe);
        if (!m.empty)
        {
            // Use the actual keyword token as the type (namespace/class/...).
            type_ = expr[0].value;
            name_ = m["name"];
        }
        else
        {
            m = line.matchFirst(functionRe);
            if (!m.empty)
            {
                type_ = "function";
                name_ = m["name"];

                returnType_ = m["return"].strip;
                // Arguments are separated by " , " in the joined token text.
                arguments_ = m["args"].strip.splitter(" , ").array;
                suffix_ = m["suffix"].strip.splitter(" ").array;
            }
        }

        expr_ = expr;
        tokens_ = tokens;
        content_ = content;
    }

    /// Display name: for functions, name plus argument list and suffix
    /// qualifiers; for everything else, just the captured name.
    string name()
    {
        if (type_ == "function")
        {
            //return expr_[0].value ~ expr_[1 .. $].map!(t => t.precedingWhitespace_ ~ t.value).joiner.to!string;
            return name_ ~ "(" ~ arguments_.joiner(",").text ~ ") " ~ suffix_.joiner(" ").text;
        }
        else
        {
            return name_;
        }
    }

    /// Debug dump of this entity and, recursively, its nested entities,
    /// indenting two spaces per level.
    void print(string indent="")
    {
        if (type_ == "function")
        {
            writeln(indent, "<", type_, "> `", returnType_, "` ", name_, "(", arguments_, ") ", suffix_);
        }
        else
        {
            writeln(indent, "<", type_, "> ", name_);
        }
        foreach (c; content_)
        {
            c.print(indent~"  ");
        }
    }

    string type_;           // "namespace", "class", "struct", "enum", "function" or "code"
    string name_;           // captured entity name ("" when anonymous/unmatched)
    Token[] expr_;          // declaration tokens only
    Token[] tokens_;        // declaration plus body (and terminator) tokens
    Entity[] content_;      // entities nested inside this one's braces
    string returnType_;     // function-only: matched return type text
    string[] arguments_;    // function-only: one entry per argument
    string[] suffix_;       // function-only: trailing qualifiers (const, override, ...)
}

/// True when `e` was classified as a namespace.
bool isNamespace(Entity e)
{
    return e.type_ == "namespace";
}

/// True when `e` was classified as a class.
bool isClass(Entity e)
{
    return e.type_ == "class";
}

/// True when `e` was classified as a function.
bool isFunction(Entity e)
{
    return e.type_ == "function";
}

/// True when the entity has a brace-enclosed body (e.g. an inline function
/// definition) rather than just a declaration.
bool isInline(Entity e)
{
    // if the difference in length is just one, its tk!";", otherwise there is a
    // pair of braces
    return e.expr_.length+1 < e.tokens_.length;
}

/// Walk `tokens` from `start`, building an Entity per ";"-terminated
/// declaration or "{...}"-bodied definition; recurses into braces and
/// returns when the matching "}" is reached.  `start` is advanced to the
/// resume position for the caller.  Entities classified as plain "code"
/// are dropped.
Entity[] readTokenStream(Token[] tokens, ref ulong start)
{
    Entity[] entries;
    for (ulong i = start; i < tokens.length; i++)
    {
        auto token = tokens[i];
        if (token.type_ is tk!";")
        {
            if (i - start > 1)
            {
                // Skip a leftover closing brace from the previous entity.
                if (value(tokens[start]) == "}")
                {
                    start += 1;
                }
                auto expr = tokens[start .. i];
                auto e = new Entity(expr, tokens[start .. i+1], []);
                if (e.type_ != "code")
                {
                    entries ~= e;
                }
            }
            start = i+1;
        }
        else if (token.type_ is tk!"{" && i)
        {
            // Skip a leftover closing brace from the previous entity.
            if (value(tokens[start]) == "}")
            {
                start += 1;
            }

            auto expr = tokens[start .. i];
            i += 1;
            // Recurse into the brace-enclosed body; `i` lands on the "}".
            auto content = readTokenStream(tokens, i);
            auto e = new Entity(expr,
                                tokens[start .. i+1],
                                content);
            if (e.type_ != "code")
            {
                entries ~= e;
            }
            start = i;
        }
        else if (token.type_ is tk!"}")
        {
            start = i;
            return entries;
        }
    }
    return entries;
}

/// Text of a token: the literal value for identifiers, otherwise the
/// token type's symbol (e.g. "{", "::").
string value(Token t)
{
    if (t.type_ is tk!"identifier")
        return t.value_;
    else
        return t.type_.sym;
}

/// Lazy range of the raw captured names of `entries`.
auto entityNames(Entity[] entries)
{
    return entries.map!(i => i.name_);
}

/// Scan a token stream into an Entity tree.
auto scanTokens(Token[] tokens)
{
    // Start at the first namespace.
    // FIXME: Right now we only scan the first namespace.
    auto namespaceTokens = find!(t => t.value == "namespace")(tokens);
    ulong start = 0;
    return readTokenStream(namespaceTokens, start);
}

/// Expand tabs and strip trailing blanks.  NOTE(review): the `column & 1`
/// logic advances a tab to the next even column, i.e. a tab stop of 2 —
/// confirm that width is intended.  Trailing spaces before each newline
/// and at end of input are removed.
string detab(string input)
{
    string output;
    size_t j;

    int column;
    for (size_t i = 0; i < input.length; i++)
    {
        char c = input[i];

        switch (c)
        {
        case '\t':
            // Pad with spaces until the column before the next tab stop,
            // then the tab itself becomes the final space.
            while ((column & 1) != 1)
            {
                output ~= ' ';
                j++;
                column++;
            }
            c = ' ';
            column++;
            break;

        case '\r':
        case '\n':
            // Drop trailing spaces accumulated on this line.
            while (j && output[j - 1] == ' ')
                j--;
            output = output[0 .. j];
            column = 0;
            break;

        default:
            column++;
            break;
        }
        output ~= c;
        j++;
    }
    // Strip trailing spaces at end of input as well.
    while (j && output[j - 1] == ' ')
        j--;
    return output[0 .. j];
}


/// Read and tokenize a source file; `"-"` reads from stdin.  Input is
/// detab-normalized before tokenizing.
auto readTokens(string path)
{
    string content;
    if (path == "-")
    {
        foreach (ulong i, string line; lines(stdin))
        {
            content ~= detab(line);
        }
    }
    else
    {
        content = detab(std.file.readText(path));
    }
    Token[] tokens;
    tokenize(content, path, tokens);
    return tokens;
}

/// Write tokens back out with their original preceding whitespace.  The
/// final token contributes only its whitespace (it is the EOF sentinel's
/// spacing).  No-op for an empty token array.
void writeTokens(File f, Token[] tokens)
{
    // BUGFIX: previously tokens[$-1] was a range violation on empty input.
    if (tokens.empty)
        return;
    foreach (ref t; tokens[0 .. $-1]) {
        f.write(t.precedingWhitespace_, t.value);
    }
    f.write(tokens[$-1].precedingWhitespace_);
}

// FIXME: Remove
/// A tokenized source file together with its scanned entity tree.
struct SourceFile
{
    this(string path)
    {
        tokens_ = readTokens(path);
        content_ = scanTokens(tokens_);
    }

    this(Token[] tokens)
    {
        tokens_ = tokens;
        content_ = scanTokens(tokens_);
    }

    Token[] tokens_;    // full token stream of the file
    Entity[] content_;  // top-level scanned entities
}