1 module tooling.Scanner;
2 
3 import std.algorithm;
4 import std.conv;
5 import std.exception;
6 import std.file;
7 import std.range;
8 import std.regex;
9 import std.stdio;
10 import std.string;
11 
12 import tooling.Tokenizer;
13 import tooling.TreeRange;
14 
// One node of the parsed source tree: a namespace/class/struct/enum, a
// function, or (discarded by callers) plain code.
class Entity
{
  auto structRe = regex(r"^(namespace|class|struct|enum)( (?P<name>\w+))?");

  // Matches a function signature in the space-joined token form produced by
  // the constructor below.
  // NOTE(review): "o?perator" makes the leading "o" optional — this looks
  // like a typo for "operator", but the pattern is kept byte-identical;
  // confirm intent before changing.
  auto functionRe = regex(r"(template <[^>]+> )?" ~
						  r"(?P<return>([a-zA-Z0-9_&<,>* ]+|(::))+ )?" ~
						  r"(?P<name>(((~ )|(:: ))?[a-zA-Z0-9_]+|( :: ))+)" ~
						  r"(o?perator *.[^(]*)?" ~
						  r" \( (?P<args>[a-zA-Z0-9_ :&<,>*]*)\)" ~
						  r"(?P<suffix> [a-zA-Z]+)*"
						  );

public:

  // Classifies the expression tokens by matching their space-joined text
  // against the struct/function patterns above.
  //
  // Params:
  //   expr    = the header tokens (up to but excluding "{" or ";")
  //   tokens  = the full token span of the entity, including its body
  //   content = already-parsed child entities
  this(Token[] expr, Token[] tokens, Entity[] content)
  {
	expr_ = expr;
	tokens_ = tokens;
	content_ = content;

	// Default until a pattern matches below.
	type_ = "code";
	name_ = "";

	auto signature = expr.map!(value).joiner(" ").to!string;

	auto structMatch = signature.matchFirst(structRe);
	if (!structMatch.empty)
	{
	  type_ = expr[0].value;
	  name_ = structMatch["name"];
	  return;
	}

	auto funcMatch = signature.matchFirst(functionRe);
	if (!funcMatch.empty)
	{
	  type_ = "function";
	  name_ = funcMatch["name"];

	  returnType_ = funcMatch["return"].strip;
	  arguments_ = funcMatch["args"].strip.splitter(" , ").array;
	  suffix_ = funcMatch["suffix"].strip.splitter(" ").array;
	}
  }

  // Display name: functions are rendered with their argument list and
  // suffix keywords, everything else by its bare name.
  string name()
  {
	if (type_ != "function")
	{
	  return name_;
	}
	return name_ ~ "(" ~ arguments_.joiner(",").text ~ ") " ~ suffix_.joiner(" ").text;
  }

  // Recursively dumps the entity tree to stdout, two spaces per level.
  void print(string indent="")
  {
	if (type_ == "function")
	{
	  writeln(indent, "<", type_, "> `", returnType_, "` ", name_, "(", arguments_, ") ", suffix_);
	}
	else
	{
	  writeln(indent, "<", type_, "> ", name_);
	}
	foreach (child; content_)
	{
	  child.print(indent~"  ");
	}
  }

  string type_;        // "namespace"/"class"/"struct"/"enum"/"function"/"code"
  string name_;        // bare identifier; "" for anonymous or plain code
  Token[] expr_;       // header tokens only
  Token[] tokens_;     // full span including body
  Entity[] content_;   // nested entities
  string returnType_;  // functions only
  string[] arguments_; // functions only
  string[] suffix_;    // functions only (e.g. const, override)
}
99 
// True when the entity was parsed from a `namespace` declaration.
bool isNamespace(Entity e)
{
  return "namespace" == e.type_;
}
104 
// True when the entity was parsed from a `class` declaration.
bool isClass(Entity e)
{
  return "class" == e.type_;
}
109 
// True when the entity matched the function-signature pattern.
bool isFunction(Entity e)
{
  return "function" == e.type_;
}
114 
// True when the entity carries a brace-delimited body.
// An out-of-line declaration's full token span is just its header plus a
// single trailing ";"; anything longer implies a pair of braces.
bool isInline(Entity e)
{
  return e.tokens_.length > e.expr_.length + 1;
}
121 
// Parses a flat token stream into a tree of Entity nodes.
//
// Scans forward from `start`, treating ";" as the end of a statement-level
// entity and "{" as the opener of a nested scope (parsed by a recursive
// call); "}" terminates the current nesting level and returns control to
// the caller.  `start` is advanced past consumed tokens so that the
// recursive caller can resume after the closing brace.  Entities that
// classify as plain "code" are discarded; only named constructs
// (namespace/class/struct/enum/function) are collected.
//
// Params:
//   tokens = the full token stream
//   start  = cursor into `tokens`; updated in place as parsing proceeds
// Returns: the entities found at this nesting level
Entity[] readTokenStream(Token[] tokens, ref ulong start)
{
  Entity[] entries;
  for (ulong i = start; i < tokens.length; i++)
  {
	auto token = tokens[i];
	if (token.type_ is tk!";")
	{
	  // Statement terminated: [start, i) holds one expression.
	  if (i - start > 1)
	  {
		// Skip a stray "}" left over from a preceding block.
		if (value(tokens[start]) == "}")
		{
		  start += 1;
		}
		auto expr = tokens[start .. i];
		// The second slice also includes the trailing ";".
		auto e = new Entity(expr, tokens[start .. i+1], []);
		if (e.type_ != "code")
		{
		  entries ~= e;
		}
	  }
	  start = i+1;
	}
	else if (token.type_ is tk!"{" && i)
	{
	  // Block opener: the preceding tokens are the header (e.g. a class
	  // or function signature); recurse to collect the block's children.
	  if (value(tokens[start]) == "}")
	  {
		start += 1;
	  }

	  auto expr = tokens[start .. i];
	  i += 1;
	  // The recursive call leaves i at this block's matching "}".
	  auto content = readTokenStream(tokens, i);
	  auto e = new Entity(expr,
						  tokens[start .. i+1],
						  content);
	  if (e.type_ != "code")
	  {
		entries ~= e;
	  }
	  start = i;
	}
	else if (token.type_ is tk!"}")
	{
	  // End of the enclosing block: hand back to the caller.
	  start = i;
	  return entries;
	}
  }
  return entries;
}
172 
// Printable text of a token: identifiers carry their own spelling, every
// other token kind is represented by its symbolic name.
string value(Token t)
{
  return t.type_ is tk!"identifier" ? t.value_ : t.type_.sym;
}
180 
// Lazy range of the entities' bare names.
// NOTE(review): this reads the raw field name_, not the formatted name()
// accessor — presumably deliberate, but worth confirming.
auto entityNames(Entity[] entries)
{
  return entries.map!(e => e.name_);
}
185 
// Builds the entity tree for a token stream.
// Start at the first namespace.
// FIXME: Right now we only scan the first namespace.
auto scanTokens(Token[] tokens)
{
  ulong cursor = 0;
  auto fromNamespace = tokens.find!(t => t.value == "namespace");
  return readTokenStream(fromNamespace, cursor);
}
194 
// Expands tabs to spaces and strips trailing whitespace.
//
// Tabs are expanded up to the next multiple of `tabWidth` columns.  The
// default of 2 preserves the previous hard-coded behavior (the old code
// used `column & 1`, i.e. tab stops every 2 columns).  Trailing spaces are
// removed at the end of every line and at the end of the input.
//
// Params:
//   input    = text to clean up
//   tabWidth = distance between tab stops in columns; must be >= 1
// Returns: the detabbed, right-trimmed text
string detab(string input, size_t tabWidth = 2)
{
  string output;
  size_t j;  // invariant: j == output.length except transiently while trimming

  size_t column;
  for (size_t i = 0; i < input.length; i++)
  {
	char c = input[i];

	switch (c)
	{
	  case '\t':
		// Pad until one column short of the next tab stop; the tab itself
		// becomes the final space via the shared append below.
		while (column % tabWidth != tabWidth - 1)
		{
		  output ~= ' ';
		  j++;
		  column++;
		}
		c = ' ';
		column++;
		break;

	  case '\r':
	  case '\n':
		// Drop trailing spaces before the line break.
		while (j && output[j - 1] == ' ')
		  j--;
		output = output[0 .. j];
		column = 0;
		break;

	  default:
		column++;
		break;
	}
	output ~= c;
	j++;
  }
  // Trim trailing spaces at end of input as well.
  while (j && output[j - 1] == ' ')
	j--;
  return output[0 .. j];
}
237 
238 
// Reads and tokenizes a source file; "-" reads from stdin instead.
// Input is detabbed line by line (stdin) or as a whole (file) before
// tokenization.
auto readTokens(string path)
{
  string source;
  if (path == "-")
  {
	foreach (string line; lines(stdin))
	{
	  source ~= detab(line);
	}
  }
  else
  {
	source = detab(std.file.readText(path));
  }
  Token[] tokens;
  tokenize(source, path, tokens);
  return tokens;
}
257 
// Serializes tokens back to `f`, reproducing each token's preceding
// whitespace.  The final token contributes only its whitespace, not its
// value — presumably it is an end-of-stream sentinel; confirm against
// tooling.Tokenizer.
//
// Fixed: previously crashed with a range violation on an empty token
// array (`tokens[0 .. $-1]`); an empty stream now writes nothing.
void writeTokens(File f, Token[] tokens)
{
  if (tokens.length == 0)
  {
	return;
  }
  foreach (ref t; tokens[0 .. $-1]) {
	f.write(t.precedingWhitespace_, t.value);
  }
  f.write(tokens[$-1].precedingWhitespace_);
}
265 
// FIXME: Remove
// A tokenized source file together with its scanned entity tree.
struct SourceFile
{
  // Load from disk (or stdin when path == "-"), then scan.
  this(string path)
  {
	this(readTokens(path));
  }

  // Scan an already-tokenized buffer.
  this(Token[] tokens)
  {
	tokens_ = tokens;
	content_ = scanTokens(tokens_);
  }

  Token[] tokens_;
  Entity[] content_;
}