1 |
/* Generated by re2c 0.12.3 on Thu May 14 00:29:12 2009 */ |
---|
2 |
#line 1 "noit_tokenizer.re" |
---|
3 |
/*
 * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
 *       of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
---|
34 |
|
---|
35 |
#include "noit_defines.h" |
---|
36 |
#include <stdlib.h> |
---|
37 |
#include <string.h> |
---|
38 |
|
---|
39 |
/*
 * Scanner cursor plus the most recently scanned token.
 */
struct token {
  /* Heap-allocated copy of the token text, or NULL for tokens that carry
   * no text (whitespace, end of input). */
  char *token;
  /* First input byte of the current token's text. */
  const char *start;
  /* One past the last input byte of the current token's text. */
  const char *end;
  /* Scan cursor: the next input byte to be examined. */
  const char *next;
  /* Classification of the current token. */
  enum {
    NT_IDENT,
    NT_DQSTRING,
    NT_SPACE,
    NT_UNKNOWN,
    NT_EOF
  } type;
};
---|
46 |
/* Point the scan cursor of token *t at input position a.  The whole
 * expansion is parenthesized so the macro behaves as a single expression
 * when it is used inside a larger one. */
#define SET_TOKEN(t,a) ((t)->next = (a))
---|
47 |
|
---|
48 |
/*
 * Collapse backslash escapes in the NUL-terminated string p, in place.
 *
 * Recognized escapes are: \<space> \" \n \r \t \a \b \v \f \0 and \\.
 * A backslash followed by any other character is copied through unchanged
 * (both bytes are kept), as is a lone backslash at the very end.
 *
 * p     string to rewrite; the result is never longer than the input.
 * only  when non-NULL, only an escape whose character equals *only is
 *       processed; every other backslash sequence is left untouched.
 *       When NULL, all recognized escapes are processed.
 *
 * Note that \0 writes a NUL byte into the output, so the resulting C string
 * ends there even though the remaining bytes are still rewritten.
 */
static void c_unescape(char *p, const char *only) {
  char *out = p;  /* write position; never runs ahead of the read position p */

  while(*p != '\0') {
    if(p[0] == '\\' && p[1] != '\0' && (!only || p[1] == *only)) {
      char decoded;
      switch(p[1]) {
        case ' ':  decoded = ' ';  break;
        case '"':  decoded = '"';  break;
        case 'n':  decoded = '\n'; break;
        case 'r':  decoded = '\r'; break;
        case 't':  decoded = '\t'; break;
        case 'a':  decoded = '\a'; break;
        case 'b':  decoded = '\b'; break;
        case 'v':  decoded = '\v'; break;
        case 'f':  decoded = '\f'; break;
        case '0':  decoded = '\0'; break;
        case '\\': decoded = '\\'; break;
        default:
          /* Not an escape we know: keep the backslash and its follower. */
          *out++ = *p++;
          *out++ = *p++;
          continue;
      }
      *out++ = decoded;
      p += 2;
    }
    else {
      *out++ = *p++;
    }
  }
  *out = '\0';
}
---|
74 |
|
---|
75 |
#define BAIL_UNKNOWN do { t->type = NT_UNKNOWN; return -1; } while(0) |
---|
76 |
static int token_scan(struct token *t) |
---|
77 |
{ |
---|
78 |
t->start = t->end = t->next; |
---|
79 |
|
---|
80 |
mainpattern: |
---|
81 |
|
---|
82 |
#line 83 "noit_tokenizer.c" |
---|
83 |
{ |
---|
84 |
unsigned char yych; |
---|
85 |
|
---|
86 |
yych = (unsigned char)*t->next; |
---|
87 |
switch(yych) { |
---|
88 |
case 0x00: goto yy10; |
---|
89 |
case 0x09: |
---|
90 |
case 0x0A: |
---|
91 |
case 0x0D: |
---|
92 |
case ' ': goto yy2; |
---|
93 |
case '"': goto yy4; |
---|
94 |
case '\'': goto yy6; |
---|
95 |
default: goto yy8; |
---|
96 |
} |
---|
97 |
yy2: |
---|
98 |
++t->next; |
---|
99 |
yych = (unsigned char)*t->next; |
---|
100 |
goto yy17; |
---|
101 |
yy3: |
---|
102 |
#line 86 "noit_tokenizer.re" |
---|
103 |
{ t->token = NULL; |
---|
104 |
t->end = t->next; |
---|
105 |
t->type = NT_SPACE; |
---|
106 |
return 1; } |
---|
107 |
#line 108 "noit_tokenizer.c" |
---|
108 |
yy4: |
---|
109 |
++t->next; |
---|
110 |
#line 90 "noit_tokenizer.re" |
---|
111 |
{ t->type = NT_DQSTRING; |
---|
112 |
if(t->start != t->end) { |
---|
113 |
t->start++; |
---|
114 |
t->end = t->next - 1; |
---|
115 |
t->token = malloc(t->end-t->start + 1); |
---|
116 |
strlcpy(t->token, t->start, t->end-t->start + 1); |
---|
117 |
c_unescape(t->token, NULL); |
---|
118 |
return 1; |
---|
119 |
} |
---|
120 |
else |
---|
121 |
goto dqstring; |
---|
122 |
} |
---|
123 |
#line 124 "noit_tokenizer.c" |
---|
124 |
yy6: |
---|
125 |
++t->next; |
---|
126 |
#line 102 "noit_tokenizer.re" |
---|
127 |
{ t->type = NT_IDENT; |
---|
128 |
if(t->start != t->end) { |
---|
129 |
t->start++; |
---|
130 |
t->end = t->next - 1; |
---|
131 |
t->token = malloc(t->end-t->start + 1); |
---|
132 |
strlcpy(t->token, t->start, t->end-t->start + 1); |
---|
133 |
return 1; |
---|
134 |
} |
---|
135 |
else |
---|
136 |
goto sqstring; |
---|
137 |
} |
---|
138 |
#line 139 "noit_tokenizer.c" |
---|
139 |
yy8: |
---|
140 |
++t->next; |
---|
141 |
yych = (unsigned char)*t->next; |
---|
142 |
goto yy13; |
---|
143 |
yy9: |
---|
144 |
#line 114 "noit_tokenizer.re" |
---|
145 |
{ char only = ' '; |
---|
146 |
t->end = t->next; |
---|
147 |
t->type = NT_IDENT; |
---|
148 |
t->token = malloc(t->end-t->start + 1); |
---|
149 |
strlcpy(t->token, t->start, t->end-t->start + 1); |
---|
150 |
c_unescape(t->token, &only); |
---|
151 |
return 1; |
---|
152 |
} |
---|
153 |
#line 154 "noit_tokenizer.c" |
---|
154 |
yy10: |
---|
155 |
++t->next; |
---|
156 |
#line 122 "noit_tokenizer.re" |
---|
157 |
{ t->token = NULL; |
---|
158 |
t->type = NT_EOF; |
---|
159 |
return 0; |
---|
160 |
} |
---|
161 |
#line 162 "noit_tokenizer.c" |
---|
162 |
yy12: |
---|
163 |
++t->next; |
---|
164 |
yych = (unsigned char)*t->next; |
---|
165 |
yy13: |
---|
166 |
switch(yych) { |
---|
167 |
case 0x00: |
---|
168 |
case 0x09: |
---|
169 |
case 0x0A: |
---|
170 |
case 0x0D: |
---|
171 |
case ' ': goto yy9; |
---|
172 |
case '\\': goto yy14; |
---|
173 |
default: goto yy12; |
---|
174 |
} |
---|
175 |
yy14: |
---|
176 |
++t->next; |
---|
177 |
yych = (unsigned char)*t->next; |
---|
178 |
switch(yych) { |
---|
179 |
case 0x00: |
---|
180 |
case 0x09: |
---|
181 |
case 0x0A: |
---|
182 |
case 0x0D: goto yy9; |
---|
183 |
case '\\': goto yy14; |
---|
184 |
default: goto yy12; |
---|
185 |
} |
---|
186 |
yy16: |
---|
187 |
++t->next; |
---|
188 |
yych = (unsigned char)*t->next; |
---|
189 |
yy17: |
---|
190 |
switch(yych) { |
---|
191 |
case 0x09: |
---|
192 |
case 0x0A: |
---|
193 |
case 0x0D: |
---|
194 |
case ' ': goto yy16; |
---|
195 |
default: goto yy3; |
---|
196 |
} |
---|
197 |
} |
---|
198 |
#line 127 "noit_tokenizer.re" |
---|
199 |
|
---|
200 |
|
---|
201 |
sqstring: |
---|
202 |
|
---|
203 |
#line 204 "noit_tokenizer.c" |
---|
204 |
{ |
---|
205 |
unsigned char yych; |
---|
206 |
yych = (unsigned char)*t->next; |
---|
207 |
switch(yych) { |
---|
208 |
case 0x00: goto yy23; |
---|
209 |
case '\'': goto yy20; |
---|
210 |
default: goto yy21; |
---|
211 |
} |
---|
212 |
yy20: |
---|
213 |
#line 131 "noit_tokenizer.re" |
---|
214 |
{ t->end = t->next; |
---|
215 |
goto mainpattern; } |
---|
216 |
#line 217 "noit_tokenizer.c" |
---|
217 |
yy21: |
---|
218 |
++t->next; |
---|
219 |
yych = (unsigned char)*t->next; |
---|
220 |
switch(yych) { |
---|
221 |
case 0x00: |
---|
222 |
case '\'': goto yy20; |
---|
223 |
default: goto yy21; |
---|
224 |
} |
---|
225 |
yy23: |
---|
226 |
++t->next; |
---|
227 |
#line 133 "noit_tokenizer.re" |
---|
228 |
{ BAIL_UNKNOWN; } |
---|
229 |
#line 230 "noit_tokenizer.c" |
---|
230 |
} |
---|
231 |
#line 134 "noit_tokenizer.re" |
---|
232 |
|
---|
233 |
|
---|
234 |
dqstring: |
---|
235 |
|
---|
236 |
#line 237 "noit_tokenizer.c" |
---|
237 |
{ |
---|
238 |
unsigned char yych; |
---|
239 |
yych = (unsigned char)*t->next; |
---|
240 |
switch(yych) { |
---|
241 |
case 0x00: goto yy33; |
---|
242 |
case '"': goto yy29; |
---|
243 |
case '\\': goto yy27; |
---|
244 |
default: goto yy31; |
---|
245 |
} |
---|
246 |
yy27: |
---|
247 |
yych = (unsigned char)*++t->next; |
---|
248 |
switch(yych) { |
---|
249 |
case 0x00: goto yy28; |
---|
250 |
case '"': |
---|
251 |
case '0': |
---|
252 |
case '\\': |
---|
253 |
case 'a': |
---|
254 |
case 'b': |
---|
255 |
case 'f': |
---|
256 |
case 'n': |
---|
257 |
case 'r': |
---|
258 |
case 't': |
---|
259 |
case 'v': goto yy37; |
---|
260 |
default: goto yy35; |
---|
261 |
} |
---|
262 |
yy28: |
---|
263 |
yy29: |
---|
264 |
++t->next; |
---|
265 |
#line 142 "noit_tokenizer.re" |
---|
266 |
{ t->end = t->next--; |
---|
267 |
goto mainpattern; |
---|
268 |
} |
---|
269 |
#line 270 "noit_tokenizer.c" |
---|
270 |
yy31: |
---|
271 |
++t->next; |
---|
272 |
#line 145 "noit_tokenizer.re" |
---|
273 |
{ goto dqstring; } |
---|
274 |
#line 275 "noit_tokenizer.c" |
---|
275 |
yy33: |
---|
276 |
++t->next; |
---|
277 |
#line 146 "noit_tokenizer.re" |
---|
278 |
{ BAIL_UNKNOWN; } |
---|
279 |
#line 280 "noit_tokenizer.c" |
---|
280 |
yy35: |
---|
281 |
++t->next; |
---|
282 |
#line 141 "noit_tokenizer.re" |
---|
283 |
{ goto dqstring; } |
---|
284 |
#line 285 "noit_tokenizer.c" |
---|
285 |
yy37: |
---|
286 |
++t->next; |
---|
287 |
#line 139 "noit_tokenizer.re" |
---|
288 |
{ goto dqstring; } |
---|
289 |
#line 290 "noit_tokenizer.c" |
---|
290 |
} |
---|
291 |
#line 147 "noit_tokenizer.re" |
---|
292 |
|
---|
293 |
} |
---|
294 |
|
---|
295 |
int noit_tokenize(const char *input, char **vector, int *cnt) { |
---|
296 |
struct token t; |
---|
297 |
int i = 0; |
---|
298 |
|
---|
299 |
SET_TOKEN(&t, input); |
---|
300 |
while(token_scan(&t) != -1) { |
---|
301 |
switch(t.type) { |
---|
302 |
case NT_IDENT: |
---|
303 |
case NT_DQSTRING: |
---|
304 |
if(i<*cnt) vector[i] = t.token; |
---|
305 |
i++; |
---|
306 |
break; |
---|
307 |
case NT_SPACE: |
---|
308 |
break; |
---|
309 |
case NT_EOF: |
---|
310 |
if(i<*cnt) *cnt = i; |
---|
311 |
return i; |
---|
312 |
case NT_UNKNOWN: |
---|
313 |
/* UNREACHED */ |
---|
314 |
goto failure; |
---|
315 |
} |
---|
316 |
} |
---|
317 |
failure: |
---|
318 |
if(i<*cnt) *cnt = i; |
---|
319 |
return input - t.next; |
---|
320 |
} |
---|