root/src/noit_tokenizer.c

Revision 88a71780101cbf23034aa0cb840f9f0368fda2dd, 7.8 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 6 years ago)

fixes #126

  • Property mode set to 100644
Line 
1 /* Generated by re2c 0.12.3 on Thu May 14 00:29:12 2009 */
2 #line 1 "noit_tokenizer.re"
3 /*
4  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  *       copyright notice, this list of conditions and the following
15  *       disclaimer in the documentation and/or other materials provided
16  *       with the distribution.
17  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
18  *       of its contributors may be used to endorse or promote products
19  *       derived from this software without specific prior written
20  *       permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include "noit_defines.h"
36 #include <stdlib.h>
37 #include <string.h>
38
/*
 * Scanner state and result for a single token.
 *
 * token_scan() reads from `next`, fills in the remaining fields and leaves
 * `next` positioned for the following call.
 */
struct token {
  char *token;        /* malloc()ed copy of the token text set by
                         token_scan(); NULL for NT_SPACE and NT_EOF */
  const char *start;  /* first character of the text copied into `token` */
  const char *end;    /* one past the last character copied into `token` */
  const char *next;   /* scan cursor: where the next token_scan() begins */
  enum { NT_IDENT, NT_DQSTRING, NT_SPACE, NT_UNKNOWN, NT_EOF } type;
};

/*
 * Point the scanner `t` at the input `a`.  The expansion is fully
 * parenthesized so the macro is safe inside a larger expression.
 */
#define SET_TOKEN(t,a) ((t)->next = (a))
47
/*
 * Decode backslash escapes in the NUL-terminated string `p`, in place.
 *
 * p    - buffer to rewrite; the result is never longer than the input.
 * only - when NULL, every recognised escape is decoded; otherwise only the
 *        escape whose second character equals *only is decoded and all
 *        other backslash sequences are copied through untouched.
 *
 * Recognised escapes: "\ " (space), \" \n \r \t \a \b \v \f \0 and \\.
 * An unrecognised escape keeps both the backslash and the character that
 * follows it, and a trailing lone backslash is kept as is.  Note that \0
 * stores a NUL byte, so callers that treat the result as a C string see it
 * end at that point.
 */
static void c_unescape(char *p, const char *only) {
  char *out = p;  /* write cursor; never runs ahead of the read cursor p */

  while(*p != '\0') {
    char decoded;

    /* Not an escape we are asked to handle: copy one character through. */
    if(p[0] != '\\' || p[1] == '\0' || (only && p[1] != *only)) {
      *out++ = *p++;
      continue;
    }

    switch(p[1]) {
      case ' ':  decoded = ' ';  break;
      case '"':  decoded = '"';  break;
      case 'n':  decoded = '\n'; break;
      case 'r':  decoded = '\r'; break;
      case 't':  decoded = '\t'; break;
      case 'a':  decoded = '\a'; break;
      case 'b':  decoded = '\b'; break;
      case 'v':  decoded = '\v'; break;
      case 'f':  decoded = '\f'; break;
      case '0':  decoded = '\0'; break;
      case '\\': decoded = '\\'; break;
      default:
        /* Unknown escape: preserve the backslash and the character. */
        *out++ = *p++;
        *out++ = *p++;
        continue;
    }
    *out++ = decoded;
    p += 2;
  }
  *out = '\0';
}
74
/* Mark the token as NT_UNKNOWN and make token_scan() return -1. */
75 #define BAIL_UNKNOWN do { t->type = NT_UNKNOWN; return -1; } while(0)
/*
 * Scan one token starting at t->next.  The body is an re2c-generated state
 * machine; the `#line` directives map it back to noit_tokenizer.re.
 *
 * On entry t->start and t->end are both reset to t->next.  On return
 * t->type is set and t->next has been advanced past what was consumed.
 *
 * Returns 1 for NT_SPACE, NT_IDENT and NT_DQSTRING tokens, 0 for NT_EOF and
 * -1 (through BAIL_UNKNOWN) for NT_UNKNOWN.  For NT_IDENT and NT_DQSTRING
 * t->token is a malloc()ed, NUL-terminated copy of the token text; for
 * NT_SPACE and NT_EOF it is set to NULL; BAIL_UNKNOWN leaves it untouched.
 *
 * NOTE(review): the malloc() results are used without a NULL check.
 */
76 static int token_scan(struct token *t)
77 {
78   t->start = t->end = t->next;
79
   /*
    * Dispatch on the current character.  Quoted tokens pass through here
    * twice: first on the opening quote (start == end), and again by a goto
    * from sqstring/dqstring with t->next on the closing quote
    * (start != end).
    */
80  mainpattern:
81
82 #line 83 "noit_tokenizer.c"
83         {
84                 unsigned char yych;
85
86                 yych = (unsigned char)*t->next;
87                 switch(yych) {
88                 case 0x00:      goto yy10;
89                 case 0x09:
90                 case 0x0A:
91                 case 0x0D:
92                 case ' ':       goto yy2;
93                 case '"':       goto yy4;
94                 case '\'':      goto yy6;
95                 default:        goto yy8;
96                 }
                /* First whitespace character: consume it, then loop in yy17. */
97 yy2:
98                 ++t->next;
99                 yych = (unsigned char)*t->next;
100                 goto yy17;
                /* End of a whitespace run: report NT_SPACE with no token text. */
101 yy3:
102 #line 86 "noit_tokenizer.re"
103                 { t->token = NULL;
104                       t->end = t->next;
105                       t->type = NT_SPACE;
106                       return 1; }
107 #line 108 "noit_tokenizer.c"
                /*
                 * Double quote.  On the opening quote hand over to dqstring;
                 * on the closing quote copy the text between the quotes and
                 * decode every escape in it.
                 */
108 yy4:
109                 ++t->next;
110 #line 90 "noit_tokenizer.re"
111                 { t->type = NT_DQSTRING;
112                       if(t->start != t->end) {
113                         t->start++;
114                         t->end = t->next - 1;
115                         t->token = malloc(t->end-t->start + 1);
116                         strlcpy(t->token, t->start, t->end-t->start + 1);
117                         c_unescape(t->token, NULL);
118                         return 1;
119                       }
120                       else
121                         goto dqstring;
122                     }
123 #line 124 "noit_tokenizer.c"
                /*
                 * Single quote.  Same two-pass shape as yy4, but the token is
                 * reported as NT_IDENT and the text is copied verbatim (no
                 * unescaping).
                 */
124 yy6:
125                 ++t->next;
126 #line 102 "noit_tokenizer.re"
127                 { t->type = NT_IDENT;
128                       if(t->start != t->end) {
129                         t->start++;
130                         t->end = t->next - 1;
131                         t->token = malloc(t->end-t->start + 1);
132                         strlcpy(t->token, t->start, t->end-t->start + 1);
133                         return 1;
134                       }
135                       else
136                         goto sqstring;
137                     }
138 #line 139 "noit_tokenizer.c"
                /* First character of an unquoted word. */
139 yy8:
140                 ++t->next;
141                 yych = (unsigned char)*t->next;
142                 goto yy13;
                /*
                 * End of an unquoted word: copy [start, next) and decode only
                 * the backslash-space escape.
                 */
143 yy9:
144 #line 114 "noit_tokenizer.re"
145                 { char only = ' ';
146                       t->end = t->next;
147                       t->type = NT_IDENT;
148                       t->token = malloc(t->end-t->start + 1);
149                       strlcpy(t->token, t->start, t->end-t->start + 1);
150                       c_unescape(t->token, &only);
151                       return 1;
152                     }
153 #line 154 "noit_tokenizer.c"
                /* NUL terminator: step past it and report NT_EOF. */
154 yy10:
155                 ++t->next;
156 #line 122 "noit_tokenizer.re"
157                 { t->token = NULL;
158                       t->type = NT_EOF;
159                       return 0;
160                     }
161 #line 162 "noit_tokenizer.c"
                /* Body of an unquoted word: NUL or whitespace ends it. */
162 yy12:
163                 ++t->next;
164                 yych = (unsigned char)*t->next;
165 yy13:
166                 switch(yych) {
167                 case 0x00:
168                 case 0x09:
169                 case 0x0A:
170                 case 0x0D:
171                 case ' ':       goto yy9;
172                 case '\\':      goto yy14;
173                 default:        goto yy12;
174                 }
                /*
                 * Just after a backslash inside an unquoted word.  A space is
                 * not a terminator here (it falls into the default case), so an
                 * escaped space stays in the word; NUL, tab, LF and CR still
                 * end it.
                 */
175 yy14:
176                 ++t->next;
177                 yych = (unsigned char)*t->next;
178                 switch(yych) {
179                 case 0x00:
180                 case 0x09:
181                 case 0x0A:
182                 case 0x0D:      goto yy9;
183                 case '\\':      goto yy14;
184                 default:        goto yy12;
185                 }
                /* Consume the remainder of a whitespace run. */
186 yy16:
187                 ++t->next;
188                 yych = (unsigned char)*t->next;
189 yy17:
190                 switch(yych) {
191                 case 0x09:
192                 case 0x0A:
193                 case 0x0D:
194                 case ' ':       goto yy16;
195                 default:        goto yy3;
196                 }
197         }
198 #line 127 "noit_tokenizer.re"
199
200
   /* Body of a single-quoted string: advance to the closing quote. */
201  sqstring:
202
203 #line 204 "noit_tokenizer.c"
204         {
205                 unsigned char yych;
206                 yych = (unsigned char)*t->next;
207                 switch(yych) {
208                 case 0x00:      goto yy23;
209                 case '\'':      goto yy20;
210                 default:        goto yy21;
211                 }
                /*
                 * Stop with t->next on the terminating character and let
                 * mainpattern finish the token.
                 * NOTE(review): yy21 also lands here on NUL, so an unterminated
                 * quote with at least one character after it is then seen by
                 * mainpattern as NUL and reported as NT_EOF, not NT_UNKNOWN.
                 */
212 yy20:
213 #line 131 "noit_tokenizer.re"
214                 { t->end = t->next;
215                       goto mainpattern; }
216 #line 217 "noit_tokenizer.c"
217 yy21:
218                 ++t->next;
219                 yych = (unsigned char)*t->next;
220                 switch(yych) {
221                 case 0x00:
222                 case '\'':      goto yy20;
223                 default:        goto yy21;
224                 }
                /* NUL immediately after the opening quote: unknown token. */
225 yy23:
226                 ++t->next;
227 #line 133 "noit_tokenizer.re"
228                 { BAIL_UNKNOWN; }
229 #line 230 "noit_tokenizer.c"
230         }
231 #line 134 "noit_tokenizer.re"
232
233
   /*
    * Body of a double-quoted string.  Each pass consumes one character or
    * one backslash pair and then jumps back here.
    */
234  dqstring:
235
236 #line 237 "noit_tokenizer.c"
237         {
238                 unsigned char yych;
239                 yych = (unsigned char)*t->next;
240                 switch(yych) {
241                 case 0x00:      goto yy33;
242                 case '"':       goto yy29;
243                 case '\\':      goto yy27;
244                 default:        goto yy31;
245                 }
                /* Backslash: look at the character that follows it. */
246 yy27:
247                 yych = (unsigned char)*++t->next;
248                 switch(yych) {
249                 case 0x00:      goto yy28;
250                 case '"':
251                 case '0':
252                 case '\\':
253                 case 'a':
254                 case 'b':
255                 case 'f':
256                 case 'n':
257                 case 'r':
258                 case 't':
259                 case 'v':       goto yy37;
260                 default:        goto yy35;
261                 }
                /*
                 * Closing quote.  t->end becomes the position just past it and
                 * the post-decrement puts t->next back on it, so mainpattern
                 * sees the quote again with start != end.
                 * NOTE(review): a backslash followed by NUL (yy28) shares this
                 * code; mainpattern then sees the NUL and reports NT_EOF rather
                 * than NT_UNKNOWN.
                 */
262 yy28:
263 yy29:
264                 ++t->next;
265 #line 142 "noit_tokenizer.re"
266                 { t->end = t->next--;
267                       goto mainpattern;
268                     }
269 #line 270 "noit_tokenizer.c"
                /* Ordinary character inside the string. */
270 yy31:
271                 ++t->next;
272 #line 145 "noit_tokenizer.re"
273                 { goto dqstring; }
274 #line 275 "noit_tokenizer.c"
                /* NUL before a closing quote: unknown token. */
275 yy33:
276                 ++t->next;
277 #line 146 "noit_tokenizer.re"
278                 { BAIL_UNKNOWN; }
279 #line 280 "noit_tokenizer.c"
                /* Backslash followed by any other character: skip the pair. */
280 yy35:
281                 ++t->next;
282 #line 141 "noit_tokenizer.re"
283                 { goto dqstring; }
284 #line 285 "noit_tokenizer.c"
                /* Backslash followed by a listed escape character: skip the pair. */
285 yy37:
286                 ++t->next;
287 #line 139 "noit_tokenizer.re"
288                 { goto dqstring; }
289 #line 290 "noit_tokenizer.c"
290         }
291 #line 147 "noit_tokenizer.re"
292
293 }
294
295 int noit_tokenize(const char *input, char **vector, int *cnt) {
296   struct token t;
297   int i = 0;
298
299   SET_TOKEN(&t, input);
300   while(token_scan(&t) != -1) {
301     switch(t.type) {
302       case NT_IDENT:
303       case NT_DQSTRING:
304         if(i<*cnt) vector[i] = t.token;
305         i++;
306         break;
307       case NT_SPACE:
308         break;
309       case NT_EOF:
310         if(i<*cnt) *cnt = i;
311         return i;
312       case NT_UNKNOWN:
313         /* UNREACHED */
314         goto failure;
315     }
316   }
317  failure:
318   if(i<*cnt) *cnt = i;
319   return input - t.next;
320 }
Note: See TracBrowser for help on using the browser.