1 |
/* |
---|
2 |
* Copyright (c) 2007, OmniTI Computer Consulting, Inc. |
---|
3 |
* All rights reserved. |
---|
4 |
* |
---|
5 |
* Redistribution and use in source and binary forms, with or without |
---|
6 |
* modification, are permitted provided that the following conditions are |
---|
7 |
* met: |
---|
8 |
* |
---|
9 |
* * Redistributions of source code must retain the above copyright |
---|
10 |
* notice, this list of conditions and the following disclaimer. |
---|
11 |
* * Redistributions in binary form must reproduce the above |
---|
12 |
* copyright notice, this list of conditions and the following |
---|
13 |
* disclaimer in the documentation and/or other materials provided |
---|
14 |
* with the distribution. |
---|
15 |
* * Neither the name OmniTI Computer Consulting, Inc. nor the names |
---|
16 |
* of its contributors may be used to endorse or promote products |
---|
17 |
* derived from this software without specific prior written |
---|
18 |
* permission. |
---|
19 |
* |
---|
20 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
---|
21 |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
---|
22 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
---|
23 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
---|
24 |
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
25 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
---|
26 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
---|
27 |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
---|
28 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
---|
29 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
---|
30 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
31 |
*/ |
---|
32 |
|
---|
33 |
#include "noit_defines.h" |
---|
34 |
#include <stdlib.h> |
---|
35 |
#include <string.h> |
---|
36 |
|
---|
37 |
/*
 * Scanner state plus the most recently scanned token, as used by
 * token_scan().
 */
struct token {
  char *token;        /* heap copy of the token text; NULL for space and EOF */
  const char *start;  /* position in the input where the current token begins */
  const char *end;    /* end position recorded for the current token */
  const char *next;   /* scan cursor: where the following scan resumes */
  enum {
    NT_IDENT,         /* bare word or single-quoted string */
    NT_DQSTRING,      /* double-quoted string */
    NT_SPACE,         /* run of whitespace */
    NT_UNKNOWN,       /* scan error */
    NT_EOF            /* terminating NUL reached */
  } type;
};
---|
44 |
/*
 * Point the scan cursor of token `t` at string `a`.
 * The expansion is fully parenthesized so the macro behaves as a single
 * expression wherever it is used, not only as a stand-alone statement.
 */
#define SET_TOKEN(t,a) ((t)->next = (a))
---|
45 |
|
---|
46 |
/*
 * Collapse backslash escape sequences in the NUL-terminated string `p`,
 * rewriting it in place (the result is never longer than the input).
 *
 * Recognized escapes: backslash followed by one of
 *   space  "  n  r  t  a  b  v  f  0  backslash
 * Any other backslash pair is copied through unchanged (both characters).
 * A trailing lone backslash is copied as-is.
 *
 * p:    string to rewrite; must be writable and NUL-terminated.
 * only: when non-NULL, only a backslash followed by the character *only
 *       is treated as an escape; every other backslash is literal.
 *
 * Note: the "\0" escape stores a real NUL byte, so the result read as a
 * C string ends there even though the remaining input is still processed.
 */
static void c_unescape(char *p, const char *only) {
  char *out = p;

  while(*p != '\0') {
    char repl;

    /* Not an escape we are allowed to interpret: copy one byte verbatim. */
    if(p[0] != '\\' || p[1] == '\0' || (only && p[1] != *only)) {
      *out++ = *p++;
      continue;
    }

    switch(p[1]) {
      case ' ':  repl = ' ';  break;
      case '"':  repl = '"';  break;
      case 'n':  repl = '\n'; break;
      case 'r':  repl = '\r'; break;
      case 't':  repl = '\t'; break;
      case 'a':  repl = '\a'; break;
      case 'b':  repl = '\b'; break;
      case 'v':  repl = '\v'; break;
      case 'f':  repl = '\f'; break;
      case '0':  repl = '\0'; break;
      case '\\': repl = '\\'; break;
      default:
        /* Unknown escape: keep the backslash and the following character. */
        *out++ = *p++;
        *out++ = *p++;
        continue;
    }
    *out++ = repl;
    p += 2;
  }
  *out = '\0';
}
---|
72 |
|
---|
73 |
/*
 * Mark the current token as NT_UNKNOWN and return -1 from the enclosing
 * function. Expects a `struct token *t` to be in scope.
 */
#define BAIL_UNKNOWN do { t->type = NT_UNKNOWN; return -1; } while(0) |
---|
74 |
/*
 * Scan a single token starting at t->next. The scanner bodies below are
 * re2c specifications; t->next is the re2c cursor (YYCURSOR) and is
 * advanced as input is consumed.
 *
 * Returns 1 after an NT_SPACE, NT_IDENT or NT_DQSTRING token, 0 when the
 * terminating NUL is reached (NT_EOF), and -1 (via BAIL_UNKNOWN) on a
 * scan error. t->token is a malloc'd copy of the token text for
 * NT_IDENT / NT_DQSTRING and NULL for NT_SPACE / NT_EOF.
 *
 * NOTE(review): the malloc() results are used without a NULL check.
 */
static int token_scan(struct token *t) |
---|
75 |
{ |
---|
76 |
t->start = t->end = t->next; |
---|
77 |
|
---|
78 |
/*
 * Entry state. Quoted strings are handled in two passes: the opening
 * quote is matched while start == end, which jumps to sqstring/dqstring
 * to locate the end of the string; those states move t->end and jump back
 * here, so the quote rule matches again with start != end and takes the
 * branch that copies the text between the quotes. Single-quoted text is
 * copied verbatim; double-quoted text is passed through c_unescape();
 * bare words only have backslash-space unescaped.
 */
mainpattern: |
---|
79 |
/*!re2c |
---|
80 |
re2c:define:YYCTYPE = "unsigned char"; |
---|
81 |
re2c:define:YYCURSOR = t->next; |
---|
82 |
re2c:yyfill:enable = 0; |
---|
83 |
re2c:yych:conversion = 1; |
---|
84 |
re2c:indent:top = 1; |
---|
85 |
|
---|
86 |
[ \t\r\n]+ { t->token = NULL; |
---|
87 |
t->end = t->next; |
---|
88 |
t->type = NT_SPACE; |
---|
89 |
return 1; } |
---|
90 |
["] { t->type = NT_DQSTRING; |
---|
91 |
if(t->start != t->end) { |
---|
92 |
t->start++; |
---|
93 |
t->end = t->next - 1; |
---|
94 |
t->token = malloc(t->end-t->start + 1); |
---|
95 |
strlcpy(t->token, t->start, t->end-t->start + 1); |
---|
96 |
c_unescape(t->token, NULL); |
---|
97 |
return 1; |
---|
98 |
} |
---|
99 |
else |
---|
100 |
goto dqstring; |
---|
101 |
} |
---|
102 |
"'" { t->type = NT_IDENT; |
---|
103 |
if(t->start != t->end) { |
---|
104 |
t->start++; |
---|
105 |
t->end = t->next - 1; |
---|
106 |
t->token = malloc(t->end-t->start + 1); |
---|
107 |
strlcpy(t->token, t->start, t->end-t->start + 1); |
---|
108 |
return 1; |
---|
109 |
} |
---|
110 |
else |
---|
111 |
goto sqstring; |
---|
112 |
} |
---|
113 |
[^\000'" \t\r\n] ([^\000 \t\r\n]|[\\][ ])* |
---|
114 |
{ char only = ' '; |
---|
115 |
t->end = t->next; |
---|
116 |
t->type = NT_IDENT; |
---|
117 |
t->token = malloc(t->end-t->start + 1); |
---|
118 |
strlcpy(t->token, t->start, t->end-t->start + 1); |
---|
119 |
c_unescape(t->token, &only); |
---|
120 |
return 1; |
---|
121 |
} |
---|
122 |
[\000] { t->token = NULL; |
---|
123 |
t->type = NT_EOF; |
---|
124 |
return 0; |
---|
125 |
} |
---|
126 |
[\000-\377] { BAIL_UNKNOWN; } |
---|
127 |
*/ |
---|
128 |
|
---|
129 |
/*
 * Body of a single-quoted string: consume everything that is neither a
 * single quote nor NUL, record the stop position in t->end, and re-enter
 * mainpattern with the cursor on whatever stopped the run.
 * NOTE(review): if the run stops at NUL (unterminated string), mainpattern
 * appears to report NT_EOF rather than an error - confirm this is intended.
 */
sqstring: |
---|
130 |
/*!re2c |
---|
131 |
[^'\000]* { t->end = t->next; |
---|
132 |
goto mainpattern; } |
---|
133 |
[\000] { BAIL_UNKNOWN; } |
---|
134 |
*/ |
---|
135 |
|
---|
136 |
/*
 * Body of a double-quoted string: skip backslash pairs and ordinary
 * characters until the closing quote. On the closing quote t->end is set
 * and the cursor is stepped back onto the quote, so mainpattern matches it
 * again and copies the string. A NUL before the closing quote bails out
 * with NT_UNKNOWN.
 */
dqstring: |
---|
137 |
/*!re2c |
---|
138 |
[\\][nrtabvf0"\\] |
---|
139 |
{ goto dqstring; } |
---|
140 |
"\\" ( [^\000] \ [nrtabvf0"\\] ) |
---|
141 |
{ goto dqstring; } |
---|
142 |
["] { t->end = t->next--; |
---|
143 |
goto mainpattern; |
---|
144 |
} |
---|
145 |
[^"\000]\[\\"] { goto dqstring; } |
---|
146 |
[\000] { BAIL_UNKNOWN; } |
---|
147 |
*/ |
---|
148 |
} |
---|
149 |
|
---|
150 |
int noit_tokenize(const char *input, char **vector, int *cnt) { |
---|
151 |
struct token t; |
---|
152 |
int i = 0; |
---|
153 |
|
---|
154 |
SET_TOKEN(&t, input); |
---|
155 |
while(token_scan(&t) != -1) { |
---|
156 |
switch(t.type) { |
---|
157 |
case NT_IDENT: |
---|
158 |
case NT_DQSTRING: |
---|
159 |
if(i<*cnt) vector[i] = t.token; |
---|
160 |
i++; |
---|
161 |
break; |
---|
162 |
case NT_SPACE: |
---|
163 |
break; |
---|
164 |
case NT_EOF: |
---|
165 |
if(i<*cnt) *cnt = i; |
---|
166 |
return i; |
---|
167 |
case NT_UNKNOWN: |
---|
168 |
/* UNREACHED */ |
---|
169 |
goto failure; |
---|
170 |
} |
---|
171 |
} |
---|
172 |
failure: |
---|
173 |
if(i<*cnt) *cnt = i; |
---|
174 |
return input - t.next; |
---|
175 |
} |
---|