1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.treetank.service.xml.xpath.parser;
29
30
31
32
33
34
35
36
37
38
39
40 public final class XPathScanner {
41
42
43 private final String mQuery;
44
45
46 private int mPos;
47
48
49 private int mLastPos;
50
51
52 private enum State {
53
54 START,
55
56 NUMBER,
57
58 TEXT,
59
60 SPECIAL,
61
62 SPECIAL2,
63
64 COMMENT,
65
66 E_NUM,
67
68 UNKNOWN
69 }
70
71
72 private State mState;
73
74
75 private StringBuilder mOutput;
76
77
78
79
80
81 private boolean mFinnished;
82
83
84 private TokenType mType;
85
86
87 private char mInput;
88
89
90
91
92
93
94 private State mStartState;
95
96
97
98
99
100
101 private int mCommentCount;
102
103
104
105
106
107
108
109
110 public XPathScanner(final String mQuery) {
111
112 this.mQuery = mQuery + '#';
113 mPos = 0;
114 mLastPos = mPos;
115 mStartState = State.START;
116 mCommentCount = 0;
117 }
118
119
120
121
122
123
124
125
126
127
128 public IXPathToken nextToken() {
129
130
131 mState = mStartState;
132
133 mStartState = State.START;
134 mOutput = new StringBuilder();
135 mFinnished = false;
136 mType = TokenType.INVALID;
137 mLastPos = mPos;
138
139 do {
140 mInput = mQuery.charAt(mPos);
141
142 switch (mState) {
143 case START:
144 scanStart();
145 break;
146 case NUMBER:
147 scanNumber();
148 break;
149 case TEXT:
150 scanText();
151 break;
152 case SPECIAL2:
153 scanTwoDigitSpecial();
154 break;
155 case COMMENT:
156 scanComment();
157 break;
158 case E_NUM:
159 scanENum();
160 break;
161 default:
162 mPos++;
163 mFinnished = true;
164 }
165 } while (!mFinnished || mPos >= mQuery.length());
166
167 if (mCommentCount > 0) {
168 throw new IllegalStateException("Error in Query. Comment does not end.");
169 }
170
171 return new VariableXPathToken(mOutput.toString(), mType);
172 }
173
174
175
176
177 private void scanStart() {
178
179 if (isNumber(mInput)) {
180 mState = State.NUMBER;
181 mOutput.append(mInput);
182 mType = TokenType.VALUE;
183 } else if (isFirstLetter(mInput)) {
184 mState = State.TEXT;
185 mOutput.append(mInput);
186 mType = TokenType.TEXT;
187 } else if (isSpecial(mInput)) {
188 mState = State.SPECIAL;
189 mOutput.append(mInput);
190 mType = retrieveType(mInput);
191 mFinnished = true;
192 } else if (isTwoDigistSpecial(mInput)) {
193 mState = State.SPECIAL2;
194 mOutput.append(mInput);
195 mType = retrieveType(mInput);
196 } else if ((mInput == ' ') || (mInput == '\n')) {
197 mState = State.START;
198 mOutput.append(mInput);
199 mFinnished = true;
200 mType = TokenType.SPACE;
201 } else if (mInput == '#') {
202 mType = TokenType.END;
203 mFinnished = true;
204 mPos--;
205 } else {
206 mState = State.UNKNOWN;
207 mOutput.append(mInput);
208 mFinnished = true;
209 }
210 mPos++;
211 }
212
213
214
215
216
217
218
219
220 private TokenType retrieveType(final char paramInput) {
221
222 TokenType type;
223 switch (paramInput) {
224 case ',':
225 type = TokenType.COMMA;
226 break;
227 case '(':
228 type = TokenType.OPEN_BR;
229 break;
230 case ')':
231 type = TokenType.CLOSE_BR;
232 break;
233 case '[':
234 type = TokenType.OPEN_SQP;
235 break;
236 case ']':
237 type = TokenType.CLOSE_SQP;
238 break;
239 case '@':
240 type = TokenType.AT;
241 break;
242 case '=':
243 type = TokenType.EQ;
244 break;
245 case '<':
246 case '>':
247 type = TokenType.COMP;
248 break;
249 case '!':
250 type = TokenType.N_EQ;
251 break;
252 case '/':
253 type = TokenType.SLASH;
254 break;
255 case ':':
256 type = TokenType.COLON;
257 break;
258 case '.':
259 type = TokenType.POINT;
260 break;
261 case '+':
262 type = TokenType.PLUS;
263 break;
264 case '-':
265 type = TokenType.MINUS;
266 break;
267 case '\'':
268 type = TokenType.SINGLE_QUOTE;
269 break;
270 case '"':
271 type = TokenType.DBL_QUOTE;
272 break;
273 case '$':
274 type = TokenType.DOLLAR;
275 break;
276 case '?':
277 type = TokenType.INTERROGATION;
278 break;
279 case '*':
280 type = TokenType.STAR;
281 break;
282 case '|':
283 type = TokenType.OR;
284 break;
285 default:
286 type = TokenType.INVALID;
287 }
288 return type;
289
290 }
291
292
293
294
295
296
297
298
299 private boolean isFirstLetter(final char paramInput) {
300
301 return (paramInput >= 'a' && paramInput <= 'z') || (paramInput >= 'A' && paramInput <= 'Z')
302 || (paramInput == '_');
303 }
304
305
306
307
308
309
310
311
312 private boolean isNumber(final char paramInput) {
313
314 return paramInput >= '0' && paramInput <= '9';
315 }
316
317
318
319
320
321
322
323
324
325
326 private boolean isTwoDigistSpecial(final char paramInput) {
327
328 return (paramInput == '<') || (paramInput == '>') || (paramInput == '(') || (paramInput == '!')
329 || (paramInput == '/') || (paramInput == '.');
330 }
331
332
333
334
335
336
337
338
339 private boolean isSpecial(final char paramInput) {
340
341 return (paramInput == ')') || (paramInput == ';') || (paramInput == ',') || (paramInput == '@')
342 || (paramInput == '[') || (paramInput == ']') || (paramInput == '=') || (paramInput == '"')
343 || (paramInput == '\'') || (paramInput == '$') || (paramInput == ':') || (paramInput == '|')
344 || (paramInput == '+') || (paramInput == '-') || (paramInput == '?') || (paramInput == '*');
345 }
346
347
348
349
350 private void scanNumber() {
351
352 if (mInput >= '0' && mInput <= '9') {
353 mOutput.append(mInput);
354 mPos++;
355 } else {
356
357 if (mInput == 'E' || mInput == 'e') {
358 mStartState = State.E_NUM;
359 }
360 mFinnished = true;
361 }
362 }
363
364
365
366
367
368 private void scanText() {
369
370 if (isLetter(mInput)) {
371 mOutput.append(mInput);
372 mPos++;
373
374 } else {
375 mType = TokenType.TEXT;
376 mFinnished = true;
377 }
378 }
379
380
381
382
383
384 private void scanTwoDigitSpecial() {
385
386 if (mInput == '=' && (mType == TokenType.COMP || mType == TokenType.EQ || mType == TokenType.N_EQ)) {
387 mOutput.append(mInput);
388 mPos++;
389 } else if (mInput == '/' && (mType == TokenType.SLASH)) {
390 mOutput.append(mInput);
391 mType = TokenType.DESC_STEP;
392 mPos++;
393 } else if (mInput == '.' && (mType == TokenType.POINT)) {
394 mOutput.append(mInput);
395 mType = TokenType.PARENT;
396 mPos++;
397 } else if (mInput == '<' && mOutput.toString().equals("<")) {
398 mOutput.append(mInput);
399 mType = TokenType.L_SHIFT;
400 mPos++;
401 } else if (mInput == '>' && mOutput.toString().equals(">")) {
402 mOutput.append(mInput);
403 mType = TokenType.R_SHIFT;
404 mPos++;
405 } else if (mInput == ':' && mType == TokenType.OPEN_BR) {
406
407 mOutput = new StringBuilder();
408 mType = TokenType.COMMENT;
409 mCommentCount++;
410 mState = State.COMMENT;
411 mPos++;
412 } else {
413 mFinnished = true;
414 }
415 }
416
417
418
419
420 private void scanENum() {
421
422 if (mInput == 'E' || mInput == 'e') {
423 mOutput.append(mInput);
424 mState = State.START;
425 mType = TokenType.E_NUMBER;
426 mFinnished = true;
427 mPos++;
428 } else {
429 mFinnished = true;
430 mState = State.START;
431 mType = TokenType.INVALID;
432 }
433 }
434
435
436
437
438 private void scanComment() {
439 final char input = mQuery.charAt(mPos + 1);
440 if (mInput == ':') {
441
442 if (input == ')') {
443 mCommentCount--;
444 if (mCommentCount == 0) {
445 mState = State.START;
446
447
448 mPos++;
449 }
450
451 }
452 } else if (mInput == '(') {
453
454 if (input == ':') {
455 mCommentCount++;
456
457 }
458 }
459 mPos++;
460 }
461
462
463
464
465
466
467
468
469 private boolean isLetter(final char paramInput) {
470
471 return (paramInput >= '0' && paramInput <= '9') || (paramInput >= 'a' && paramInput <= 'z')
472 || (paramInput >= 'A' && paramInput <= 'Z') || (paramInput == '_') || (paramInput == '-')
473 || (paramInput == '.');
474
475 }
476
477
478
479
480
481
482
483
484
485 public IXPathToken lookUpTokens(final int paramNext) {
486
487 int nextCount = paramNext;
488
489
490 final int lastPos = mPos;
491 IXPathToken token = nextToken();
492
493 while (--nextCount > 0) {
494 token = nextToken();
495 if (token.getType() == TokenType.SPACE) {
496 nextCount++;
497 }
498 }
499
500
501 mPos = lastPos;
502 return token;
503 }
504
505
506
507
508
509
510
511
512 public String begin() {
513
514 return mQuery.substring(0, mLastPos);
515 }
516
517
518
519
520
521
522 public int getPos() {
523
524 return mPos;
525 }
526
527
528
529
530 public String toString() {
531 return mQuery;
532 }
533 }