| 1 | | | |
| 2 | | | |
| 3 | | | |
| 4 | | | |
| 5 | | | |
| 6 | | | |
| 7 | | | |
| 8 | | | |
| 9 | | | |
| 10 | | | |
| 11 | | | |
| 12 | | | |
| 13 | | | |
| 14 | | | |
| 15 | | | |
| 16 | | | |
| 17 | | | |
| 18 | | | |
| 19 | | | |
| 20 | | | |
| 21 | | | |
| 22 | | | |
| 23 | | | |
| 24 | | | |
| 25 | | | |
| 26 | | | |
| 27 | | | |
| 28 | | | |
| 29 | | | |
| 30 | | | |
| 31 | | | |
| 32 | | | |
| 33 | | | package org.jcoderz.phoenix.sqlparser; |
| 34 | | | |
| 35 | | | import java.io.BufferedInputStream; |
| 36 | | | import java.io.FileInputStream; |
| 37 | | | import java.io.IOException; |
| 38 | | | import java.io.InputStream; |
| 39 | | | import java.math.BigDecimal; |
| 40 | | | import java.util.ArrayList; |
| 41 | | | import java.util.Iterator; |
| 42 | | | import java.util.List; |
| 43 | | | |
| 44 | | | import org.jcoderz.commons.util.Constants; |
| 45 | | | |
| 46 | | | |
| 47 | | | |
| 48 | | | |
| 49 | | | @author |
| 50 | | | |
| 51 | | | public final class SqlScanner |
| 52 | | | implements ScannerInterface |
| 53 | | | { |
| 54 | | | private final BufferedInputStream mInputStream; |
| 55 | 100 | | private int mColumn = 0; |
| 56 | 100 | | private int mLine = 1; |
| 57 | 100 | | private boolean mReportWhitespace = true; |
| 58 | 100 | | private int mSaveColumn = 0; |
| 59 | | | |
| 60 | | | |
| 61 | | | |
| 62 | | | @param |
| 63 | | | |
| 64 | | | public SqlScanner (InputStream input) |
| 65 | 100 | | { |
| 66 | 100 | | mInputStream = new BufferedInputStream(input); |
| 67 | 100 | | } |
| 68 | | | |
| 69 | | | |
| 70 | | | |
| 71 | | | @return |
| 72 | | | |
| 73 | | | public boolean isSetReportWhitespace () |
| 74 | | | { |
| 75 | 0 | | return mReportWhitespace; |
| 76 | | | } |
| 77 | | | |
| 78 | | | |
| 79 | | | <code></code> |
| 80 | | | @param |
| 81 | | | |
| 82 | | | public void setReportWhitespace (boolean reportWhitespace) |
| 83 | | | { |
| 84 | 100 | | mReportWhitespace = reportWhitespace; |
| 85 | 100 | | } |
| 86 | | | |
| 87 | | | |
| 88 | | | |
| 89 | | | @return |
| 90 | | | |
| 91 | | | public int getLine () |
| 92 | | | { |
| 93 | 0 | | return mLine; |
| 94 | | | } |
| 95 | | | |
| 96 | | | |
| 97 | | | |
| 98 | | | @return |
| 99 | | | |
| 100 | | | public int getColumn () |
| 101 | | | { |
| 102 | 0 | | return mColumn; |
| 103 | | | } |
| 104 | | | |
| 105 | | | |
| 106 | | | |
| 107 | | | @return |
| 108 | | | @throws |
| 109 | | | @see |
| 110 | | | |
| 111 | | | public Token nextToken () |
| 112 | | | throws ParseException |
| 113 | | | { |
| 114 | 100 | | return getNextToken(); |
| 115 | | | } |
| 116 | | | |
| 117 | | | {@inheritDoc} |
| 118 | | (1)(2)(3)(4) | private Token getNextToken () |
| 119 | | | throws ParseException |
| 120 | | | { |
| 121 | | | for (;;) |
| 122 | | | { |
| 123 | 100 | | mark(); |
| 124 | 100 | | final int c = read(); |
| 125 | | | |
| 126 | 100 | | if (c == -1) |
| 127 | | | { |
| 128 | 100 | | return new Token(TokenType.EOF); |
| 129 | | | } |
| 130 | 100 | | else if (isNewlineChar((char) c)) |
| 131 | | | { |
| 132 | 100 | | final Token t = eatNewline(c); |
| 133 | 100 | | if (mReportWhitespace) |
| 134 | | | { |
| 135 | 100 | | return t; |
| 136 | | | } |
| 137 | | | continue; |
| 138 | | | } |
| 139 | 100 | | else if (Character.isWhitespace((char) c)) |
| 140 | | | { |
| 141 | 100 | | final Token t = eatWhitespaces(c); |
| 142 | 100 | | if (mReportWhitespace) |
| 143 | | | { |
| 144 | 100 | | return t; |
| 145 | | | } |
| 146 | | | continue; |
| 147 | | | } |
| 148 | 100 | | else if (c == '(') |
| 149 | | | { |
| 150 | 100 | | return new Token(TokenType.OPEN_PAREN, asString(c)); |
| 151 | | | } |
| 152 | 100 | | else if (c == ')') |
| 153 | | | { |
| 154 | 100 | | return new Token(TokenType.CLOSE_PAREN, asString(c)); |
| 155 | | | } |
| 156 | 100 | | else if (c == ';') |
| 157 | | | { |
| 158 | 100 | | return new Token(TokenType.SEMICOLON, asString(c)); |
| 159 | | | } |
| 160 | 100 | | else if (c == ',') |
| 161 | | | { |
| 162 | 100 | | return new Token(TokenType.COMMA, asString(c)); |
| 163 | | | } |
| 164 | 100 | | else if (c == '/') |
| 165 | | | { |
| 166 | 0 | | mark(); |
| 167 | 0 | | if (read() == '*') |
| 168 | | | { |
| 169 | 0 | | final String comment = eatBlockComment(); |
| 170 | 0 | | return new Token(TokenType.COMMENT, comment); |
| 171 | | | } |
| 172 | 0 | | reset(); |
| 173 | 0 | | return new Token(TokenType.SLASH, asString(c)); |
| 174 | | | } |
| 175 | 100 | | else if (c == '-') |
| 176 | | | { |
| 177 | 100 | | mark(); |
| 178 | 100 | | final int d = read(); |
| 179 | | | |
| 180 | | | final Token t; |
| 181 | 100 | | if (d == '-') |
| 182 | | | { |
| 183 | 100 | | final StringBuffer sb = new StringBuffer(); |
| 184 | 100 | | sb.append("--"); |
| 185 | | | for (;;) |
| 186 | | | { |
| 187 | 100 | | mark(); |
| 188 | 100 | | final int e = read(); |
| 189 | 100 | | if (e == '\n' || e == -1) |
| 190 | | | { |
| 191 | 100 | | reset(); |
| 192 | 100 | | break; |
| 193 | | | } |
| 194 | 100 | | sb.append((char) e); |
| 195 | 100 | | } |
| 196 | 100 | | t = new Token(TokenType.COMMENT, sb.toString()); |
| 197 | 100 | | } |
| 198 | 0 | | else if (Character.isDigit((char) d)) |
| 199 | | | { |
| 200 | 0 | | final StringBuffer sb = new StringBuffer(); |
| 201 | 0 | | sb.append('-'); |
| 202 | 0 | | sb.append((char) d); |
| 203 | | | for (;;) |
| 204 | | | { |
| 205 | 0 | | mark(); |
| 206 | 0 | | final int e = read(); |
| 207 | 0 | | if (! Character.isDigit((char) e)) |
| 208 | | | { |
| 209 | 0 | | reset(); |
| 210 | 0 | | break; |
| 211 | | | } |
| 212 | 0 | | sb.append((char) e); |
| 213 | 0 | | } |
| 214 | | | |
| 215 | 0 | | final String negativeNumeric = sb.toString(); |
| 216 | | | try |
| 217 | | | { |
| 218 | 0 | | Integer.parseInt(negativeNumeric); |
| 219 | 0 | | t = new Token(TokenType.NUMERIC_LITERAL, negativeNumeric); |
| 220 | | | } |
| 221 | 0 | | catch (NumberFormatException shouldNotOccur) |
| 222 | | | { |
| 223 | 0 | | throw new ParseException("Cannot parse negative numberic '" |
| 224 | | | + negativeNumeric |
| 225 | | | + "'", shouldNotOccur, mLine, mColumn); |
| 226 | 0 | | } |
| 227 | 0 | | } |
| 228 | | | |
| 229 | 0 | | else if (d == '(' || Character.isLetter((char) d) |
| 230 | | | || Character.isWhitespace((char) d)) |
| 231 | | | { |
| 232 | 0 | | reset(); |
| 233 | 0 | | return new Token(TokenType.OPERATOR, asString(c)); |
| 234 | | | } |
| 235 | | | else |
| 236 | | | { |
| 237 | 0 | | throw new ParseException("Unexpected char '" + (char) d |
| 238 | | | + "', expected '-' or digit.", mLine, mColumn); |
| 239 | | | } |
| 240 | 100 | | return t; |
| 241 | | | } |
| 242 | 100 | | else if (c == '"' || c == '\'') |
| 243 | | | { |
| 244 | 100 | | final String literal = readStringLiteral(c); |
| 245 | 100 | | return new Token(TokenType.STRING_LITERAL, literal); |
| 246 | | | } |
| 247 | | | else |
| 248 | | | { |
| 249 | 100 | | final String word = readWord(c); |
| 250 | | | |
| 251 | | | try |
| 252 | | | { |
| 253 | | (5) | |
| 254 | 100 | | if (!TokenType.OPERATOR.toString().equalsIgnoreCase(word)) |
| 255 | | | { |
| 256 | 100 | | final TokenType tokenType |
| 257 | | | = TokenType.fromString( |
| 258 | | | word.toLowerCase(Constants.SYSTEM_LOCALE)); |
| 259 | 100 | | return new Token(tokenType, word); |
| 260 | | | } |
| 261 | | | } |
| 262 | 100 | | catch (IllegalArgumentException ignore) |
| 263 | | | { |
| 264 | | | |
| 265 | 0 | | } |
| 266 | | | |
| 267 | | | |
| 268 | | | try |
| 269 | | | { |
| 270 | 100 | (6) | new BigDecimal(word); |
| 271 | 100 | | return new Token(TokenType.NUMERIC_LITERAL, word); |
| 272 | | | } |
| 273 | 100 | | catch (NumberFormatException ignore) |
| 274 | | | { |
| 275 | | | |
| 276 | | | } |
| 277 | | | |
| 278 | | | |
| 279 | 100 | | return new Token(TokenType.IDENTIFIER, word); |
| 280 | | | } |
| 281 | | | } |
| 282 | | | } |
| 283 | | | |
| 284 | | | private String eatBlockComment () |
| 285 | | | throws ParseException |
| 286 | | | { |
| 287 | | | |
| 288 | 0 | | final StringBuffer sb = new StringBuffer(); |
| 289 | 0 | | sb.append("/*"); |
| 290 | | | for (;;) |
| 291 | | | { |
| 292 | 0 | | mark(); |
| 293 | 0 | | final int d = read(); |
| 294 | 0 | | if (d == '*') |
| 295 | | | { |
| 296 | 0 | | mark(); |
| 297 | 0 | | if (read() != '/') |
| 298 | | | { |
| 299 | 0 | | reset(); |
| 300 | 0 | | sb.append((char) d); |
| 301 | 0 | | continue; |
| 302 | | | } |
| 303 | 0 | | sb.append("*/"); |
| 304 | 0 | | break; |
| 305 | | | } |
| 306 | 0 | | else if (isNewlineChar((char) d)) |
| 307 | | | { |
| 308 | 0 | | ++mLine; mColumn = 0; |
| 309 | | | } |
| 310 | 0 | | sb.append((char) d); |
| 311 | 0 | | } |
| 312 | 0 | | return sb.toString(); |
| 313 | | | } |
| 314 | | | |
| 315 | | | private String readWord (int c) |
| 316 | | | throws ParseException |
| 317 | | | { |
| 318 | 100 | | final StringBuffer sb = new StringBuffer(); |
| 319 | 100 | | sb.append((char) c); |
| 320 | | | for (;;) |
| 321 | | | { |
| 322 | 100 | | mark(); |
| 323 | 100 | | final int d = read(); |
| 324 | | | |
| 325 | 100 | | if (isSpecialCharacter((char) d)) |
| 326 | | | { |
| 327 | 100 | | reset(); |
| 328 | 100 | | break; |
| 329 | | | } |
| 330 | 100 | | sb.append((char) d); |
| 331 | 100 | | } |
| 332 | 100 | | return sb.toString(); |
| 333 | | | } |
| 334 | | | |
| 335 | | | private String readStringLiteral (int c) |
| 336 | | | throws ParseException |
| 337 | | | { |
| 338 | 100 | | final StringBuffer sb = new StringBuffer(); |
| 339 | 100 | | sb.append((char) c); |
| 340 | | | for (;;) |
| 341 | | | { |
| 342 | 100 | | final int d = read(); |
| 343 | 100 | | sb.append((char) d); |
| 344 | | | |
| 345 | 100 | | if (d == '"' || d == '\'') |
| 346 | | | { |
| 347 | 100 | | break; |
| 348 | | | } |
| 349 | 100 | | } |
| 350 | 100 | | return sb.toString(); |
| 351 | | | } |
| 352 | | | |
| 353 | | | private static boolean isSpecialCharacter (char c) |
| 354 | | | { |
| 355 | 100 | | return (Character.isWhitespace(c) || c == '(' || c == ')' |
| 356 | | | || c == ';' || c == ',' || c == '-'); |
| 357 | | | } |
| 358 | | | |
| 359 | | | private Token eatNewline (int c) |
| 360 | | | throws ParseException |
| 361 | | | { |
| 362 | | | final Token t; |
| 363 | 100 | | if (c == Constants.LINE_FEED_CHAR) |
| 364 | | | { |
| 365 | 100 | | ++mLine; mColumn = 0; |
| 366 | 100 | | t = new Token(TokenType.NEWLINE, asString(Constants.LINE_FEED_CHAR)); |
| 367 | | | } |
| 368 | 0 | | else if (c == Constants.CARRIAGE_RETURN_CHAR) |
| 369 | | | { |
| 370 | 0 | | mark(); |
| 371 | 0 | | if (read() != Constants.LINE_FEED_CHAR) |
| 372 | | | { |
| 373 | 0 | | reset(); |
| 374 | | | } |
| 375 | 0 | | ++mLine; mColumn = 0; |
| 376 | 0 | | t = new Token(TokenType.NEWLINE, |
| 377 | | | asString(Constants.CARRIAGE_RETURN_CHAR) |
| 378 | | | + asString(Constants.LINE_FEED_CHAR)); |
| 379 | | | } |
| 380 | | | else |
| 381 | | | { |
| 382 | 0 | | throw new ParseException("Unexpected newline char '" |
| 383 | | | + (char) c + "'", mLine, mColumn); |
| 384 | | | } |
| 385 | 100 | | return t; |
| 386 | | | } |
| 387 | | | |
| 388 | | | private Token eatWhitespaces (int c) |
| 389 | | | throws ParseException |
| 390 | | | { |
| 391 | 100 | | final StringBuffer sb = new StringBuffer(); |
| 392 | 100 | (7) | sb.append((char) c); |
| 393 | | | for (;;) |
| 394 | | | { |
| 395 | 100 | | mark(); |
| 396 | 100 | | final int d = read(); |
| 397 | | | |
| 398 | 100 | | if (Character.isWhitespace((char) d) |
| 399 | | | && ! isNewlineChar((char) d)) |
| 400 | | | { |
| 401 | 100 | | sb.append((char) d); |
| 402 | | | } |
| 403 | | | else |
| 404 | | | { |
| 405 | 100 | | reset(); |
| 406 | 100 | | break; |
| 407 | | | } |
| 408 | 100 | | } |
| 409 | 100 | | return new Token(TokenType.WHITESPACE, sb.toString()); |
| 410 | | | } |
| 411 | | | |
| 412 | | | private void reset () |
| 413 | | | throws ParseException |
| 414 | | | { |
| 415 | | | try |
| 416 | | | { |
| 417 | 100 | | mInputStream.reset(); |
| 418 | 100 | | mColumn = mSaveColumn; |
| 419 | | | } |
| 420 | 0 | | catch (IOException e) |
| 421 | | | { |
| 422 | 0 | | final ParseException pe |
| 423 | | | = new ParseException(e, mLine, mColumn); |
| 424 | 0 | | pe.initCause(e); |
| 425 | 0 | | throw pe; |
| 426 | 100 | | } |
| 427 | 100 | | } |
| 428 | | | |
| 429 | | | private void mark () |
| 430 | | | { |
| 431 | 100 | | mSaveColumn = mColumn; |
| 432 | 100 | | mInputStream.mark(Integer.MAX_VALUE); |
| 433 | 100 | | } |
| 434 | | | |
| 435 | | | private static String asString (int c) |
| 436 | | | { |
| 437 | 100 | | return Character.toString((char) c); |
| 438 | | | } |
| 439 | | | |
| 440 | | | private int read () |
| 441 | | | throws ParseException |
| 442 | | | { |
| 443 | 100 | | int c = -1; |
| 444 | | | try |
| 445 | | | { |
| 446 | 100 | | ++mColumn; |
| 447 | 100 | | c = mInputStream.read(); |
| 448 | | | } |
| 449 | 0 | | catch (IOException e) |
| 450 | | | { |
| 451 | 0 | | throw new ParseException(e, mLine, mColumn); |
| 452 | 100 | | } |
| 453 | 100 | | return c; |
| 454 | | | } |
| 455 | | | |
| 456 | | | private static boolean isNewlineChar (char c) |
| 457 | | | { |
| 458 | 100 | | return (c == Constants.LINE_FEED_CHAR |
| 459 | | | || c == Constants.CARRIAGE_RETURN_CHAR); |
| 460 | | | } |
| 461 | | | |
| 462 | | | |
| 463 | | | |
| 464 | | | <code></code><code></code> |
| 465 | | | |
| 466 | | | @param |
| 467 | | | @throws |
| 468 | | | |
| 469 | | | public static void main (String[] args) |
| 470 | | (8) | throws Exception |
| 471 | | | { |
| 472 | 0 | | final SqlScanner scanner |
| 473 | | | = new SqlScanner(new FileInputStream(args[0])); |
| 474 | | | |
| 475 | 0 | | final List tokens = new ArrayList(); |
| 476 | | | |
| 477 | | | for (;;) |
| 478 | | | { |
| 479 | 0 | | final Token t = scanner.nextToken(); |
| 480 | 0 | | System.err.println(scanner.getLine() + ": " |
| 481 | | | + scanner.getColumn() + " = " + t); |
| 482 | 0 | | tokens.add(t); |
| 483 | 0 | | if (t.getType() == TokenType.EOF) |
| 484 | | | { |
| 485 | 0 | | break; |
| 486 | | | } |
| 487 | 0 | | } |
| 488 | | | |
| 489 | 0 | | for (final Iterator iterator = tokens.iterator(); iterator.hasNext();) |
| 490 | | | { |
| 491 | 0 | | final Token t = (Token) iterator.next(); |
| 492 | 0 | | System.out.print(t.getValue()); |
| 493 | 0 | | } |
| 494 | 0 | | System.out.flush(); |
| 495 | 0 | | } |
| 496 | | | } |