| 1 | /* |
|---|
| 2 | * $Id$ |
|---|
| 3 | * |
|---|
| 4 | * Copyright 2006, The jCoderZ.org Project. All rights reserved. |
|---|
| 5 | * |
|---|
| 6 | * Redistribution and use in source and binary forms, with or without |
|---|
| 7 | * modification, are permitted provided that the following conditions are |
|---|
| 8 | * met: |
|---|
| 9 | * |
|---|
| 10 | * * Redistributions of source code must retain the above copyright |
|---|
| 11 | * notice, this list of conditions and the following disclaimer. |
|---|
| 12 | * * Redistributions in binary form must reproduce the above |
|---|
| 13 | * copyright notice, this list of conditions and the following |
|---|
| 14 | * disclaimer in the documentation and/or other materials |
|---|
| 15 | * provided with the distribution. |
|---|
| 16 | * * Neither the name of the jCoderZ.org Project nor the names of |
|---|
| 17 | * its contributors may be used to endorse or promote products |
|---|
| 18 | * derived from this software without specific prior written |
|---|
| 19 | * permission. |
|---|
| 20 | * |
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND |
|---|
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|---|
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|---|
| 24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS |
|---|
| 25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|---|
| 26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|---|
| 27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
|---|
| 28 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 29 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|---|
| 30 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|---|
| 31 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 32 | */ |
|---|
| 33 | package org.jcoderz.phoenix.sqlparser; |
|---|
| 34 | |
|---|
| 35 | import java.io.BufferedInputStream; |
|---|
| 36 | import java.io.FileInputStream; |
|---|
| 37 | import java.io.IOException; |
|---|
| 38 | import java.io.InputStream; |
|---|
| 39 | import java.math.BigDecimal; |
|---|
| 40 | import java.util.ArrayList; |
|---|
| 41 | import java.util.Iterator; |
|---|
| 42 | import java.util.List; |
|---|
| 43 | |
|---|
| 44 | import org.jcoderz.commons.util.Constants; |
|---|
| 45 | |
|---|
| 46 | /** |
|---|
| 47 | * Simple SQL Scanner. |
|---|
| 48 | * |
|---|
| 49 | * @author Michael Griffel |
|---|
| 50 | */ |
|---|
| 51 | public final class SqlScanner |
|---|
| 52 | implements ScannerInterface |
|---|
| 53 | { |
|---|
| 54 | private final BufferedInputStream mInputStream; |
|---|
| 55 | private int mColumn = 0; |
|---|
| 56 | private int mLine = 1; |
|---|
| 57 | private boolean mReportWhitespace = true; |
|---|
| 58 | private int mSaveColumn = 0; |
|---|
| 59 | |
|---|
| 60 | /** |
|---|
| 61 | * create a new SQL Scanner. |
|---|
| 62 | * @param input the input stream to read SQL data from |
|---|
| 63 | */ |
|---|
| 64 | public SqlScanner (InputStream input) |
|---|
| 65 | { |
|---|
| 66 | mInputStream = new BufferedInputStream(input); |
|---|
| 67 | } |
|---|
| 68 | |
|---|
| 69 | /** |
|---|
| 70 | * Returns the reportWhitespace. |
|---|
| 71 | * @return the reportWhitespace. |
|---|
| 72 | */ |
|---|
| 73 | public boolean isSetReportWhitespace () |
|---|
| 74 | { |
|---|
| 75 | return mReportWhitespace; |
|---|
| 76 | } |
|---|
| 77 | |
|---|
| 78 | /** |
|---|
| 79 | * Sets the reportWhitespace to given <code>reportWhitespace</code>. |
|---|
| 80 | * @param reportWhitespace The reportWhitespace to set. |
|---|
| 81 | */ |
|---|
| 82 | public void setReportWhitespace (boolean reportWhitespace) |
|---|
| 83 | { |
|---|
| 84 | mReportWhitespace = reportWhitespace; |
|---|
| 85 | } |
|---|
| 86 | |
|---|
| 87 | /** |
|---|
| 88 | * Returns the line. |
|---|
| 89 | * @return the line. |
|---|
| 90 | */ |
|---|
| 91 | public int getLine () |
|---|
| 92 | { |
|---|
| 93 | return mLine; |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | /** |
|---|
| 97 | * Returns the offset. |
|---|
| 98 | * @return the offset. |
|---|
| 99 | */ |
|---|
| 100 | public int getColumn () |
|---|
| 101 | { |
|---|
| 102 | return mColumn; |
|---|
| 103 | } |
|---|
| 104 | |
|---|
| 105 | /** |
|---|
| 106 | * This is just a wrapper around the real nextToken() method for logging. |
|---|
| 107 | * @return the next token |
|---|
| 108 | * @throws ParseException if a syntax error is encountered |
|---|
| 109 | * @see org.jcoderz.phoenix.sqlparser.ScannerInterface#nextToken() |
|---|
| 110 | */ |
|---|
| 111 | public Token nextToken () |
|---|
| 112 | throws ParseException |
|---|
| 113 | { |
|---|
| 114 | return getNextToken(); |
|---|
| 115 | } |
|---|
| 116 | |
|---|
| 117 | /** {@inheritDoc} */ |
|---|
| 118 | private Token getNextToken () |
|---|
| 119 | throws ParseException |
|---|
| 120 | { |
|---|
| 121 | for (;;) |
|---|
| 122 | { |
|---|
| 123 | mark(); |
|---|
| 124 | final int c = read(); |
|---|
| 125 | |
|---|
| 126 | if (c == -1) // EOF |
|---|
| 127 | { |
|---|
| 128 | return new Token(TokenType.EOF); |
|---|
| 129 | } |
|---|
| 130 | else if (isNewlineChar((char) c)) |
|---|
| 131 | { |
|---|
| 132 | final Token t = eatNewline(c); |
|---|
| 133 | if (mReportWhitespace) |
|---|
| 134 | { |
|---|
| 135 | return t; |
|---|
| 136 | } |
|---|
| 137 | continue; |
|---|
| 138 | } |
|---|
| 139 | else if (Character.isWhitespace((char) c)) |
|---|
| 140 | { |
|---|
| 141 | final Token t = eatWhitespaces(c); |
|---|
| 142 | if (mReportWhitespace) |
|---|
| 143 | { |
|---|
| 144 | return t; |
|---|
| 145 | } |
|---|
| 146 | continue; |
|---|
| 147 | } |
|---|
| 148 | else if (c == '(') |
|---|
| 149 | { |
|---|
| 150 | return new Token(TokenType.OPEN_PAREN, asString(c)); |
|---|
| 151 | } |
|---|
| 152 | else if (c == ')') |
|---|
| 153 | { |
|---|
| 154 | return new Token(TokenType.CLOSE_PAREN, asString(c)); |
|---|
| 155 | } |
|---|
| 156 | else if (c == ';') |
|---|
| 157 | { |
|---|
| 158 | return new Token(TokenType.SEMICOLON, asString(c)); |
|---|
| 159 | } |
|---|
| 160 | else if (c == ',') |
|---|
| 161 | { |
|---|
| 162 | return new Token(TokenType.COMMA, asString(c)); |
|---|
| 163 | } |
|---|
| 164 | else if (c == '/') // maybe block comment or single slash |
|---|
| 165 | { |
|---|
| 166 | mark(); |
|---|
| 167 | if (read() == '*') // a block comment |
|---|
| 168 | { |
|---|
| 169 | final String comment = eatBlockComment(); |
|---|
| 170 | return new Token(TokenType.COMMENT, comment); |
|---|
| 171 | } |
|---|
| 172 | reset(); |
|---|
| 173 | return new Token(TokenType.SLASH, asString(c)); |
|---|
| 174 | } |
|---|
| 175 | else if (c == '-') // comment or numeric |
|---|
| 176 | { |
|---|
| 177 | mark(); |
|---|
| 178 | final int d = read(); |
|---|
| 179 | |
|---|
| 180 | final Token t; |
|---|
| 181 | if (d == '-') // -> comment |
|---|
| 182 | { |
|---|
| 183 | final StringBuffer sb = new StringBuffer(); |
|---|
| 184 | sb.append("--"); |
|---|
| 185 | for (;;) |
|---|
| 186 | { |
|---|
| 187 | mark(); |
|---|
| 188 | final int e = read(); |
|---|
| 189 | if (e == '\n' || e == -1) // end of line or eof |
|---|
| 190 | { |
|---|
| 191 | reset(); |
|---|
| 192 | break; |
|---|
| 193 | } |
|---|
| 194 | sb.append((char) e); |
|---|
| 195 | } |
|---|
| 196 | t = new Token(TokenType.COMMENT, sb.toString()); |
|---|
| 197 | } |
|---|
| 198 | else if (Character.isDigit((char) d))// (negative) nummeric |
|---|
| 199 | { |
|---|
| 200 | final StringBuffer sb = new StringBuffer(); |
|---|
| 201 | sb.append('-'); |
|---|
| 202 | sb.append((char) d); |
|---|
| 203 | for (;;) |
|---|
| 204 | { |
|---|
| 205 | mark(); |
|---|
| 206 | final int e = read(); |
|---|
| 207 | if (! Character.isDigit((char) e)) |
|---|
| 208 | { |
|---|
| 209 | reset(); |
|---|
| 210 | break; |
|---|
| 211 | } |
|---|
| 212 | sb.append((char) e); |
|---|
| 213 | } |
|---|
| 214 | |
|---|
| 215 | final String negativeNumeric = sb.toString(); |
|---|
| 216 | try |
|---|
| 217 | { |
|---|
| 218 | Integer.parseInt(negativeNumeric); |
|---|
| 219 | t = new Token(TokenType.NUMERIC_LITERAL, negativeNumeric); |
|---|
| 220 | } |
|---|
| 221 | catch (NumberFormatException shouldNotOccur) |
|---|
| 222 | { |
|---|
| 223 | throw new ParseException("Cannot parse negative numberic '" |
|---|
| 224 | + negativeNumeric |
|---|
| 225 | + "'", shouldNotOccur, mLine, mColumn); |
|---|
| 226 | } |
|---|
| 227 | } |
|---|
| 228 | // operator '- ', '-(' or '-function' |
|---|
| 229 | else if (d == '(' || Character.isLetter((char) d) |
|---|
| 230 | || Character.isWhitespace((char) d)) |
|---|
| 231 | { |
|---|
| 232 | reset(); |
|---|
| 233 | return new Token(TokenType.OPERATOR, asString(c)); |
|---|
| 234 | } |
|---|
| 235 | else |
|---|
| 236 | { |
|---|
| 237 | throw new ParseException("Unexpected char '" + (char) d |
|---|
| 238 | + "', expected '-' or digit.", mLine, mColumn); |
|---|
| 239 | } |
|---|
| 240 | return t; |
|---|
| 241 | } |
|---|
| 242 | else if (c == '"' || c == '\'') // literal |
|---|
| 243 | { |
|---|
| 244 | final String literal = readStringLiteral(c); |
|---|
| 245 | return new Token(TokenType.STRING_LITERAL, literal); |
|---|
| 246 | } |
|---|
| 247 | else // keywords, identifier |
|---|
| 248 | { |
|---|
| 249 | final String word = readWord(c); |
|---|
| 250 | |
|---|
| 251 | try |
|---|
| 252 | { |
|---|
| 253 | // FIXME: prefix keyword? otherwise 'comma' will be a keyword |
|---|
| 254 | if (!TokenType.OPERATOR.toString().equalsIgnoreCase(word)) |
|---|
| 255 | { |
|---|
| 256 | final TokenType tokenType |
|---|
| 257 | = TokenType.fromString( |
|---|
| 258 | word.toLowerCase(Constants.SYSTEM_LOCALE)); |
|---|
| 259 | return new Token(tokenType, word); |
|---|
| 260 | } |
|---|
| 261 | } |
|---|
| 262 | catch (IllegalArgumentException ignore) |
|---|
| 263 | { |
|---|
| 264 | // not a known keyword |
|---|
| 265 | } |
|---|
| 266 | |
|---|
| 267 | // numeric literal? |
|---|
| 268 | try |
|---|
| 269 | { |
|---|
| 270 | new BigDecimal(word); // well-formed? |
|---|
| 271 | return new Token(TokenType.NUMERIC_LITERAL, word); |
|---|
| 272 | } |
|---|
| 273 | catch (NumberFormatException ignore) |
|---|
| 274 | { |
|---|
| 275 | // not a numeric |
|---|
| 276 | } |
|---|
| 277 | |
|---|
| 278 | // otherwise it must be a identifier (hopefully) |
|---|
| 279 | return new Token(TokenType.IDENTIFIER, word); |
|---|
| 280 | } |
|---|
| 281 | } |
|---|
| 282 | } |
|---|
| 283 | |
|---|
| 284 | private String eatBlockComment () |
|---|
| 285 | throws ParseException |
|---|
| 286 | { |
|---|
| 287 | // read block comment |
|---|
| 288 | final StringBuffer sb = new StringBuffer(); |
|---|
| 289 | sb.append("/*"); |
|---|
| 290 | for (;;) |
|---|
| 291 | { |
|---|
| 292 | mark(); |
|---|
| 293 | final int d = read(); |
|---|
| 294 | if (d == '*') // maybe end of block comment |
|---|
| 295 | { |
|---|
| 296 | mark(); |
|---|
| 297 | if (read() != '/') // not end of block comment |
|---|
| 298 | { |
|---|
| 299 | reset(); |
|---|
| 300 | sb.append((char) d); |
|---|
| 301 | continue; |
|---|
| 302 | } |
|---|
| 303 | sb.append("*/"); |
|---|
| 304 | break; |
|---|
| 305 | } |
|---|
| 306 | else if (isNewlineChar((char) d)) |
|---|
| 307 | { |
|---|
| 308 | ++mLine; mColumn = 0; |
|---|
| 309 | } |
|---|
| 310 | sb.append((char) d); |
|---|
| 311 | } |
|---|
| 312 | return sb.toString(); |
|---|
| 313 | } |
|---|
| 314 | |
|---|
| 315 | private String readWord (int c) |
|---|
| 316 | throws ParseException |
|---|
| 317 | { |
|---|
| 318 | final StringBuffer sb = new StringBuffer(); |
|---|
| 319 | sb.append((char) c); |
|---|
| 320 | for (;;) |
|---|
| 321 | { |
|---|
| 322 | mark(); |
|---|
| 323 | final int d = read(); |
|---|
| 324 | |
|---|
| 325 | if (isSpecialCharacter((char) d)) |
|---|
| 326 | { |
|---|
| 327 | reset(); |
|---|
| 328 | break; |
|---|
| 329 | } |
|---|
| 330 | sb.append((char) d); |
|---|
| 331 | } |
|---|
| 332 | return sb.toString(); |
|---|
| 333 | } |
|---|
| 334 | |
|---|
| 335 | private String readStringLiteral (int c) |
|---|
| 336 | throws ParseException |
|---|
| 337 | { |
|---|
| 338 | final StringBuffer sb = new StringBuffer(); |
|---|
| 339 | sb.append((char) c); |
|---|
| 340 | for (;;) |
|---|
| 341 | { |
|---|
| 342 | final int d = read(); |
|---|
| 343 | sb.append((char) d); |
|---|
| 344 | |
|---|
| 345 | if (d == '"' || d == '\'') |
|---|
| 346 | { |
|---|
| 347 | break; |
|---|
| 348 | } |
|---|
| 349 | } |
|---|
| 350 | return sb.toString(); |
|---|
| 351 | } |
|---|
| 352 | |
|---|
| 353 | private static boolean isSpecialCharacter (char c) |
|---|
| 354 | { |
|---|
| 355 | return (Character.isWhitespace(c) || c == '(' || c == ')' |
|---|
| 356 | || c == ';' || c == ',' || c == '-'); |
|---|
| 357 | } |
|---|
| 358 | |
|---|
| 359 | private Token eatNewline (int c) |
|---|
| 360 | throws ParseException |
|---|
| 361 | { |
|---|
| 362 | final Token t; |
|---|
| 363 | if (c == Constants.LINE_FEED_CHAR) // UNIX newline? |
|---|
| 364 | { |
|---|
| 365 | ++mLine; mColumn = 0; |
|---|
| 366 | t = new Token(TokenType.NEWLINE, asString(Constants.LINE_FEED_CHAR)); |
|---|
| 367 | } |
|---|
| 368 | else if (c == Constants.CARRIAGE_RETURN_CHAR) // WINDOWS newline? |
|---|
| 369 | { |
|---|
| 370 | mark(); |
|---|
| 371 | if (read() != Constants.LINE_FEED_CHAR) // eat LF |
|---|
| 372 | { |
|---|
| 373 | reset(); |
|---|
| 374 | } |
|---|
| 375 | ++mLine; mColumn = 0; |
|---|
| 376 | t = new Token(TokenType.NEWLINE, |
|---|
| 377 | asString(Constants.CARRIAGE_RETURN_CHAR) |
|---|
| 378 | + asString(Constants.LINE_FEED_CHAR)); |
|---|
| 379 | } |
|---|
| 380 | else |
|---|
| 381 | { |
|---|
| 382 | throw new ParseException("Unexpected newline char '" |
|---|
| 383 | + (char) c + "'", mLine, mColumn); |
|---|
| 384 | } |
|---|
| 385 | return t; |
|---|
| 386 | } |
|---|
| 387 | |
|---|
| 388 | private Token eatWhitespaces (int c) |
|---|
| 389 | throws ParseException |
|---|
| 390 | { |
|---|
| 391 | final StringBuffer sb = new StringBuffer(); |
|---|
| 392 | sb.append((char) c); // TODO: assertTrue(isWhitespace(c)); |
|---|
| 393 | for (;;) |
|---|
| 394 | { |
|---|
| 395 | mark(); |
|---|
| 396 | final int d = read(); |
|---|
| 397 | |
|---|
| 398 | if (Character.isWhitespace((char) d) |
|---|
| 399 | && ! isNewlineChar((char) d)) |
|---|
| 400 | { |
|---|
| 401 | sb.append((char) d); |
|---|
| 402 | } |
|---|
| 403 | else // not a whitespace, or is newline |
|---|
| 404 | { // which must be reported separately |
|---|
| 405 | reset(); |
|---|
| 406 | break; |
|---|
| 407 | } |
|---|
| 408 | } |
|---|
| 409 | return new Token(TokenType.WHITESPACE, sb.toString()); |
|---|
| 410 | } |
|---|
| 411 | |
|---|
| 412 | private void reset () |
|---|
| 413 | throws ParseException |
|---|
| 414 | { |
|---|
| 415 | try |
|---|
| 416 | { |
|---|
| 417 | mInputStream.reset(); |
|---|
| 418 | mColumn = mSaveColumn; |
|---|
| 419 | } |
|---|
| 420 | catch (IOException e) |
|---|
| 421 | { |
|---|
| 422 | final ParseException pe |
|---|
| 423 | = new ParseException(e, mLine, mColumn); |
|---|
| 424 | pe.initCause(e); |
|---|
| 425 | throw pe; |
|---|
| 426 | } |
|---|
| 427 | } |
|---|
| 428 | |
|---|
| 429 | private void mark () |
|---|
| 430 | { |
|---|
| 431 | mSaveColumn = mColumn; |
|---|
| 432 | mInputStream.mark(Integer.MAX_VALUE); |
|---|
| 433 | } |
|---|
| 434 | |
|---|
| 435 | private static String asString (int c) |
|---|
| 436 | { |
|---|
| 437 | return Character.toString((char) c); |
|---|
| 438 | } |
|---|
| 439 | |
|---|
| 440 | private int read () |
|---|
| 441 | throws ParseException |
|---|
| 442 | { |
|---|
| 443 | int c = -1; |
|---|
| 444 | try |
|---|
| 445 | { |
|---|
| 446 | ++mColumn; |
|---|
| 447 | c = mInputStream.read(); |
|---|
| 448 | } |
|---|
| 449 | catch (IOException e) |
|---|
| 450 | { |
|---|
| 451 | throw new ParseException(e, mLine, mColumn); |
|---|
| 452 | } |
|---|
| 453 | return c; |
|---|
| 454 | } |
|---|
| 455 | |
|---|
| 456 | private static boolean isNewlineChar (char c) |
|---|
| 457 | { |
|---|
| 458 | return (c == Constants.LINE_FEED_CHAR |
|---|
| 459 | || c == Constants.CARRIAGE_RETURN_CHAR); |
|---|
| 460 | } |
|---|
| 461 | |
|---|
| 462 | /** |
|---|
| 463 | * Simple SQL Scanner that reads the file given at argument 1 and dumps |
|---|
| 464 | * the tokens to <code>stderr</code> and the content on <code>stdout</code>. |
|---|
| 465 | * |
|---|
| 466 | * @param args command line arguments |
|---|
| 467 | * @throws Exception An error occurred |
|---|
| 468 | */ |
|---|
| 469 | public static void main (String[] args) |
|---|
| 470 | throws Exception |
|---|
| 471 | { |
|---|
| 472 | final SqlScanner scanner |
|---|
| 473 | = new SqlScanner(new FileInputStream(args[0])); |
|---|
| 474 | |
|---|
| 475 | final List tokens = new ArrayList(); |
|---|
| 476 | |
|---|
| 477 | for (;;) |
|---|
| 478 | { |
|---|
| 479 | final Token t = scanner.nextToken(); |
|---|
| 480 | System.err.println(scanner.getLine() + ": " |
|---|
| 481 | + scanner.getColumn() + " = " + t); |
|---|
| 482 | tokens.add(t); |
|---|
| 483 | if (t.getType() == TokenType.EOF) |
|---|
| 484 | { |
|---|
| 485 | break; |
|---|
| 486 | } |
|---|
| 487 | } |
|---|
| 488 | |
|---|
| 489 | for (final Iterator iterator = tokens.iterator(); iterator.hasNext();) |
|---|
| 490 | { |
|---|
| 491 | final Token t = (Token) iterator.next(); |
|---|
| 492 | System.out.print(t.getValue()); |
|---|
| 493 | } |
|---|
| 494 | System.out.flush(); |
|---|
| 495 | } |
|---|
| 496 | } |
|---|