| 1 | /* |
|---|
| 2 | * $Id$ |
|---|
| 3 | * |
|---|
| 4 | * Copyright 2006, The jCoderZ.org Project. All rights reserved. |
|---|
| 5 | * |
|---|
| 6 | * Redistribution and use in source and binary forms, with or without |
|---|
| 7 | * modification, are permitted provided that the following conditions are |
|---|
| 8 | * met: |
|---|
| 9 | * |
|---|
| 10 | * * Redistributions of source code must retain the above copyright |
|---|
| 11 | * notice, this list of conditions and the following disclaimer. |
|---|
| 12 | * * Redistributions in binary form must reproduce the above |
|---|
| 13 | * copyright notice, this list of conditions and the following |
|---|
| 14 | * disclaimer in the documentation and/or other materials |
|---|
| 15 | * provided with the distribution. |
|---|
| 16 | * * Neither the name of the jCoderZ.org Project nor the names of |
|---|
| 17 | * its contributors may be used to endorse or promote products |
|---|
| 18 | * derived from this software without specific prior written |
|---|
| 19 | * permission. |
|---|
| 20 | * |
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND |
|---|
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|---|
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|---|
| 24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS |
|---|
| 25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|---|
| 26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|---|
| 27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
|---|
| 28 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 29 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|---|
| 30 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|---|
| 31 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 32 | */ |
|---|
| 33 | package org.jcoderz.commons.logging; |
|---|
| 34 | |
|---|
| 35 | import java.text.FieldPosition; |
|---|
| 36 | import java.text.Format; |
|---|
| 37 | import java.text.ParsePosition; |
|---|
| 38 | import java.util.Iterator; |
|---|
| 39 | import java.nio.CharBuffer; |
|---|
| 40 | import java.util.NoSuchElementException; |
|---|
| 41 | |
|---|
| 42 | |
|---|
/**
 * This Format normalizes the whitespace of a string. The space char
 * <code>'\u0020'</code> is left untouched; every run of other whitespace
 * chars (as defined by {@link Character#isWhitespace(char)}) is replaced by
 * exactly one space char.
 *
 * <p>A run of such whitespace at the very beginning of the text is replaced
 * by one space char as well, whereas a run at the very end of the text is
 * dropped.</p>
 *
 * <p>It extends the Format type in an asymmetric way: a formatted String
 * cannot be parsed in a way that the result is equal to the source
 * string.</p>
 *
 * <p>If allocated with a sub format, first the sub format is used for
 * formatting, then the result is formatted by this. In addition to the
 * Format implementation it offers static access methods for the format
 * functionality.</p>
 */
public final class WhitespaceFormat
      extends Format
{
   /** The only whitespace char which is never replaced. */
   static final char PRESERVED_CHAR = '\u0020';

   /** Optional format applied before the whitespace handling, might be null. */
   private final Format mSubFormat;

   /**
    * Iterates over the parts of a text which are separated by runs of
    * whitespace other than {@link WhitespaceFormat#PRESERVED_CHAR}.
    * Each element is a {@link CharBuffer}.
    */
   private static final class WhitespaceIterator
         implements Iterator
   {
      /** Private duplicate of the source; its position marks the progress. */
      private final CharBuffer mBuffer;

      /**
       * Constructs a whitespace iterator for the supplied text and skips
       * over initial whitespace.
       *
       * @param buffer The text to split. Only a duplicate is modified, so
       *       position and limit of the supplied buffer stay untouched.
       */
      private WhitespaceIterator (final CharBuffer buffer)
      {
         mBuffer = buffer.duplicate();
         skip();
      }

      /**
       * Not supported.
       *
       * @throws UnsupportedOperationException always.
       */
      public void remove ()
      {
         throw new UnsupportedOperationException();
      }

      /** {@inheritDoc} */
      public boolean hasNext ()
      {
         return mBuffer.remaining() > 0;
      }

      /**
       * Gets the next character sequence up to (excluding) the next
       * whitespace char which is not the space character
       * <code>'\u0020'</code>. The returned sequence never contains a
       * whitespace char other than the space char. It might be empty if
       * the remaining text starts with whitespace to replace.
       *
       * @return the next part as {@link CharBuffer}.
       * @throws NoSuchElementException if no text is left.
       * @see java.util.Iterator#next()
       */
      public Object next ()
      {
         if (mBuffer.remaining() <= 0)
         {
            throw new NoSuchElementException();
         }
         boolean wsFound = false;
         boolean postWsFound = false;

         int startOfWs = 0;
         int endOfWs = 0;

         // CHECKME: on some platforms slice does not behave as stated in SDK
         // api, mBuffer is simply duplicated.
         final CharBuffer rc = mBuffer.slice();

         // Scan until the first char following a whitespace run is seen or
         // the text is exhausted. Indices are relative to the position.
         for (int i = 0;
               i < mBuffer.remaining() && !(wsFound && postWsFound);
               ++i)
         {
            if (isReplaceableWhitespace(mBuffer.charAt(i)))
            {
               if (!wsFound)
               {
                  startOfWs = i;
               }
               endOfWs = i;
               wsFound = true;
               postWsFound = false;
            }
            else
            {
               postWsFound = true;
            }
         }
         setPositions(rc, wsFound, postWsFound, startOfWs, endOfWs);
         return rc;
      }

      /**
       * Sets the positions and limits of supplied buffer and internal buffer
       * for the result of a next call.
       *
       * @param rc This buffer is returned to the <code>next()</code> caller.
       * @param wsFound Flag denoting whether whitespace to replace has been
       *       found.
       * @param postWsFound Flag denoting whether chars after whitespace
       *       have been found.
       * @param startOfWs The start index of whitespace chars to replace.
       * @param endOfWs The end index of the whitespace chars to replace.
       */
      private void setPositions (
            final CharBuffer rc,
            final boolean wsFound,
            final boolean postWsFound,
            final int startOfWs,
            final int endOfWs)
      {
         if (wsFound)
         {
            // CHECKME: if slice does not work as said in the api, then
            // position is > 0 and idx has to be added to the position to
            // get the new limit
            if (rc.position() > 0)
            {
               rc.limit(rc.position() + startOfWs);
            }
            else
            {
               rc.limit(startOfWs);
            }
            if (postWsFound)
            {
               // continue with the first char after the whitespace run
               mBuffer.position(mBuffer.position() + endOfWs + 1);
            }
            else
            {
               // whitespace reaches up to the end of the text: drop it
               mBuffer.position(mBuffer.limit());
            }
         }
         else
         {
            // no whitespace at all, the returned part covers the rest
            mBuffer.position(mBuffer.limit());
         }
      }

      /**
       * Skips over initial whitespace, which is not
       * {@linkplain WhitespaceFormat#PRESERVED_CHAR}. If other chars follow,
       * the buffer is positioned at the last char of that whitespace run,
       * so the first call to <code>next()</code> returns an empty part. If
       * the text consists of such whitespace only, the buffer is exhausted.
       */
      private void skip ()
      {
         boolean wsFound = false;
         boolean postWsFound = false;
         boolean first = true;

         int endOfWs = 0;

         for (int i = 0;
               i < mBuffer.remaining() && ((wsFound ^ postWsFound) || first);
               ++i)
         {
            first = false;
            if (isReplaceableWhitespace(mBuffer.charAt(i)))
            {
               wsFound = true;
               endOfWs = i;
            }
            else
            {
               postWsFound = wsFound;
            }
         }
         if (wsFound && !postWsFound)
         {
            // only whitespace found
            mBuffer.position(mBuffer.limit());
         }
         else if (wsFound)
         {
            // found chars after whitespace, so the following is correct
            mBuffer.position(mBuffer.position() + endOfWs);
         }
      }
   }

   /**
    * Creates a new instance of this with no sub format.
    */
   public WhitespaceFormat ()
   {
      this(null);
   }

   /**
    * Creates a new instance of this with the supplied sub format.
    *
    * @param subFormat The sub format to use for first step formatting of an
    *       object. This will be used for parsing an object as well. Might be
    *       null.
    */
   public WhitespaceFormat (final Format subFormat)
   {
      mSubFormat = subFormat;
   }

   /**
    * Replaces and reduces whitespace in the supplied message. The resulting
    * string will only have <code>'\u0020'</code> as white space. Any such
    * character in the source string is left untouched, all other whitespace
    * characters are replaced by <code>'\u0020'</code>, but with only one in a
    * row, so, for example, a sequence of 2 line separators will be replaced
    * by one <code>'\u0020'</code>. Whitespace to replace at the very end of
    * the message is dropped.
    *
    * @param message The message in which to find and replace white space.
    *
    * @return String with replaced and reduced white space. This is the empty
    *       string if the message is empty or consists of whitespace to
    *       replace only.
    */
   public static String format (final String message)
   {
      return format(CharBuffer.wrap(message)).toString();
   }

   /**
    * Replaces and reduces whitespace in the supplied character buffer.
    *
    * @see #format(String)
    *
    * @param message The message buffer in which to find and replace white
    *       space. Its position and limit are not modified.
    *
    * @return CharBuffer with replaced and reduced white space, never
    *       <code>null</code>. This might be a duplicate of
    *       <code>message</code> if it does not contain whitespace to
    *       replace. It is an empty buffer if the message is empty or
    *       consists of whitespace to replace only.
    */
   public static CharBuffer format (final CharBuffer message)
   {
      final WhitespaceIterator iter = new WhitespaceIterator(message);

      CharBuffer rc = null;
      boolean flip = false;

      while (iter.hasNext())
      {
         final CharBuffer cb = (CharBuffer) iter.next();

         if (rc != null)
         {
            // every further part is separated by exactly one space
            rc.put(PRESERVED_CHAR);
            rc.put(cb);
         }
         else if ((cb.limit() == message.limit())
               && (cb.position() == message.position()))
         {
            // the first part covers the whole message: nothing to replace
            rc = message.duplicate();
         }
         else
         {
            // the result is never longer than the source
            rc = CharBuffer.allocate(message.limit());
            rc.put(cb);
            flip = true;
         }
      }
      if (rc == null)
      {
         // Empty message or nothing but whitespace to replace. Returning
         // null here made format(String) fail with a NullPointerException.
         rc = CharBuffer.allocate(0);
      }
      else if (flip)
      {
         rc.flip();
      }
      return rc;
   }

   /**
    * If a sub format is set, it delegates parsing to the sub format. If no
    * sub format is set, it takes the source string, starting at the index
    * of <code>pos</code>, until the first whitespace char is found which
    * is not {@link #PRESERVED_CHAR}, or until the end of the source.
    *
    * @param source The string to parse.
    * @param pos The position where to start parsing. If no sub format is
    *       set, its index is set to the index of the terminating whitespace
    *       char or to the length of the source.
    *
    * @return the result of the sub format or the parsed String.
    *
    * @see java.text.Format#parseObject(java.lang.String, java.text.ParsePosition)
    */
   public Object parseObject (final String source, final ParsePosition pos)
   {
      final Object rc;
      if (mSubFormat == null)
      {
         final int start = pos.getIndex();
         final int len = source.length();

         int end = start;
         while (end < len && !isReplaceableWhitespace(source.charAt(end)))
         {
            ++end;
         }
         rc = source.substring(start, end);
         pos.setIndex(end);
      }
      else
      {
         rc = mSubFormat.parseObject(source, pos);
      }
      return rc;
   }

   /**
    * If a sub format is set, it uses this to format the object and compresses
    * the whitespace within the result.
    * If no sub format is set, it expects a String object and compresses the
    * whitespace on that.
    *
    * @param obj The object to format.
    * @param toAppendTo The string buffer where to append to the formatted
    *       object.
    * @param pos The field position for formatting. It is handed to the sub
    *       format if one is set; otherwise its begin and end index are set
    *       to 0 unless it is <code>null</code>.
    *
    * @return StringBuffer with formatted objects.
    *
    * @throws IllegalArgumentException if no sub format is set and
    *       <code>obj</code> is not a String (including <code>null</code>).
    *
    * @see java.text.Format#format(java.lang.Object, java.lang.StringBuffer, java.text.FieldPosition)
    */
   public StringBuffer format (
         final Object obj,
         final StringBuffer toAppendTo,
         final FieldPosition pos)
   {
      if (mSubFormat == null)
      {
         if (!(obj instanceof String))
         {
            // obj might be null here, so do not dereference it blindly
            final String actual = (obj == null)
                  ? "null" : (obj.getClass().getName() + ": " + obj);
            throw new IllegalArgumentException("Supplied object to be formatted"
                  + " must be a String but is " + actual);
         }
         toAppendTo.append(format((String) obj));
         if (pos != null)
         {
            pos.setBeginIndex(0);
            pos.setEndIndex(0);
         }
      }
      else
      {
         final StringBuffer sb
               = mSubFormat.format(obj, new StringBuffer(), pos);
         toAppendTo.append(WhitespaceFormat.format(sb.toString()));
      }
      return toAppendTo;
   }

   /**
    * Checks whether the supplied char is whitespace which has to be
    * replaced, that is any whitespace char but {@link #PRESERVED_CHAR}.
    *
    * @param c The char to check.
    *
    * @return true if the char is whitespace other than the preserved char.
    */
   private static boolean isReplaceableWhitespace (final char c)
   {
      return c != PRESERVED_CHAR && Character.isWhitespace(c);
   }
}
|---|