| 1 | /* |
|---|
| 2 | * $Id$ |
|---|
| 3 | * |
|---|
| 4 | * Copyright 2006, The jCoderZ.org Project. All rights reserved. |
|---|
| 5 | * |
|---|
| 6 | * Redistribution and use in source and binary forms, with or without |
|---|
| 7 | * modification, are permitted provided that the following conditions are |
|---|
| 8 | * met: |
|---|
| 9 | * |
|---|
| 10 | * * Redistributions of source code must retain the above copyright |
|---|
| 11 | * notice, this list of conditions and the following disclaimer. |
|---|
| 12 | * * Redistributions in binary form must reproduce the above |
|---|
| 13 | * copyright notice, this list of conditions and the following |
|---|
| 14 | * disclaimer in the documentation and/or other materials |
|---|
| 15 | * provided with the distribution. |
|---|
| 16 | * * Neither the name of the jCoderZ.org Project nor the names of |
|---|
| 17 | * its contributors may be used to endorse or promote products |
|---|
| 18 | * derived from this software without specific prior written |
|---|
| 19 | * permission. |
|---|
| 20 | * |
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND |
|---|
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|---|
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|---|
| 24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS |
|---|
| 25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|---|
| 26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|---|
| 27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
|---|
| 28 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 29 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|---|
| 30 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|---|
| 31 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 32 | */ |
|---|
| 33 | package org.jcoderz.commons.util; |
|---|
| 34 | |
|---|
| 35 | import java.util.Arrays; |
|---|
| 36 | |
|---|
| 37 | import org.jcoderz.commons.ArgumentMalformedException; |
|---|
| 38 | |
|---|
| 39 | |
|---|
| 40 | /** |
|---|
| 41 | * This class provides encode/decode for RFC 2045 Base64 as |
|---|
| 42 | * defined by RFC 2045, N. Freed and N. Borenstein. |
|---|
| 43 | * RFC 2045: Multipurpose Internet Mail Extensions (MIME) |
|---|
| 44 | * Part One: Format of Internet Message Bodies. Reference |
|---|
| 45 | * 1996 Available at: http://www.ietf.org/rfc/rfc2045.txt |
|---|
| 46 | * This class is used by XML Schema binary format validation |
|---|
| 47 | * |
|---|
| 48 | * This implementation does not encode/decode streaming |
|---|
| 49 | * data. You need the data that you will encode/decode |
|---|
| 50 | * already on a byte array. |
|---|
| 51 | * |
|---|
| 52 | * @author Michael Griffel |
|---|
| 53 | * |
|---|
| 54 | * TODO: remove deep copy of decoded Base64 data in case of padding chars. |
|---|
| 55 | */ |
|---|
| 56 | public final class Base64Util |
|---|
| 57 | { |
|---|
| 58 | private static final String ENCODED_PARAMETER = "encoded"; |
|---|
| 59 | private static final int LOWER_SIX_BITS = 0x3f; |
|---|
| 60 | private static final int BASELENGTH = 255; |
|---|
| 61 | private static final int BITS_PER_BASE64_CHAR = 6; |
|---|
| 62 | private static final int FOURBYTE = 4; |
|---|
| 63 | private static final int BYTES_PER_BASE64_CHUNK = 3; |
|---|
| 64 | private static final int TWENTYFOURBITGROUP = 3 * Constants.BITS_PER_BYTE; |
|---|
| 65 | private static final char PAD = '='; |
|---|
| 66 | private static final char[] LOOKUP_BASE64_ALPHABET |
|---|
| 67 | = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" |
|---|
| 68 | .toCharArray(); |
|---|
| 69 | private static final byte[] BASE64_ALPHABET = new byte[BASELENGTH]; |
|---|
| 70 | |
|---|
| 71 | static |
|---|
| 72 | { |
|---|
| 73 | Arrays.fill(BASE64_ALPHABET, (byte) -1); |
|---|
| 74 | for (int i = 0; i < LOOKUP_BASE64_ALPHABET.length; i++) |
|---|
| 75 | { |
|---|
| 76 | BASE64_ALPHABET[LOOKUP_BASE64_ALPHABET[i]] = (byte) i; |
|---|
| 77 | } |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | private Base64Util () |
|---|
| 81 | { |
|---|
| 82 | // no instances allowed - only static methods |
|---|
| 83 | } |
|---|
| 84 | |
|---|
| 85 | /** |
|---|
| 86 | * Encodes hex octets into Base64. |
|---|
| 87 | * |
|---|
| 88 | * @param binaryData Array containing binary data. |
|---|
| 89 | * @return Encoded Base64 array |
|---|
| 90 | */ |
|---|
| 91 | public static char[] encodeToChars (byte[] binaryData) |
|---|
| 92 | { |
|---|
| 93 | final char[] result; |
|---|
| 94 | if (binaryData == null) |
|---|
| 95 | { |
|---|
| 96 | result = null; |
|---|
| 97 | } |
|---|
| 98 | else if (binaryData.length == 0) |
|---|
| 99 | { |
|---|
| 100 | result = new char[0]; |
|---|
| 101 | } |
|---|
| 102 | else |
|---|
| 103 | { |
|---|
| 104 | final int dataBits = binaryData.length * Constants.BITS_PER_BYTE; |
|---|
| 105 | final int remainingBits = dataBits % TWENTYFOURBITGROUP; |
|---|
| 106 | final int numberTriplets = dataBits / TWENTYFOURBITGROUP; |
|---|
| 107 | final int numberQuartet = remainingBits != 0 ? numberTriplets + 1 |
|---|
| 108 | : numberTriplets; |
|---|
| 109 | |
|---|
| 110 | final char [] encodedData = new char[numberQuartet * FOURBYTE]; |
|---|
| 111 | int encodedIndex = 0; |
|---|
| 112 | int dataIndex = 0; |
|---|
| 113 | for (int i = 0; i < numberTriplets; i++) |
|---|
| 114 | { |
|---|
| 115 | // b1 b2 b3 |
|---|
| 116 | // +---------+---------+---------+ |
|---|
| 117 | // |765432 10|7654 3210|76 543210| = x |
|---|
| 118 | // +--------16---------8---------+ |
|---|
| 119 | // | | | | | |
|---|
| 120 | // ^^^^^^ ^^^^^^^ ^^^^^^^ ^^^^^^ |
|---|
| 121 | // d1 d2 d3 d4 |
|---|
| 122 | final int x |
|---|
| 123 | = (binaryData[dataIndex++] & Constants.BYTE_MASK) |
|---|
| 124 | << (2 * Constants.BITS_PER_BYTE) // b1 |
|---|
| 125 | | (binaryData[dataIndex++] & Constants.BYTE_MASK) |
|---|
| 126 | << Constants.BITS_PER_BYTE // b2 |
|---|
| 127 | | (binaryData[dataIndex++] & Constants.BYTE_MASK); // b3 |
|---|
| 128 | |
|---|
| 129 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d1 |
|---|
| 130 | [(x >>> (3 * BITS_PER_BASE64_CHAR)) & LOWER_SIX_BITS]; |
|---|
| 131 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d2 |
|---|
| 132 | [(x >>> (2 * BITS_PER_BASE64_CHAR)) & LOWER_SIX_BITS]; |
|---|
| 133 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d3 |
|---|
| 134 | [(x >>> BITS_PER_BASE64_CHAR) & LOWER_SIX_BITS]; |
|---|
| 135 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d4 |
|---|
| 136 | [x & LOWER_SIX_BITS]; |
|---|
| 137 | } |
|---|
| 138 | // two bytes left |
|---|
| 139 | if (remainingBits == 2 * Constants.BITS_PER_BYTE) |
|---|
| 140 | { |
|---|
| 141 | // b2 b3 |
|---|
| 142 | // +---------+---------+ |
|---|
| 143 | // |765432 10|7654 3210| = x |
|---|
| 144 | // +---------8---------+ |
|---|
| 145 | // | | | | pad | |
|---|
| 146 | // ^^^^^^ ^^^^^^^ ^^^^^^^ ^^^^^^ |
|---|
| 147 | // d1 d2 d3 d4 |
|---|
| 148 | final int x |
|---|
| 149 | = (binaryData[dataIndex++] & Constants.BYTE_MASK) |
|---|
| 150 | << Constants.BITS_PER_BYTE // b2 |
|---|
| 151 | | (binaryData[dataIndex++] & Constants.BYTE_MASK); // b3 |
|---|
| 152 | |
|---|
| 153 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d1 |
|---|
| 154 | [x >>> 10 & LOWER_SIX_BITS]; |
|---|
| 155 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d2 |
|---|
| 156 | [x >>> 4 & LOWER_SIX_BITS]; |
|---|
| 157 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d3 |
|---|
| 158 | [x << 2 & LOWER_SIX_BITS]; |
|---|
| 159 | encodedData[encodedIndex++] = PAD; // d4 |
|---|
| 160 | } |
|---|
| 161 | // one byte left |
|---|
| 162 | else if (remainingBits == Constants.BITS_PER_BYTE) |
|---|
| 163 | { |
|---|
| 164 | // b3 |
|---|
| 165 | // +---------+ |
|---|
| 166 | // |765432 10| = x |
|---|
| 167 | // +---------+ |
|---|
| 168 | // | | | pad | pad | |
|---|
| 169 | // ^^^^^^ ^^^^^^ ^^^^^^ ^^^^^^ |
|---|
| 170 | // d1 d2 d3 d4 |
|---|
| 171 | final int x |
|---|
| 172 | = (binaryData[dataIndex++] & Constants.BYTE_MASK); // b3 |
|---|
| 173 | |
|---|
| 174 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d1 |
|---|
| 175 | [(x >>> 2) & LOWER_SIX_BITS]; |
|---|
| 176 | encodedData[encodedIndex++] = LOOKUP_BASE64_ALPHABET // d2 |
|---|
| 177 | [(x << 4) & LOWER_SIX_BITS]; |
|---|
| 178 | encodedData[encodedIndex++] = PAD; // d3 |
|---|
| 179 | encodedData[encodedIndex++] = PAD; // d4 |
|---|
| 180 | } |
|---|
| 181 | result = encodedData; |
|---|
| 182 | } |
|---|
| 183 | return result; |
|---|
| 184 | } |
|---|
| 185 | |
|---|
| 186 | /** |
|---|
| 187 | * Encodes hex octets into Base64. |
|---|
| 188 | * |
|---|
| 189 | * @param binaryData Array containing binary data. |
|---|
| 190 | * @return Encoded Base64 string. |
|---|
| 191 | */ |
|---|
| 192 | public static String encode (byte[] binaryData) |
|---|
| 193 | { |
|---|
| 194 | return new String(encodeToChars(binaryData)); |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | /** |
|---|
| 198 | * Encodes hex octets into Base64. |
|---|
| 199 | * The encoded characters are written to the given string |
|---|
| 200 | * buffer <tt>sb</tt>. |
|---|
| 201 | * |
|---|
| 202 | * @param sb the string buffer that is used to write the |
|---|
| 203 | * Base64 characters to. |
|---|
| 204 | * @param binaryData Array containing binary data. |
|---|
| 205 | */ |
|---|
| 206 | public static void appendEncoded (StringBuffer sb, byte[] binaryData) |
|---|
| 207 | { |
|---|
| 208 | sb.append(encodeToChars(binaryData)); |
|---|
| 209 | } |
|---|
| 210 | |
|---|
| 211 | /** |
|---|
| 212 | * Decodes Base64 data into octets. |
|---|
| 213 | * |
|---|
| 214 | * @param encoded Base64 encoded string. |
|---|
| 215 | * @return an array containing decoded data. |
|---|
| 216 | * @throws ArgumentMalformedException if the given string is not |
|---|
| 217 | * Base64 encoded. |
|---|
| 218 | */ |
|---|
| 219 | public static byte[] decode (String encoded) |
|---|
| 220 | throws ArgumentMalformedException |
|---|
| 221 | { |
|---|
| 222 | Assert.notNull(encoded, ENCODED_PARAMETER); |
|---|
| 223 | final byte[] result; |
|---|
| 224 | |
|---|
| 225 | if (encoded.length() % FOURBYTE != 0) |
|---|
| 226 | { |
|---|
| 227 | throw new ArgumentMalformedException(ENCODED_PARAMETER, encoded, |
|---|
| 228 | "Base64 length must be a multiple of " + FOURBYTE); |
|---|
| 229 | } |
|---|
| 230 | final char[] base64Data = encoded.toCharArray(); |
|---|
| 231 | final int numberQuadruple = base64Data.length / FOURBYTE; |
|---|
| 232 | |
|---|
| 233 | if (numberQuadruple == 0) |
|---|
| 234 | { |
|---|
| 235 | throw new ArgumentMalformedException(ENCODED_PARAMETER, encoded, |
|---|
| 236 | "Base64 length " + base64Data.length + " must be at least " |
|---|
| 237 | + FOURBYTE + " bytes"); |
|---|
| 238 | } |
|---|
| 239 | |
|---|
| 240 | byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
|---|
| 241 | int encodedIndex = 0; |
|---|
| 242 | int dataIndex = 0; |
|---|
| 243 | final byte[] decodedData |
|---|
| 244 | = new byte[(numberQuadruple) * BYTES_PER_BASE64_CHUNK]; |
|---|
| 245 | final int pureBase64Chunks = numberQuadruple - 1; |
|---|
| 246 | |
|---|
| 247 | for (int i = 0; i < pureBase64Chunks; i++) |
|---|
| 248 | { |
|---|
| 249 | b1 = base64AlphabetLookup(base64Data[dataIndex++]); |
|---|
| 250 | b2 = base64AlphabetLookup(base64Data[dataIndex++]); |
|---|
| 251 | b3 = base64AlphabetLookup(base64Data[dataIndex++]); |
|---|
| 252 | b4 = base64AlphabetLookup(base64Data[dataIndex++]); |
|---|
| 253 | // b1 b2 b3 b4 |
|---|
| 254 | // +---------+---------+---------+--------+ |
|---|
| 255 | // |00 543210|0054 3210|005432 10|00543210| |
|---|
| 256 | // +---------+---------+---------+--------+ |
|---|
| 257 | // |^^^^^^ ^^|^^^^ ^^^^|^^ ^^^^^^| |
|---|
| 258 | // d1 d2 d3 |
|---|
| 259 | decodedData[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4); // d1 |
|---|
| 260 | decodedData[encodedIndex++] = (byte) (b2 << 4 | b3 >> 2); // d2 |
|---|
| 261 | decodedData[encodedIndex++] = (byte) (b3 << 6 | b4); // d3 |
|---|
| 262 | } |
|---|
| 263 | |
|---|
| 264 | // read last chunk |
|---|
| 265 | b1 = base64AlphabetLookup(base64Data[dataIndex++]); |
|---|
| 266 | b2 = base64AlphabetLookup(base64Data[dataIndex++]); |
|---|
| 267 | final char beforeLastChar = base64Data[dataIndex++]; |
|---|
| 268 | final char lastChar = base64Data[dataIndex++]; |
|---|
| 269 | |
|---|
| 270 | if (isData((beforeLastChar)) && isData((lastChar))) //No PAD e.g 3cQl |
|---|
| 271 | { |
|---|
| 272 | // b1 b2 b3 b4 |
|---|
| 273 | // +---------+---------+---------+--------+ |
|---|
| 274 | // |00 543210|0054 3210|005432 10|00543210| |
|---|
| 275 | // +---------+---------+---------+--------+ |
|---|
| 276 | // |^^^^^^ ^^|^^^^ ^^^^|^^ ^^^^^^| |
|---|
| 277 | // d1 d2 d3 |
|---|
| 278 | b3 = BASE64_ALPHABET[beforeLastChar]; |
|---|
| 279 | b4 = BASE64_ALPHABET[lastChar]; |
|---|
| 280 | decodedData[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4); // d1 |
|---|
| 281 | decodedData[encodedIndex++] = (byte) (b2 << 4 | b3 >> 2); // d2 |
|---|
| 282 | decodedData[encodedIndex++] = (byte) (b3 << 6 | b4); // d3 |
|---|
| 283 | result = decodedData; |
|---|
| 284 | } |
|---|
| 285 | else |
|---|
| 286 | { |
|---|
| 287 | final int decodedDataLength = encodedIndex; |
|---|
| 288 | // Check if they are PAD character(s) |
|---|
| 289 | if (isPad(beforeLastChar) && isPad(lastChar)) |
|---|
| 290 | { |
|---|
| 291 | // Two PAD e.g. 3c[Pad][Pad] |
|---|
| 292 | assertLastFourBitsZero(encoded, b2); |
|---|
| 293 | final byte[] tmp = new byte[decodedDataLength + 1]; |
|---|
| 294 | System.arraycopy(decodedData, 0, tmp, 0, decodedDataLength); |
|---|
| 295 | tmp[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); |
|---|
| 296 | result = tmp; |
|---|
| 297 | } |
|---|
| 298 | else if (isData(beforeLastChar) && isPad(lastChar)) |
|---|
| 299 | { |
|---|
| 300 | // One PAD e.g. 3cQ[Pad] |
|---|
| 301 | b3 = BASE64_ALPHABET[beforeLastChar]; |
|---|
| 302 | assertLastTwoBitsZero(encoded, b3); |
|---|
| 303 | final byte[] tmp = new byte[decodedDataLength + 2]; |
|---|
| 304 | System.arraycopy(decodedData, 0, tmp, 0, decodedDataLength); |
|---|
| 305 | tmp[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4); |
|---|
| 306 | tmp[encodedIndex] = (byte) (b2 << 4 | b3 >> 2); |
|---|
| 307 | result = tmp; |
|---|
| 308 | } |
|---|
| 309 | else |
|---|
| 310 | { |
|---|
| 311 | // an error like "3c[Pad]r", "3cdX", "3cXd", "3cXX" |
|---|
| 312 | // where X is non data |
|---|
| 313 | throw new ArgumentMalformedException(ENCODED_PARAMETER, encoded, |
|---|
| 314 | "At least one of the last 2 characters '" |
|---|
| 315 | + new StringBuffer().append(beforeLastChar).append(lastChar) |
|---|
| 316 | + "' are not a valid Base64 [padding] character"); |
|---|
| 317 | } |
|---|
| 318 | } |
|---|
| 319 | return result; |
|---|
| 320 | } |
|---|
| 321 | |
|---|
| 322 | |
|---|
| 323 | private static void assertLastFourBitsZero (String encoded, byte b) |
|---|
| 324 | { |
|---|
| 325 | if ((b & 0xf) != 0) // last 4 bits should be zero |
|---|
| 326 | { |
|---|
| 327 | throw new ArgumentMalformedException(ENCODED_PARAMETER, encoded, |
|---|
| 328 | "Last 4 bits should be zero of the last " |
|---|
| 329 | + "non-padding character '" |
|---|
| 330 | + Integer.toHexString(b) + "'"); |
|---|
| 331 | } |
|---|
| 332 | } |
|---|
| 333 | |
|---|
| 334 | private static void assertLastTwoBitsZero (String encoded, byte b) |
|---|
| 335 | { |
|---|
| 336 | if ((b & 0x3) != 0) // last 2 bits should be zero |
|---|
| 337 | { |
|---|
| 338 | throw new ArgumentMalformedException(ENCODED_PARAMETER, encoded, |
|---|
| 339 | "Last 2 bits should be zero of the last " |
|---|
| 340 | + "non-padding character '" |
|---|
| 341 | + Integer.toHexString(b) + "'"); |
|---|
| 342 | } |
|---|
| 343 | } |
|---|
| 344 | |
|---|
| 345 | private static byte base64AlphabetLookup (char octect) |
|---|
| 346 | { |
|---|
| 347 | if (!isData(octect)) |
|---|
| 348 | { |
|---|
| 349 | throw new ArgumentMalformedException("octect", |
|---|
| 350 | Character.toString(octect), |
|---|
| 351 | "Illegal Base64 character '" + octect + "'"); |
|---|
| 352 | } |
|---|
| 353 | return BASE64_ALPHABET[octect]; |
|---|
| 354 | } |
|---|
| 355 | |
|---|
| 356 | private static boolean isPad (char octect) |
|---|
| 357 | { |
|---|
| 358 | return (octect == PAD); |
|---|
| 359 | } |
|---|
| 360 | |
|---|
| 361 | private static boolean isData (char octect) |
|---|
| 362 | { |
|---|
| 363 | return (BASE64_ALPHABET[octect] != -1); |
|---|
| 364 | } |
|---|
| 365 | } |
|---|