001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 *
019 */
020
021 package org.apache.directory.shared.ldap.util;
022
023
/**
 * Encodes raw bytes as base64 characters and decodes base64 characters back
 * to raw bytes.
 * <p>
 * The alphabet is <code>A-Z</code>, <code>a-z</code>, <code>0-9</code>,
 * <code>+</code> and <code>/</code>; <code>=</code> is used as the padding
 * character.
 *
 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
 * @version $Revision: 664290 $
 */
public class Base64
{
    /** Index within {@link #ALPHABET} of the padding character '='. */
    private static final int PAD_INDEX = 64;

    /** Code characters for values 0..63, followed by the padding character. */
    private static final char[] ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
        .toCharArray();

    /**
     * Lookup table converting a base64 character (used as index) to its value
     * in the range 0..63. Every other entry, including the one for the
     * padding character, is -1.
     */
    private static final byte[] CODES = new byte[256];

    static
    {
        for ( int ii = 0; ii < CODES.length; ii++ )
        {
            CODES[ii] = -1;
        }

        for ( int ii = 'A'; ii <= 'Z'; ii++ )
        {
            CODES[ii] = ( byte ) ( ii - 'A' );
        }

        for ( int ii = 'a'; ii <= 'z'; ii++ )
        {
            CODES[ii] = ( byte ) ( 26 + ii - 'a' );
        }

        for ( int ii = '0'; ii <= '9'; ii++ )
        {
            CODES[ii] = ( byte ) ( 52 + ii - '0' );
        }

        CODES['+'] = 62;
        CODES['/'] = 63;
    }


    /**
     * Encodes the passed data array as base64 characters.
     * <p>
     * Every group of up to 3 input bytes produces exactly 4 output
     * characters; a final group of 1 or 2 bytes is completed with '='
     * padding, so the result length is always a multiple of 4.
     *
     * @param a_data
     *            the array of bytes to encode
     * @return base64-coded character array (empty for empty input).
     * @throws NullPointerException
     *             if <code>a_data</code> is <code>null</code>
     */
    public static char[] encode( byte[] a_data )
    {
        char[] out = new char[( ( a_data.length + 2 ) / 3 ) * 4];

        for ( int src = 0, dst = 0; src < a_data.length; src += 3, dst += 4 )
        {
            boolean hasSecond = ( src + 1 ) < a_data.length;
            boolean hasThird = ( src + 2 ) < a_data.length;

            // Pack up to three bytes into the low 24 bits; missing bytes
            // contribute zero bits.
            int bits = ( 0xFF & a_data[src] ) << 16;

            if ( hasSecond )
            {
                bits |= ( 0xFF & a_data[src + 1] ) << 8;
            }

            if ( hasThird )
            {
                bits |= ( 0xFF & a_data[src + 2] );
            }

            // Split the 24 bits into four 6-bit groups, most significant
            // first. Groups that hold no input bits at all become padding.
            out[dst] = ALPHABET[( bits >> 18 ) & 0x3F];
            out[dst + 1] = ALPHABET[( bits >> 12 ) & 0x3F];
            out[dst + 2] = ALPHABET[hasSecond ? ( ( bits >> 6 ) & 0x3F ) : PAD_INDEX];
            out[dst + 3] = ALPHABET[hasThird ? ( bits & 0x3F ) : PAD_INDEX];
        }

        return out;
    }


    /**
     * Decodes base64 characters to recover the original data.
     * <p>
     * Any character that is not part of the base64 alphabet (whitespace,
     * newlines, the '=' padding character, characters above 255, ...) is
     * skipped wherever it occurs instead of causing an error. The size of the
     * result is computed from the number of VALID characters only. Trailing
     * bits that do not fill a whole byte are discarded.
     *
     * @param data
     *            data to decode.
     * @return the decoded binary data (empty when the input holds no
     *         complete byte).
     * @throws NullPointerException
     *             if <code>data</code> is <code>null</code>
     */
    public static byte[] decode( char[] data )
    {
        // First pass: count only usable characters so that junk and padding
        // do not distort the size of the output array.
        int validCount = 0;

        for ( char c : data )
        {
            if ( codeOf( c ) >= 0 )
            {
                validCount++;
            }
        }

        // 3 bytes for every 4 valid chars, plus 2 bytes for 3 extra chars
        // or 1 byte for 2 extra chars (1 extra char carries only 6 bits).
        int length = ( validCount / 4 ) * 3;
        int extra = validCount % 4;

        if ( extra == 3 )
        {
            length += 2;
        }
        else if ( extra == 2 )
        {
            length += 1;
        }

        byte[] out = new byte[length];

        int pendingBits = 0; // number of not-yet-written bits held in accum
        int accum = 0; // the not-yet-written bits themselves
        int dst = 0;

        // Second pass: walk the whole input again, skipping non-code chars.
        for ( char c : data )
        {
            int value = codeOf( c );

            if ( value < 0 )
            {
                continue;
            }

            // New bits enter at the bottom, 6 at a time.
            accum = ( accum << 6 ) | value;
            pendingBits += 6;

            if ( pendingBits >= 8 )
            {
                // Emit the top 8 pending bits and keep only the remainder.
                pendingBits -= 8;
                out[dst++] = ( byte ) ( ( accum >> pendingBits ) & 0xff );
                accum &= ( 1 << pendingBits ) - 1;
            }
        }

        // Sanity check on the length computation above; both passes apply
        // the same validity test, so this is not expected to trigger.
        if ( dst != out.length )
        {
            throw new IllegalStateException( "Miscalculated data length (wrote " + dst + " instead of "
                + out.length + ")" );
        }

        return out;
    }


    /**
     * Returns the 6-bit value of a base64 character, or -1 when the character
     * is not part of the base64 alphabet (padding included).
     */
    private static int codeOf( char c )
    {
        return ( c > 255 ) ? -1 : CODES[c];
    }
}