001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one
003     *  or more contributor license agreements.  See the NOTICE file
004     *  distributed with this work for additional information
005     *  regarding copyright ownership.  The ASF licenses this file
006     *  to you under the Apache License, Version 2.0 (the
007     *  "License"); you may not use this file except in compliance
008     *  with the License.  You may obtain a copy of the License at
009     *  
010     *    http://www.apache.org/licenses/LICENSE-2.0
011     *  
012     *  Unless required by applicable law or agreed to in writing,
013     *  software distributed under the License is distributed on an
014     *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     *  KIND, either express or implied.  See the License for the
016     *  specific language governing permissions and limitations
017     *  under the License. 
018     *  
019     */
020    
021    package org.apache.directory.shared.ldap.util;
022    
023    
024    /**
025     * Encodes raw bytes to base64 characters and decodes base64 characters back to raw bytes.
026     * 
027     * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
028     * @version $Revision: 664290 $
029     */
public class Base64
{
    /**
     * Code characters for the values 0..63, followed by the padding
     * character '=' at index {@link #PAD_INDEX}.
     */
    private static final char[] ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
        .toCharArray();

    /** Index of the padding character '=' inside {@link #ALPHABET}. */
    private static final int PAD_INDEX = 64;

    /**
     * Lookup table converting a character (0..255) to its base64 value in the
     * range 0..63, or to -1 when the character is not a base64 code character.
     * The padding character '=' is deliberately mapped to -1 as well.
     */
    private static final byte[] CODES = new byte[256];

    static
    {
        for ( int ii = 0; ii < CODES.length; ii++ )
        {
            CODES[ii] = -1;
        }

        // Only the 64 code characters get a value; the trailing '=' stays at -1.
        for ( int ii = 0; ii < PAD_INDEX; ii++ )
        {
            CODES[ALPHABET[ii]] = ( byte ) ii;
        }
    }


    /**
     * Encodes the passed data array as base64 characters. Every group of up
     * to 3 input bytes produces exactly 4 output characters; an incomplete
     * final group is padded with '='.
     * 
     * @param a_data
     *            the array of bytes to encode
     * @return base64-coded character array, whose length is always a
     *         multiple of 4 (empty for empty input).
     * @throws NullPointerException
     *             if a_data is null
     */
    public static char[] encode( byte[] a_data )
    {
        char[] encoded = new char[( ( a_data.length + 2 ) / 3 ) * 4];

        for ( int src = 0, dst = 0; src < a_data.length; src += 3, dst += 4 )
        {
            boolean hasSecond = ( src + 1 ) < a_data.length;
            boolean hasThird = ( src + 2 ) < a_data.length;

            // Pack up to three bytes into one 24 bit group; missing bytes
            // contribute zero bits.
            int group = ( 0xFF & a_data[src] ) << 16;

            if ( hasSecond )
            {
                group |= ( 0xFF & a_data[src + 1] ) << 8;
            }

            if ( hasThird )
            {
                group |= ( 0xFF & a_data[src + 2] );
            }

            // Emit the group as four 6 bit values, most significant first.
            // Positions that carry no input bits at all become padding.
            encoded[dst] = ALPHABET[( group >> 18 ) & 0x3F];
            encoded[dst + 1] = ALPHABET[( group >> 12 ) & 0x3F];
            encoded[dst + 2] = ALPHABET[hasSecond ? ( ( group >> 6 ) & 0x3F ) : PAD_INDEX];
            encoded[dst + 3] = ALPHABET[hasThird ? ( group & 0x3F ) : PAD_INDEX];
        }

        return encoded;
    }


    /**
     * Decodes base64 characters to recover the original data. Every character
     * that is not one of the 64 base64 code characters (the padding character
     * '=', whitespace, newlines, any character above 255, ...) is skipped
     * wherever it occurs, rather than causing an error. The output size is
     * derived from a first pass that counts only the valid code characters.
     * 
     * @param data
     *            data to decode.
     * @return the decoded binary data.
     * @throws NullPointerException
     *             if data is null
     */
    public static byte[] decode( char[] data )
    {
        // First pass: count the usable characters so that the output array is
        // sized from real payload and not from padding or junk.
        int validCount = 0;

        for ( char c : data )
        {
            if ( codeOf( c ) >= 0 )
            {
                validCount++;
            }
        }

        // 3 bytes for every 4 valid chars, plus 2 bytes for 3 extra chars
        // or 1 byte for 2 extra chars. A single extra char carries only
        // 6 bits and therefore yields no additional byte.
        int decodedLength = ( validCount / 4 ) * 3;
        int remainder = validCount % 4;

        if ( remainder == 3 )
        {
            decodedLength += 2;
        }
        else if ( remainder == 2 )
        {
            decodedLength += 1;
        }

        byte[] decoded = new byte[decodedLength];

        int pendingBits = 0; // number of not yet written bits held in buffer
        int buffer = 0; // bit accumulator; only its low pendingBits bits matter
        int written = 0;

        // Second pass over the whole input (NOT limited to validCount).
        for ( char c : data )
        {
            int value = codeOf( c );

            if ( value < 0 )
            {
                // padding or junk: contributes no bits
                continue;
            }

            // Shift the 6 new bits in at the bottom. Older bits drifting out
            // of the top of the int are harmless because they were already
            // written out.
            buffer = ( buffer << 6 ) | value;
            pendingBits += 6;

            if ( pendingBits >= 8 )
            {
                // A full byte is available: write it from the top of the
                // pending bits and keep the excess for the next round.
                pendingBits -= 8;
                decoded[written++] = ( byte ) ( ( buffer >> pendingBits ) & 0xff );
            }
        }

        // Internal consistency check between the two passes. AssertionError
        // is a subclass of Error, so code catching Error keeps working.
        if ( written != decoded.length )
        {
            throw new AssertionError( "Miscalculated data length (wrote " + written + " instead of "
                + decoded.length + ")" );
        }

        return decoded;
    }


    /**
     * Returns the base64 value (0..63) of the given character, or a negative
     * number when the character is not a base64 code character.
     */
    private static int codeOf( char c )
    {
        return ( c > 255 ) ? -1 : CODES[c];
    }
}