001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one
003     *  or more contributor license agreements.  See the NOTICE file
004     *  distributed with this work for additional information
005     *  regarding copyright ownership.  The ASF licenses this file
006     *  to you under the Apache License, Version 2.0 (the
007     *  "License"); you may not use this file except in compliance
008     *  with the License.  You may obtain a copy of the License at
009     *  
010     *    http://www.apache.org/licenses/LICENSE-2.0
011     *  
012     *  Unless required by applicable law or agreed to in writing,
013     *  software distributed under the License is distributed on an
014     *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     *  KIND, either express or implied.  See the License for the
016     *  specific language governing permissions and limitations
017     *  under the License. 
018     *  
019     */
020    package org.apache.directory.shared.ldap.ldif;
021    
022    import java.io.BufferedReader;
023    import java.io.StringReader;
024    import java.util.ArrayList;
025    
026    import javax.naming.NamingException;
027    import javax.naming.directory.Attribute;
028    import javax.naming.directory.Attributes;
029    import javax.naming.directory.BasicAttributes;
030    
031    import org.apache.directory.shared.ldap.util.StringTools;
032    import org.slf4j.Logger;
033    import org.slf4j.LoggerFactory;
034    
035    /**
036     * <pre>
037     *  &lt;ldif-file&gt; ::= &quot;version:&quot; &lt;fill&gt; &lt;number&gt; &lt;seps&gt; &lt;dn-spec&gt; &lt;sep&gt; 
038     *  &lt;ldif-content-change&gt;
039     *  
040     *  &lt;ldif-content-change&gt; ::= 
041     *    &lt;number&gt; &lt;oid&gt; &lt;options-e&gt; &lt;value-spec&gt; &lt;sep&gt; &lt;attrval-specs-e&gt; 
042     *    &lt;ldif-attrval-record-e&gt; | 
043     *    &lt;alpha&gt; &lt;chars-e&gt; &lt;options-e&gt; &lt;value-spec&gt; &lt;sep&gt; &lt;attrval-specs-e&gt; 
044     *    &lt;ldif-attrval-record-e&gt; | 
045     *    &quot;control:&quot; &lt;fill&gt; &lt;number&gt; &lt;oid&gt; &lt;spaces-e&gt; &lt;criticality&gt; 
046     *    &lt;value-spec-e&gt; &lt;sep&gt; &lt;controls-e&gt; 
047     *        &quot;changetype:&quot; &lt;fill&gt; &lt;changerecord-type&gt; &lt;ldif-change-record-e&gt; |
048     *    &quot;changetype:&quot; &lt;fill&gt; &lt;changerecord-type&gt; &lt;ldif-change-record-e&gt;
049     *                              
050     *  &lt;ldif-attrval-record-e&gt; ::= &lt;seps&gt; &lt;dn-spec&gt; &lt;sep&gt; &lt;attributeType&gt; 
051     *    &lt;options-e&gt; &lt;value-spec&gt; &lt;sep&gt; &lt;attrval-specs-e&gt; 
052     *    &lt;ldif-attrval-record-e&gt; | e
053     *                              
054     *  &lt;ldif-change-record-e&gt; ::= &lt;seps&gt; &lt;dn-spec&gt; &lt;sep&gt; &lt;controls-e&gt; 
055     *    &quot;changetype:&quot; &lt;fill&gt; &lt;changerecord-type&gt; &lt;ldif-change-record-e&gt; | e
056     *                              
057     *  &lt;dn-spec&gt; ::= &quot;dn:&quot; &lt;fill&gt; &lt;safe-string&gt; | &quot;dn::&quot; &lt;fill&gt; &lt;base64-string&gt;
058     *                              
059     *  &lt;controls-e&gt; ::= &quot;control:&quot; &lt;fill&gt; &lt;number&gt; &lt;oid&gt; &lt;spaces-e&gt; &lt;criticality&gt; 
060     *    &lt;value-spec-e&gt; &lt;sep&gt; &lt;controls-e&gt; | e
061     *                              
062     *  &lt;criticality&gt; ::= &quot;true&quot; | &quot;false&quot; | e
063     *                              
064     *  &lt;oid&gt; ::= '.' &lt;number&gt; &lt;oid&gt; | e
065     *                              
066     *  &lt;attrval-specs-e&gt; ::= &lt;number&gt; &lt;oid&gt; &lt;options-e&gt; &lt;value-spec&gt; &lt;sep&gt; 
067     *  &lt;attrval-specs-e&gt; | 
068     *    &lt;alpha&gt; &lt;chars-e&gt; &lt;options-e&gt; &lt;value-spec&gt; &lt;sep&gt; &lt;attrval-specs-e&gt; | e
069     *                              
070     *  &lt;value-spec-e&gt; ::= &lt;value-spec&gt; | e
071     *  
072     *  &lt;value-spec&gt; ::= ':' &lt;fill&gt; &lt;safe-string-e&gt; | 
073     *    &quot;::&quot; &lt;fill&gt; &lt;base64-chars&gt; | 
074     *    &quot;:&lt;&quot; &lt;fill&gt; &lt;url&gt;
075     *  
076     *  &lt;attributeType&gt; ::= &lt;number&gt; &lt;oid&gt; | &lt;alpha&gt; &lt;chars-e&gt;
077     *  
078     *  &lt;options-e&gt; ::= ';' &lt;char&gt; &lt;chars-e&gt; &lt;options-e&gt; |e
079     *                              
080     *  &lt;chars-e&gt; ::= &lt;char&gt; &lt;chars-e&gt; |  e
081     *  
082     *  &lt;changerecord-type&gt; ::= &quot;add&quot; &lt;sep&gt; &lt;attributeType&gt; &lt;options-e&gt; &lt;value-spec&gt; 
083     *  &lt;sep&gt; &lt;attrval-specs-e&gt; | 
084     *    &quot;delete&quot; &lt;sep&gt; | 
085     *    &quot;modify&quot; &lt;sep&gt; &lt;mod-type&gt; &lt;fill&gt; &lt;attributeType&gt; &lt;options-e&gt; &lt;sep&gt; 
086     *    &lt;attrval-specs-e&gt; &lt;sep&gt; '-' &lt;sep&gt; &lt;mod-specs-e&gt; | 
087     *    &quot;moddn&quot; &lt;sep&gt; &lt;newrdn&gt; &lt;sep&gt; &quot;deleteoldrdn:&quot; &lt;fill&gt; &lt;0-1&gt; &lt;sep&gt; 
088     *    &lt;newsuperior-e&gt; &lt;sep&gt; |
089     *    &quot;modrdn&quot; &lt;sep&gt; &lt;newrdn&gt; &lt;sep&gt; &quot;deleteoldrdn:&quot; &lt;fill&gt; &lt;0-1&gt; &lt;sep&gt; 
090     *    &lt;newsuperior-e&gt; &lt;sep&gt;
091     *  
092     *  &lt;newrdn&gt; ::= ':' &lt;fill&gt; &lt;safe-string&gt; | &quot;::&quot; &lt;fill&gt; &lt;base64-chars&gt;
093     *  
094     *  &lt;newsuperior-e&gt; ::= &quot;newsuperior&quot; &lt;newrdn&gt; | e
095     *  
096     *  &lt;mod-specs-e&gt; ::= &lt;mod-type&gt; &lt;fill&gt; &lt;attributeType&gt; &lt;options-e&gt; 
097     *    &lt;sep&gt; &lt;attrval-specs-e&gt; &lt;sep&gt; '-' &lt;sep&gt; &lt;mod-specs-e&gt; | e
098     *  
099     *  &lt;mod-type&gt; ::= &quot;add:&quot; | &quot;delete:&quot; | &quot;replace:&quot;
100     *  
101     *  &lt;url&gt; ::= &lt;a Uniform Resource Locator, as defined in [6]&gt;
102     *  
103     *  
104     *  
105     *  LEXICAL
106     *  -------
107     *  
108     *  &lt;fill&gt;           ::= ' ' &lt;fill&gt; | e
109     *  &lt;char&gt;           ::= &lt;alpha&gt; | &lt;digit&gt; | '-'
110     *  &lt;number&gt;         ::= &lt;digit&gt; &lt;digits&gt;
111     *  &lt;0-1&gt;            ::= '0' | '1'
112     *  &lt;digits&gt;         ::= &lt;digit&gt; &lt;digits&gt; | e
113     *  &lt;digit&gt;          ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
114     *  &lt;seps&gt;           ::= &lt;sep&gt; &lt;seps-e&gt; 
115     *  &lt;seps-e&gt;         ::= &lt;sep&gt; &lt;seps-e&gt; | e
116     *  &lt;sep&gt;            ::= 0x0D 0x0A | 0x0A
117     *  &lt;spaces&gt;         ::= ' ' &lt;spaces-e&gt;
118     *  &lt;spaces-e&gt;       ::= ' ' &lt;spaces-e&gt; | e
119     *  &lt;safe-string-e&gt;  ::= &lt;safe-string&gt; | e
120     *  &lt;safe-string&gt;    ::= &lt;safe-init-char&gt; &lt;safe-chars&gt;
121     *  &lt;safe-init-char&gt; ::= [0x01-0x09] | 0x0B | 0x0C | [0x0E-0x1F] | [0x21-0x39] | 0x3B | [0x3D-0x7F]
122     *  &lt;safe-chars&gt;     ::= &lt;safe-char&gt; &lt;safe-chars&gt; | e
123     *  &lt;safe-char&gt;      ::= [0x01-0x09] | 0x0B | 0x0C | [0x0E-0x7F]
124     *  &lt;base64-string&gt;  ::= &lt;base64-char&gt; &lt;base64-chars&gt;
125     *  &lt;base64-chars&gt;   ::= &lt;base64-char&gt; &lt;base64-chars&gt; | e
126     *  &lt;base64-char&gt;    ::= 0x2B | 0x2F | [0x30-0x39] | 0x3D | [0x41-0x5A] | [0x61-0x7A]
127     *  &lt;alpha&gt;          ::= [0x41-0x5A] | [0x61-0x7A]
128     *  
129     *  COMMENTS
130     *  --------
131     *  - The ldap-oid VN is not correct in the RFC-2849. It has been changed from 1*DIGIT 0*1(&quot;.&quot; 1*DIGIT) to
132     *  DIGIT+ (&quot;.&quot; DIGIT+)*
133     *  - The mod-spec lacks a sep between *attrval-spec and &quot;-&quot;.
134     *  - The BASE64-UTF8-STRING should be BASE64-CHAR BASE64-STRING
135     *  - The ValueSpec rule must accept multi-line values. In this case, a LF followed by a
136     *  single space precedes the continued value.
137     * </pre>
138     * 
139     * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
140     * @version $Rev$, $Date$
141     */
142    public class LdifAttributesReader extends LdifReader
143    {
144        /** A logger */
145        private static final Logger LOG = LoggerFactory.getLogger( LdifAttributesReader.class );
146    
147        /**
148         * Constructors
149         */
150        public LdifAttributesReader()
151        {
152            lines = new ArrayList<String>();
153            position = new Position();
154            version = DEFAULT_VERSION;
155        }
156    
157    
158        /**
159         * Parse an AttributeType/AttributeValue
160         * 
161         * @param attributes The entry where to store the value
162         * @param line The line to parse
163         * @param lowerLine The same line, lowercased
164         * @throws NamingException If anything goes wrong
165         */
166        private void parseAttribute( Attributes attributes, String line, String lowerLine ) throws NamingException
167        {
168            int colonIndex = line.indexOf( ':' );
169    
170            String attributeType = lowerLine.substring( 0, colonIndex );
171    
172            // We should *not* have a DN twice
173            if ( attributeType.equals( "dn" ) )
174            {
175                LOG.error( "An entry must not have two DNs" );
176                throw new NamingException( "A ldif entry should not have two DNs" );
177            }
178    
179            Object attributeValue = parseValue( line, colonIndex );
180    
181            // Update the entry
182            Attribute attribute = attributes.get( attributeType );
183            
184            if ( attribute == null )
185            {
186                attributes.put( attributeType, attributeValue );
187            }
188            else
189            {
190                attribute.add( attributeValue );
191            }
192        }
193    
194        /**
195         * Parse a ldif file. The following rules are processed :
196         * 
197         * &lt;ldif-file&gt; ::= &lt;ldif-attrval-record&gt; &lt;ldif-attrval-records&gt; |
198         * &lt;ldif-change-record&gt; &lt;ldif-change-records&gt; &lt;ldif-attrval-record&gt; ::=
199         * &lt;dn-spec&gt; &lt;sep&gt; &lt;attrval-spec&gt; &lt;attrval-specs&gt; &lt;ldif-change-record&gt; ::=
200         * &lt;dn-spec&gt; &lt;sep&gt; &lt;controls-e&gt; &lt;changerecord&gt; &lt;dn-spec&gt; ::= "dn:" &lt;fill&gt;
201         * &lt;distinguishedName&gt; | "dn::" &lt;fill&gt; &lt;base64-distinguishedName&gt;
202         * &lt;changerecord&gt; ::= "changetype:" &lt;fill&gt; &lt;change-op&gt;
203         * 
204         * @return The read entry
205         * @throws NamingException If the entry can't be read or is invalid
206         */
207        private Attributes parseAttributes() throws NamingException
208        {
209            if ( ( lines == null ) || ( lines.size() == 0 ) )
210            {
211                LOG.debug( "The entry is empty : end of ldif file" );
212                return null;
213            }
214    
215            Attributes attributes = new BasicAttributes( true );
216    
217            // Now, let's iterate through the other lines
218            for ( String line:lines )
219            {
220                // Each line could start either with an OID, an attribute type, with
221                // "control:" or with "changetype:"
222                String lowerLine = line.toLowerCase();
223    
224                // We have three cases :
225                // 1) The first line after the DN is a "control:" -> this is an error
226                // 2) The first line after the DN is a "changeType:" -> this is an error
227                // 3) The first line after the DN is anything else
228                if ( lowerLine.startsWith( "control:" ) )
229                {
230                    LOG.error( "We cannot have changes when reading a file which already contains entries" );
231                    throw new NamingException( "No changes withing entries" );
232                }
233                else if ( lowerLine.startsWith( "changetype:" ) )
234                {
235                    LOG.error( "We cannot have changes when reading a file which already contains entries" );
236                    throw new NamingException( "No changes withing entries" );
237                }
238                else if ( line.indexOf( ':' ) > 0 )
239                {
240                    parseAttribute( attributes, line, lowerLine );
241                }
242                else
243                {
244                    // Invalid attribute Value
245                    LOG.error( "Expecting an attribute type" );
246                    throw new NamingException( "Bad attribute" );
247                }
248            }
249    
250            LOG.debug( "Read an attributes : {}", attributes );
251            
252            return attributes;
253        }
254    
255    
256        /**
257         * A method which parses a ldif string and returns a list of entries.
258         * 
259         * @param ldif The ldif string
260         * @return A list of entries, or an empty List
261         * @throws NamingException
262         *             If something went wrong
263         */
264        public Attributes parseAttributes( String ldif ) throws NamingException
265        {
266            lines = new ArrayList<String>();
267            position = new Position();
268    
269            LOG.debug( "Starts parsing ldif buffer" );
270    
271            if ( StringTools.isEmpty( ldif ) )
272            {
273                return new BasicAttributes( true );
274            }
275    
276            StringReader strIn = new StringReader( ldif );
277            in = new BufferedReader( strIn );
278    
279            try
280            {
281                readLines();
282                
283                Attributes attributes = parseAttributes();
284    
285                if ( LOG.isDebugEnabled() )
286                {
287                    LOG.debug( "Parsed {} entries.", ( attributes == null ? 0 : 1 ) );
288                }
289    
290                return attributes;
291            }
292            catch (NamingException ne)
293            {
294                LOG.error( "Cannot parse the ldif buffer : {}", ne.getMessage() );
295                throw new NamingException( "Error while parsing the ldif buffer" );
296            }
297        }
298    }