001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one
003     *  or more contributor license agreements.  See the NOTICE file
004     *  distributed with this work for additional information
005     *  regarding copyright ownership.  The ASF licenses this file
006     *  to you under the Apache License, Version 2.0 (the
007     *  "License"); you may not use this file except in compliance
008     *  with the License.  You may obtain a copy of the License at
009     *  
010     *    http://www.apache.org/licenses/LICENSE-2.0
011     *  
012     *  Unless required by applicable law or agreed to in writing,
013     *  software distributed under the License is distributed on an
014     *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     *  KIND, either express or implied.  See the License for the
016     *  specific language governing permissions and limitations
017     *  under the License. 
018     *  
019     */
020    package org.apache.directory.shared.ldap.filter;
021    
022    
023    import java.text.ParseException;
024    
025    import org.apache.directory.shared.ldap.entry.Value;
026    import org.apache.directory.shared.ldap.entry.client.ClientBinaryValue;
027    import org.apache.directory.shared.ldap.util.AttributeUtils;
028    import org.apache.directory.shared.ldap.util.Position;
029    import org.apache.directory.shared.ldap.util.StringTools;
030    
031    
032    /**
 * This class parses an LDAP filter. The grammar is given in RFC 4515.
034     *
035     * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
036     * @version $Rev$, $Date$
037     */
038    public class FilterParser
039    {
040        /**
041         * Creates a filter parser implementation.
042         */
043        public FilterParser()
044        {
045        }
046    
047    
048        /**
049         * Parse an extensible
050         * 
051         * extensible     = ( attr [":dn"] [':' oid] ":=" assertionvalue )
052         *                  / ( [":dn"] ':' oid ":=" assertionvalue )
053         * matchingrule   = ":" oid
054         */
055        private static ExprNode parseExtensible( String attr, String filter, Position pos ) throws ParseException
056        {
057            ExtensibleNode node = new ExtensibleNode( attr );
058    
059            if ( attr != null )
060            {
061                // First check if we have a ":dn"
062                if ( StringTools.areEquals( filter, pos.start, "dn" ) )
063                {
064                    // Set the dnAttributes flag and move forward in the string
065                    node.setDnAttributes( true );
066                    pos.start += 2;
067                }
068                else
069                {
070                    // Push back the ':' 
071                    pos.start--;
072                }
073    
074                // Do we have a MatchingRule ?
075                if ( StringTools.charAt( filter, pos.start ) == ':' )
076                {
077                    pos.start++;
078                    int start = pos.start;
079    
080                    if ( StringTools.charAt( filter, pos.start ) == '=' )
081                    {
082                        pos.start++;
083    
084                        // Get the assertionValue
085                        node.setValue( parseAssertionValue( filter, pos, true ) );
086    
087                        return node;
088                    }
089                    else
090                    {
091                        AttributeUtils.parseAttribute( filter, pos, false );
092    
093                        node.setMatchingRuleId( filter.substring( start, pos.start ) );
094    
095                        if ( StringTools.areEquals( filter, pos.start, ":=" ) )
096                        {
097                            pos.start += 2;
098    
099                            // Get the assertionValue
100                            node.setValue( parseAssertionValue( filter, pos, true ) );
101    
102                            return node;
103                        }
104                        else
105                        {
106                            throw new ParseException( "AssertionValue expected", pos.start );
107                        }
108                    }
109                }
110                else
111                {
112                    throw new ParseException( "Expected MatchingRule or assertionValue", pos.start );
113                }
114            }
115            else
116            {
117                boolean oidRequested = false;
118    
119                // First check if we have a ":dn"
120                if ( StringTools.areEquals( filter, pos.start, ":dn" ) )
121                {
122                    // Set the dnAttributes flag and move forward in the string
123                    node.setDnAttributes( true );
124                    pos.start += 3;
125                }
126                else
127                {
128                    oidRequested = true;
129                }
130    
131                // Do we have a MatchingRule ?
132                if ( StringTools.charAt( filter, pos.start ) == ':' )
133                {
134                    pos.start++;
135                    int start = pos.start;
136    
137                    if ( StringTools.charAt( filter, pos.start ) == '=' )
138                    {
139                        if ( oidRequested )
140                        {
141                            throw new ParseException( "MatchingRule expected", pos.start );
142                        }
143    
144                        pos.start++;
145    
146                        // Get the assertionValue
147                        node.setValue( parseAssertionValue( filter, pos, true ) );
148    
149                        return node;
150                    }
151                    else
152                    {
153                        AttributeUtils.parseAttribute( filter, pos, false );
154    
155                        node.setMatchingRuleId( filter.substring( start, pos.start ) );
156    
157                        if ( StringTools.areEquals( filter, pos.start, ":=" ) )
158                        {
159                            pos.start += 2;
160    
161                            // Get the assertionValue
162                            node.setValue( parseAssertionValue( filter, pos, true ) );
163    
164                            return node;
165                        }
166                        else
167                        {
168                            throw new ParseException( "AssertionValue expected", pos.start );
169                        }
170                    }
171                }
172                else
173                {
174                    throw new ParseException( "Expected MatchingRule or assertionValue", pos.start );
175                }
176            }
177        }
178    
179    
180        /**
181         * An assertion value : 
182         * assertionvalue = valueencoding
183         * valueencoding  = 0*(normal / escaped)
184         * normal         = UTF1SUBSET / UTFMB
185         * escaped        = '\' HEX HEX
186         * HEX            = '0'-'9' / 'A'-'F' / 'a'-'f'
187         * UTF1SUBSET     = %x01-27 / %x2B-5B / %x5D-7F (Everything but '\0', '*', '(', ')' and '\')
188         * UTFMB          = UTF2 / UTF3 / UTF4
189         * UTF0           = %x80-BF
190         * UTF2           = %xC2-DF UTF0
191         * UTF3           = %xE0 %xA0-BF UTF0 / %xE1-EC UTF0 UTF0 / %xED %x80-9F UTF0 / %xEE-EF UTF0 UTF0
192         * UTF4           = %xF0 %x90-BF UTF0 UTF0 / %xF1-F3 UTF0 UTF0 UTF0 / %xF4 %x80-8F UTF0 UTF0
193         * 
194         * With the specific constraints (RFC 4515):
195         *    "The <valueencoding> rule ensures that the entire filter string is a"
196         *    "valid UTF-8 string and provides that the octets that represent the"
197         *    "ASCII characters "*" (ASCII 0x2a), "(" (ASCII 0x28), ")" (ASCII"
198         *    "0x29), "\" (ASCII 0x5c), and NUL (ASCII 0x00) are represented as a"
199         *    "backslash "\" (ASCII 0x5c) followed by the two hexadecimal digits"
200         *    "representing the value of the encoded octet."
201    
202         * 
203         * The incomming String is already transformed from UTF-8 to unicode, so we must assume that the 
204         * grammar we have to check is the following :
205         * 
206         * assertionvalue = valueencoding
207         * valueencoding  = 0*(normal / escaped)
208         * normal         = unicodeSubset
209         * escaped        = '\' HEX HEX
210         * HEX            = '0'-'9' / 'A'-'F' / 'a'-'f'
211         * unicodeSubset     = %x01-27 / %x2B-5B / %x5D-FFFF
212         */
213        private static Value<?> parseAssertionValue( String filter, Position pos, boolean preserveEscapedChars ) throws ParseException
214        {
215            int start = pos.start;
216            char c = StringTools.charAt( filter, pos.start );
217            
218            // Create a buffer big enough to contain the value once converted
219            byte[] value = new byte[ filter.length() - pos.start];
220            int current = 0;
221    
222            do
223            {
224                if ( StringTools.isUnicodeSubset( c ) )
225                {
226                    value[current++] = (byte)c;
227                    pos.start++;
228                }
229                else if ( StringTools.isCharASCII( filter, pos.start, '\\' ) )
230                {
231                    // Maybe an escaped 
232                    pos.start++;
233    
234                    // First hex
235                    if ( StringTools.isHex( filter, pos.start ) )
236                    {
237                        pos.start++;
238                    }
239                    else
240                    {
241                        throw new ParseException( "Not a valid escaped value", pos.start );
242                    }
243    
244                    // second hex
245                    if ( StringTools.isHex( filter, pos.start ) )
246                    {
247                        value[current++] = StringTools.getHexValue( filter.charAt( pos.start - 1 ), filter.charAt( pos.start ) );
248                        pos.start++;
249                    }
250                    else
251                    {
252                        throw new ParseException( "Not a valid escaped value", pos.start );
253                    }
254                }
255                else
256                {
257                    // not a valid char, so let's get out
258                    break;
259                }
260            }
261            while ( ( c = StringTools.charAt( filter, pos.start ) ) != '\0' );
262    
263            if ( current != 0 )
264            {
265                byte[] result = new byte[ current ];
266                System.arraycopy( value, 0, result, 0, current );
267                
268                return new ClientBinaryValue( result );
269            }
270            else
271            {
272                return new ClientBinaryValue();
273            }
274        }
275    
276    
277        private static Value<?> parseAssertionValue( String filter, Position pos ) throws ParseException
278        {
279            return parseAssertionValue( filter, pos, false );
280        }
281    
282    
283        /**
284         * Parse a substring
285         */
286        private static ExprNode parseSubstring( String attr, Value<?> initial, String filter, Position pos )
287            throws ParseException
288        {
289            if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
290            {
291                // We have found a '*' : this is a substring
292                SubstringNode node = new SubstringNode( attr );
293    
294                if ( initial != null && !initial.isNull() )
295                {
296                    // We have a substring starting with a value : val*...
297                    // Set the initial value. It must be a String
298                    String initialStr = initial.getString();
299                    node.setInitial( initialStr );
300                }
301    
302                pos.start++;
303    
304                // 
305                while ( true )
306                {
307                    Value<?> assertionValue = parseAssertionValue( filter, pos );
308    
309                    // Is there anything else but a ')' after the value ?
310                    if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
311                    {
312                        // Nope : as we have had [initial] '*' (any '*' ) *,
313                        // this is the final
314                        if ( !assertionValue.isNull() )
315                        {
316                            String finalStr = assertionValue.getString();
317                            node.setFinal( finalStr );
318                        }
319    
320                        return node;
321                    }
322                    else if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
323                    {
324                        // We have a '*' : it's an any
325                        // If the value is empty, that means we have more than 
326                        // one consecutive '*' : do nothing in this case.
327                        if ( !assertionValue.isNull() )
328                        {
329                            String anyStr = assertionValue.getString();
330                            node.addAny( anyStr );
331                        }
332    
333                        pos.start++;
334                    }
335                    else
336                    {
337                        // This is an error
338                        throw new ParseException( "Bad substring", pos.start );
339                    }
340                }
341            }
342            else
343            {
344                // This is an error
345                throw new ParseException( "Bad substring", pos.start );
346            }
347        }
348    
349    
350        /**
351         * Here is the grammar to parse :
352         * 
353         * simple    ::= '=' assertionValue
354         * present   ::= '=' '*'
355         * substring ::= '=' [initial] any [final]
356         * initial   ::= assertionValue
357         * any       ::= '*' ( assertionValue '*')*
358         * 
359         * As we can see, there is an ambiguity in the grammar : attr=* can be
360         * seen as a present or as a substring. As stated in the RFC :
361         * 
362         * "Note that although both the <substring> and <present> productions in"
363         * "the grammar above can produce the "attr=*" construct, this construct"
364         * "is used only to denote a presence filter." (RFC 4515, 3)
365         * 
366         * We have also to consider the difference between a substring and the
367         * equality node : this last node does not contain a '*'
368         *
369         * @param attr
370         * @param filter
371         * @param pos
372         * @return
373         */
374        private static ExprNode parsePresenceEqOrSubstring( String attr, String filter, Position pos )
375            throws ParseException
376        {
377            if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
378            {
379                // To be a present node, the next char should be a ')'
380                pos.start++;
381    
382                if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
383                {
384                    // This is a present node
385                    return new PresenceNode( attr );
386                }
387                else
388                {
389                    // Definitively a substring with no initial or an error
390                    // Push back the '*' on the string
391                    pos.start--;
392                    return parseSubstring( attr, null, filter, pos );
393                }
394            }
395            else if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
396            {
397                // An empty equality Node
398                return new EqualityNode( attr, new ClientBinaryValue() );
399            }
400            else
401            {
402                // A substring or an equality node
403                Value<?> value = parseAssertionValue( filter, pos );
404    
405                // Is there anything else but a ')' after the value ?
406                if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
407                {
408                    // This is an equality node
409                    return new EqualityNode( attr, value );
410                }
411    
412                return parseSubstring( attr, value, filter, pos );
413            }
414        }
415    
416    
417        /**
418         * Parse the following grammar :
419         * item           = simple / present / substring / extensible
420         * simple         = attr filtertype assertionvalue
421         * filtertype     = '=' / '~=' / '>=' / '<='
422         * present        = attr '=' '*'
423         * substring      = attr '=' [initial] any [final]
424         * extensible     = ( attr [":dn"] [':' oid] ":=" assertionvalue )
425         *                  / ( [":dn"] ':' oid ":=" assertionvalue )
426         * matchingrule   = ":" oid
427         *                  
428         * An item starts with an attribute or a colon.
429         */
430        private static ExprNode parseItem( String filter, Position pos, char c ) throws ParseException
431        {
432            LeafNode node = null;
433            String attr = null;
434    
435            if ( c == '\0' )
436            {
437                throw new ParseException( "Bad char", pos.start );
438            }
439    
440            if ( c == ':' )
441            {
442                // If we have a colon, then the item is an extensible one
443                return parseExtensible( null, filter, pos );
444            }
445            else
446            {
447                // We must have an attribute
448                attr = AttributeUtils.parseAttribute( filter, pos, true );
449    
450                // Now, we may have a present, substring, simple or an extensible
451                c = StringTools.charAt( filter, pos.start );
452    
453                switch ( c )
454                {
455                    case '=':
456                        // It can be a presence, an equal or a substring
457                        pos.start++;
458                        return parsePresenceEqOrSubstring( attr, filter, pos );
459    
460                    case '~':
461                        // Approximate node
462                        pos.start++;
463    
464                        // Check that we have a '='
465                        if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
466                        {
467                            throw new ParseException( "Expecting a '=' ", pos.start );
468                        }
469    
470                        pos.start++;
471    
472                        // Parse the value and create the node
473                        node = new ApproximateNode( attr, parseAssertionValue( filter, pos ) );
474                        return node;
475    
476                    case '>':
477                        // Greater or equal node
478                        pos.start++;
479    
480                        // Check that we have a '='
481                        if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
482                        {
483                            throw new ParseException( "Expecting a '=' ", pos.start );
484                        }
485    
486                        pos.start++;
487    
488                        // Parse the value and create the node
489                        node = new GreaterEqNode( attr, parseAssertionValue( filter, pos ) );
490                        return node;
491    
492                    case '<':
493                        // Less or equal node
494                        pos.start++;
495    
496                        // Check that we have a '='
497                        if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
498                        {
499                            throw new ParseException( "Expecting a '=' ", pos.start );
500                        }
501    
502                        pos.start++;
503    
504                        // Parse the value and create the node
505                        node = new LessEqNode( attr, parseAssertionValue( filter, pos ) );
506                        return node;
507    
508                    case ':':
509                        // An extensible node
510                        pos.start++;
511                        return parseExtensible( attr, filter, pos );
512    
513                    default:
514                        // This is an error
515                        throw new ParseException( "An item is expected", pos.start );
516                }
517            }
518        }
519    
520    
521        /**
522         * Parse AND, OR and NOT nodes :
523         * 
524         * and            = '&' filterlist
525         * or             = '|' filterlist
526         * not            = '!' filter
527         * filterlist     = 1*filter
528         * 
529         * @return
530         */
531        private static ExprNode parseBranchNode( ExprNode node, String filter, Position pos ) throws ParseException
532        {
533            BranchNode bNode = ( BranchNode ) node;
534    
535            // We must have at least one filter
536            ExprNode child = parseFilterInternal( filter, pos );
537    
538            // Add the child to the node children
539            bNode.addNode( child );
540    
541            // Now, iterate recusively though all the remaining filters, if any
542            while ( ( child = parseFilterInternal( filter, pos ) ) != null )
543            {
544                // Add the child to the node children
545                bNode.addNode( child );
546            }
547    
548            return node;
549        }
550    
551    
552        /**
553         * filtercomp     = and / or / not / item
554         * and            = '&' filterlist
555         * or             = '|' filterlist
556         * not            = '!' filter
557         * item           = simple / present / substring / extensible
558         * simple         = attr filtertype assertionvalue
559         * present        = attr EQUALS ASTERISK
560         * substring      = attr EQUALS [initial] any [final]
561         * extensible     = ( attr [dnattrs]
562         *                    [matchingrule] COLON EQUALS assertionvalue )
563         *                    / ( [dnattrs]
564         *                         matchingrule COLON EQUALS assertionvalue )
565         */
566        private static ExprNode parseFilterComp( String filter, Position pos ) throws ParseException
567        {
568            ExprNode node = null;
569    
570            if ( pos.start == pos.length )
571            {
572                throw new ParseException( "Empty filterComp", pos.start );
573            }
574    
575            char c = StringTools.charAt( filter, pos.start );
576    
577            switch ( c )
578            {
579                case '&':
580                    // This is a AND node
581                    pos.start++;
582                    node = new AndNode();
583                    parseBranchNode( node, filter, pos );
584                    break;
585    
586                case '|':
587                    // This is an OR node
588                    pos.start++;
589                    node = new OrNode();
590                    parseBranchNode( node, filter, pos );
591                    break;
592    
593                case '!':
594                    // This is a NOT node
595                    pos.start++;
596                    node = new NotNode();
597                    parseBranchNode( node, filter, pos );
598                    break;
599    
600                default:
601                    // This is an item
602                    node = parseItem( filter, pos, c );
603                    break;
604    
605            }
606    
607            return node;
608        }
609    
610    
611        /**
612         * Pasre the grammar rule :
613         * filter ::= '(' filterComp ')'
614         */
615        private static ExprNode parseFilterInternal( String filter, Position pos ) throws ParseException
616        {
617            // Check for the left '('
618            if ( !StringTools.isCharASCII( filter, pos.start, '(' ) )
619            {
620                // No more node, get out
621                if ( ( pos.start == 0 ) && ( pos.length != 0 ) )
622                {
623                    throw new ParseException( "No '(' at the begining of the filter", 0 );
624                }
625                else
626                {
627                    return null;
628                }
629            }
630    
631            pos.start++;
632    
633            // parse the filter component
634            ExprNode node = parseFilterComp( filter, pos );
635    
636            if ( node == null )
637            {
638                throw new ParseException( "Bad filter", pos.start );
639            }
640    
641            // Check that we have a right ')'
642            if ( !StringTools.isCharASCII( filter, pos.start, ')' ) )
643            {
644                throw new ParseException( "The filter has no right parenthese", pos.start );
645            }
646    
647            pos.start++;
648    
649            return node;
650        }
651    
652    
653        /**
654         * @see FilterParser#parse(String)
655         */
656        public static ExprNode parse( String filter ) throws ParseException
657        {
658            // The filter must not be null. This is a defensive test
659            if ( StringTools.isEmpty( filter ) )
660            {
661                throw new ParseException( "Empty filter", 0 );
662            }
663    
664            Position pos = new Position();
665            pos.start = 0;
666            pos.end = 0;
667            pos.length = filter.length();
668    
669            return parseFilterInternal( filter, pos );
670        }
671    
672    
673        public void setFilterParserMonitor( FilterParserMonitor monitor )
674        {
675        }
676    }