001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 *
019 */
020 package org.apache.directory.shared.ldap.filter;
021
022
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;

import org.apache.directory.shared.ldap.entry.Value;
import org.apache.directory.shared.ldap.entry.client.ClientBinaryValue;
import org.apache.directory.shared.ldap.util.AttributeUtils;
import org.apache.directory.shared.ldap.util.Position;
import org.apache.directory.shared.ldap.util.StringTools;
030
031
032 /**
033 * This class parse a Ldap filter. The grammar is given in RFC 4515
034 *
035 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
036 * @version $Rev$, $Date$
037 */
038 public class FilterParser
039 {
040 /**
041 * Creates a filter parser implementation.
042 */
043 public FilterParser()
044 {
045 }
046
047
048 /**
049 * Parse an extensible
050 *
051 * extensible = ( attr [":dn"] [':' oid] ":=" assertionvalue )
052 * / ( [":dn"] ':' oid ":=" assertionvalue )
053 * matchingrule = ":" oid
054 */
055 private static ExprNode parseExtensible( String attr, String filter, Position pos ) throws ParseException
056 {
057 ExtensibleNode node = new ExtensibleNode( attr );
058
059 if ( attr != null )
060 {
061 // First check if we have a ":dn"
062 if ( StringTools.areEquals( filter, pos.start, "dn" ) )
063 {
064 // Set the dnAttributes flag and move forward in the string
065 node.setDnAttributes( true );
066 pos.start += 2;
067 }
068 else
069 {
070 // Push back the ':'
071 pos.start--;
072 }
073
074 // Do we have a MatchingRule ?
075 if ( StringTools.charAt( filter, pos.start ) == ':' )
076 {
077 pos.start++;
078 int start = pos.start;
079
080 if ( StringTools.charAt( filter, pos.start ) == '=' )
081 {
082 pos.start++;
083
084 // Get the assertionValue
085 node.setValue( parseAssertionValue( filter, pos, true ) );
086
087 return node;
088 }
089 else
090 {
091 AttributeUtils.parseAttribute( filter, pos, false );
092
093 node.setMatchingRuleId( filter.substring( start, pos.start ) );
094
095 if ( StringTools.areEquals( filter, pos.start, ":=" ) )
096 {
097 pos.start += 2;
098
099 // Get the assertionValue
100 node.setValue( parseAssertionValue( filter, pos, true ) );
101
102 return node;
103 }
104 else
105 {
106 throw new ParseException( "AssertionValue expected", pos.start );
107 }
108 }
109 }
110 else
111 {
112 throw new ParseException( "Expected MatchingRule or assertionValue", pos.start );
113 }
114 }
115 else
116 {
117 boolean oidRequested = false;
118
119 // First check if we have a ":dn"
120 if ( StringTools.areEquals( filter, pos.start, ":dn" ) )
121 {
122 // Set the dnAttributes flag and move forward in the string
123 node.setDnAttributes( true );
124 pos.start += 3;
125 }
126 else
127 {
128 oidRequested = true;
129 }
130
131 // Do we have a MatchingRule ?
132 if ( StringTools.charAt( filter, pos.start ) == ':' )
133 {
134 pos.start++;
135 int start = pos.start;
136
137 if ( StringTools.charAt( filter, pos.start ) == '=' )
138 {
139 if ( oidRequested )
140 {
141 throw new ParseException( "MatchingRule expected", pos.start );
142 }
143
144 pos.start++;
145
146 // Get the assertionValue
147 node.setValue( parseAssertionValue( filter, pos, true ) );
148
149 return node;
150 }
151 else
152 {
153 AttributeUtils.parseAttribute( filter, pos, false );
154
155 node.setMatchingRuleId( filter.substring( start, pos.start ) );
156
157 if ( StringTools.areEquals( filter, pos.start, ":=" ) )
158 {
159 pos.start += 2;
160
161 // Get the assertionValue
162 node.setValue( parseAssertionValue( filter, pos, true ) );
163
164 return node;
165 }
166 else
167 {
168 throw new ParseException( "AssertionValue expected", pos.start );
169 }
170 }
171 }
172 else
173 {
174 throw new ParseException( "Expected MatchingRule or assertionValue", pos.start );
175 }
176 }
177 }
178
179
180 /**
181 * An assertion value :
182 * assertionvalue = valueencoding
183 * valueencoding = 0*(normal / escaped)
184 * normal = UTF1SUBSET / UTFMB
185 * escaped = '\' HEX HEX
186 * HEX = '0'-'9' / 'A'-'F' / 'a'-'f'
187 * UTF1SUBSET = %x01-27 / %x2B-5B / %x5D-7F (Everything but '\0', '*', '(', ')' and '\')
188 * UTFMB = UTF2 / UTF3 / UTF4
189 * UTF0 = %x80-BF
190 * UTF2 = %xC2-DF UTF0
191 * UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC UTF0 UTF0 / %xED %x80-9F UTF0 / %xEE-EF UTF0 UTF0
192 * UTF4 = %xF0 %x90-BF UTF0 UTF0 / %xF1-F3 UTF0 UTF0 UTF0 / %xF4 %x80-8F UTF0 UTF0
193 *
194 * With the specific constraints (RFC 4515):
195 * "The <valueencoding> rule ensures that the entire filter string is a"
196 * "valid UTF-8 string and provides that the octets that represent the"
197 * "ASCII characters "*" (ASCII 0x2a), "(" (ASCII 0x28), ")" (ASCII"
198 * "0x29), "\" (ASCII 0x5c), and NUL (ASCII 0x00) are represented as a"
199 * "backslash "\" (ASCII 0x5c) followed by the two hexadecimal digits"
200 * "representing the value of the encoded octet."
201
202 *
203 * The incomming String is already transformed from UTF-8 to unicode, so we must assume that the
204 * grammar we have to check is the following :
205 *
206 * assertionvalue = valueencoding
207 * valueencoding = 0*(normal / escaped)
208 * normal = unicodeSubset
209 * escaped = '\' HEX HEX
210 * HEX = '0'-'9' / 'A'-'F' / 'a'-'f'
211 * unicodeSubset = %x01-27 / %x2B-5B / %x5D-FFFF
212 */
213 private static Value<?> parseAssertionValue( String filter, Position pos, boolean preserveEscapedChars ) throws ParseException
214 {
215 int start = pos.start;
216 char c = StringTools.charAt( filter, pos.start );
217
218 // Create a buffer big enough to contain the value once converted
219 byte[] value = new byte[ filter.length() - pos.start];
220 int current = 0;
221
222 do
223 {
224 if ( StringTools.isUnicodeSubset( c ) )
225 {
226 value[current++] = (byte)c;
227 pos.start++;
228 }
229 else if ( StringTools.isCharASCII( filter, pos.start, '\\' ) )
230 {
231 // Maybe an escaped
232 pos.start++;
233
234 // First hex
235 if ( StringTools.isHex( filter, pos.start ) )
236 {
237 pos.start++;
238 }
239 else
240 {
241 throw new ParseException( "Not a valid escaped value", pos.start );
242 }
243
244 // second hex
245 if ( StringTools.isHex( filter, pos.start ) )
246 {
247 value[current++] = StringTools.getHexValue( filter.charAt( pos.start - 1 ), filter.charAt( pos.start ) );
248 pos.start++;
249 }
250 else
251 {
252 throw new ParseException( "Not a valid escaped value", pos.start );
253 }
254 }
255 else
256 {
257 // not a valid char, so let's get out
258 break;
259 }
260 }
261 while ( ( c = StringTools.charAt( filter, pos.start ) ) != '\0' );
262
263 if ( current != 0 )
264 {
265 byte[] result = new byte[ current ];
266 System.arraycopy( value, 0, result, 0, current );
267
268 return new ClientBinaryValue( result );
269 }
270 else
271 {
272 return new ClientBinaryValue();
273 }
274 }
275
276
277 private static Value<?> parseAssertionValue( String filter, Position pos ) throws ParseException
278 {
279 return parseAssertionValue( filter, pos, false );
280 }
281
282
283 /**
284 * Parse a substring
285 */
286 private static ExprNode parseSubstring( String attr, Value<?> initial, String filter, Position pos )
287 throws ParseException
288 {
289 if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
290 {
291 // We have found a '*' : this is a substring
292 SubstringNode node = new SubstringNode( attr );
293
294 if ( initial != null && !initial.isNull() )
295 {
296 // We have a substring starting with a value : val*...
297 // Set the initial value. It must be a String
298 String initialStr = initial.getString();
299 node.setInitial( initialStr );
300 }
301
302 pos.start++;
303
304 //
305 while ( true )
306 {
307 Value<?> assertionValue = parseAssertionValue( filter, pos );
308
309 // Is there anything else but a ')' after the value ?
310 if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
311 {
312 // Nope : as we have had [initial] '*' (any '*' ) *,
313 // this is the final
314 if ( !assertionValue.isNull() )
315 {
316 String finalStr = assertionValue.getString();
317 node.setFinal( finalStr );
318 }
319
320 return node;
321 }
322 else if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
323 {
324 // We have a '*' : it's an any
325 // If the value is empty, that means we have more than
326 // one consecutive '*' : do nothing in this case.
327 if ( !assertionValue.isNull() )
328 {
329 String anyStr = assertionValue.getString();
330 node.addAny( anyStr );
331 }
332
333 pos.start++;
334 }
335 else
336 {
337 // This is an error
338 throw new ParseException( "Bad substring", pos.start );
339 }
340 }
341 }
342 else
343 {
344 // This is an error
345 throw new ParseException( "Bad substring", pos.start );
346 }
347 }
348
349
350 /**
351 * Here is the grammar to parse :
352 *
353 * simple ::= '=' assertionValue
354 * present ::= '=' '*'
355 * substring ::= '=' [initial] any [final]
356 * initial ::= assertionValue
357 * any ::= '*' ( assertionValue '*')*
358 *
359 * As we can see, there is an ambiguity in the grammar : attr=* can be
360 * seen as a present or as a substring. As stated in the RFC :
361 *
362 * "Note that although both the <substring> and <present> productions in"
363 * "the grammar above can produce the "attr=*" construct, this construct"
364 * "is used only to denote a presence filter." (RFC 4515, 3)
365 *
366 * We have also to consider the difference between a substring and the
367 * equality node : this last node does not contain a '*'
368 *
369 * @param attr
370 * @param filter
371 * @param pos
372 * @return
373 */
374 private static ExprNode parsePresenceEqOrSubstring( String attr, String filter, Position pos )
375 throws ParseException
376 {
377 if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
378 {
379 // To be a present node, the next char should be a ')'
380 pos.start++;
381
382 if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
383 {
384 // This is a present node
385 return new PresenceNode( attr );
386 }
387 else
388 {
389 // Definitively a substring with no initial or an error
390 // Push back the '*' on the string
391 pos.start--;
392 return parseSubstring( attr, null, filter, pos );
393 }
394 }
395 else if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
396 {
397 // An empty equality Node
398 return new EqualityNode( attr, new ClientBinaryValue() );
399 }
400 else
401 {
402 // A substring or an equality node
403 Value<?> value = parseAssertionValue( filter, pos );
404
405 // Is there anything else but a ')' after the value ?
406 if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
407 {
408 // This is an equality node
409 return new EqualityNode( attr, value );
410 }
411
412 return parseSubstring( attr, value, filter, pos );
413 }
414 }
415
416
417 /**
418 * Parse the following grammar :
419 * item = simple / present / substring / extensible
420 * simple = attr filtertype assertionvalue
421 * filtertype = '=' / '~=' / '>=' / '<='
422 * present = attr '=' '*'
423 * substring = attr '=' [initial] any [final]
424 * extensible = ( attr [":dn"] [':' oid] ":=" assertionvalue )
425 * / ( [":dn"] ':' oid ":=" assertionvalue )
426 * matchingrule = ":" oid
427 *
428 * An item starts with an attribute or a colon.
429 */
430 private static ExprNode parseItem( String filter, Position pos, char c ) throws ParseException
431 {
432 LeafNode node = null;
433 String attr = null;
434
435 if ( c == '\0' )
436 {
437 throw new ParseException( "Bad char", pos.start );
438 }
439
440 if ( c == ':' )
441 {
442 // If we have a colon, then the item is an extensible one
443 return parseExtensible( null, filter, pos );
444 }
445 else
446 {
447 // We must have an attribute
448 attr = AttributeUtils.parseAttribute( filter, pos, true );
449
450 // Now, we may have a present, substring, simple or an extensible
451 c = StringTools.charAt( filter, pos.start );
452
453 switch ( c )
454 {
455 case '=':
456 // It can be a presence, an equal or a substring
457 pos.start++;
458 return parsePresenceEqOrSubstring( attr, filter, pos );
459
460 case '~':
461 // Approximate node
462 pos.start++;
463
464 // Check that we have a '='
465 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
466 {
467 throw new ParseException( "Expecting a '=' ", pos.start );
468 }
469
470 pos.start++;
471
472 // Parse the value and create the node
473 node = new ApproximateNode( attr, parseAssertionValue( filter, pos ) );
474 return node;
475
476 case '>':
477 // Greater or equal node
478 pos.start++;
479
480 // Check that we have a '='
481 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
482 {
483 throw new ParseException( "Expecting a '=' ", pos.start );
484 }
485
486 pos.start++;
487
488 // Parse the value and create the node
489 node = new GreaterEqNode( attr, parseAssertionValue( filter, pos ) );
490 return node;
491
492 case '<':
493 // Less or equal node
494 pos.start++;
495
496 // Check that we have a '='
497 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
498 {
499 throw new ParseException( "Expecting a '=' ", pos.start );
500 }
501
502 pos.start++;
503
504 // Parse the value and create the node
505 node = new LessEqNode( attr, parseAssertionValue( filter, pos ) );
506 return node;
507
508 case ':':
509 // An extensible node
510 pos.start++;
511 return parseExtensible( attr, filter, pos );
512
513 default:
514 // This is an error
515 throw new ParseException( "An item is expected", pos.start );
516 }
517 }
518 }
519
520
521 /**
522 * Parse AND, OR and NOT nodes :
523 *
524 * and = '&' filterlist
525 * or = '|' filterlist
526 * not = '!' filter
527 * filterlist = 1*filter
528 *
529 * @return
530 */
531 private static ExprNode parseBranchNode( ExprNode node, String filter, Position pos ) throws ParseException
532 {
533 BranchNode bNode = ( BranchNode ) node;
534
535 // We must have at least one filter
536 ExprNode child = parseFilterInternal( filter, pos );
537
538 // Add the child to the node children
539 bNode.addNode( child );
540
541 // Now, iterate recusively though all the remaining filters, if any
542 while ( ( child = parseFilterInternal( filter, pos ) ) != null )
543 {
544 // Add the child to the node children
545 bNode.addNode( child );
546 }
547
548 return node;
549 }
550
551
552 /**
553 * filtercomp = and / or / not / item
554 * and = '&' filterlist
555 * or = '|' filterlist
556 * not = '!' filter
557 * item = simple / present / substring / extensible
558 * simple = attr filtertype assertionvalue
559 * present = attr EQUALS ASTERISK
560 * substring = attr EQUALS [initial] any [final]
561 * extensible = ( attr [dnattrs]
562 * [matchingrule] COLON EQUALS assertionvalue )
563 * / ( [dnattrs]
564 * matchingrule COLON EQUALS assertionvalue )
565 */
566 private static ExprNode parseFilterComp( String filter, Position pos ) throws ParseException
567 {
568 ExprNode node = null;
569
570 if ( pos.start == pos.length )
571 {
572 throw new ParseException( "Empty filterComp", pos.start );
573 }
574
575 char c = StringTools.charAt( filter, pos.start );
576
577 switch ( c )
578 {
579 case '&':
580 // This is a AND node
581 pos.start++;
582 node = new AndNode();
583 parseBranchNode( node, filter, pos );
584 break;
585
586 case '|':
587 // This is an OR node
588 pos.start++;
589 node = new OrNode();
590 parseBranchNode( node, filter, pos );
591 break;
592
593 case '!':
594 // This is a NOT node
595 pos.start++;
596 node = new NotNode();
597 parseBranchNode( node, filter, pos );
598 break;
599
600 default:
601 // This is an item
602 node = parseItem( filter, pos, c );
603 break;
604
605 }
606
607 return node;
608 }
609
610
611 /**
612 * Pasre the grammar rule :
613 * filter ::= '(' filterComp ')'
614 */
615 private static ExprNode parseFilterInternal( String filter, Position pos ) throws ParseException
616 {
617 // Check for the left '('
618 if ( !StringTools.isCharASCII( filter, pos.start, '(' ) )
619 {
620 // No more node, get out
621 if ( ( pos.start == 0 ) && ( pos.length != 0 ) )
622 {
623 throw new ParseException( "No '(' at the begining of the filter", 0 );
624 }
625 else
626 {
627 return null;
628 }
629 }
630
631 pos.start++;
632
633 // parse the filter component
634 ExprNode node = parseFilterComp( filter, pos );
635
636 if ( node == null )
637 {
638 throw new ParseException( "Bad filter", pos.start );
639 }
640
641 // Check that we have a right ')'
642 if ( !StringTools.isCharASCII( filter, pos.start, ')' ) )
643 {
644 throw new ParseException( "The filter has no right parenthese", pos.start );
645 }
646
647 pos.start++;
648
649 return node;
650 }
651
652
653 /**
654 * @see FilterParser#parse(String)
655 */
656 public static ExprNode parse( String filter ) throws ParseException
657 {
658 // The filter must not be null. This is a defensive test
659 if ( StringTools.isEmpty( filter ) )
660 {
661 throw new ParseException( "Empty filter", 0 );
662 }
663
664 Position pos = new Position();
665 pos.start = 0;
666 pos.end = 0;
667 pos.length = filter.length();
668
669 return parseFilterInternal( filter, pos );
670 }
671
672
673 public void setFilterParserMonitor( FilterParserMonitor monitor )
674 {
675 }
676 }