001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 *
019 */
020 package org.apache.directory.shared.ldap.util;
021
022
023 import java.io.ByteArrayOutputStream;
024 import java.io.File;
025 import java.io.OutputStreamWriter;
026 import java.io.UnsupportedEncodingException;
027 import java.util.Iterator;
028 import java.util.List;
029 import java.util.Map;
030 import java.io.FileFilter;
031 import java.lang.reflect.Method;
032 import java.nio.charset.Charset;
033 import java.util.ArrayList;
034 import java.util.regex.Pattern;
035 import java.util.regex.PatternSyntaxException;
036
037 import javax.naming.InvalidNameException;
038
039 import org.apache.directory.shared.ldap.entry.client.ClientBinaryValue;
040 import org.apache.directory.shared.ldap.entry.client.ClientStringValue;
041
042
043 /**
044 * Various string manipulation methods that are more efficient than chaining
045 * string operations: all is done in the same buffer without creating a bunch of
046 * string objects.
047 *
048 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
049 * @version $Rev: 798875 $
050 */
051 public class StringTools
052 {
053 /**
 * Name of the default charset. It is kept in a field because JDK 1.5 does not provide it directly.
 * Starts out as null. NOTE(review): presumably assigned on first use by code outside this excerpt — confirm.
 */
054 static String defaultCharset = null;
055
056
057
058 // ~ Static fields/initializers
059 // -----------------------------------------------------------------
060
061 /** Upper-case hexadecimal digits as ASCII bytes, indexed by nibble value (0 to 15) */
062 private static final byte[] HEX_CHAR = new byte[]
063 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
064
// Bit patterns describing the leading bits of the first byte of a UTF-8 sequence.
// NOTE(review): the code that applies these constants is not visible in this part of the file;
// presumably each X_MASK / X pair is used as ( b & X_MASK ) == X — confirm at the call sites.

/** 1000 0000 : the high bit of a byte */
065 private static final int UTF8_MULTI_BYTES_MASK = 0x0080;
066
/** 1110 0000 : the three high bits */
067 private static final int UTF8_TWO_BYTES_MASK = 0x00E0;
068
/** 1100 0000 : pattern 110x xxxx */
069 private static final int UTF8_TWO_BYTES = 0x00C0;
070
/** 1111 0000 : the four high bits */
071 private static final int UTF8_THREE_BYTES_MASK = 0x00F0;
072
/** 1110 0000 : pattern 1110 xxxx */
073 private static final int UTF8_THREE_BYTES = 0x00E0;
074
/** 1111 1000 : the five high bits */
075 private static final int UTF8_FOUR_BYTES_MASK = 0x00F8;
076
/** 1111 0000 : pattern 1111 0xxx */
077 private static final int UTF8_FOUR_BYTES = 0x00F0;
078
/** 1111 1100 : the six high bits */
079 private static final int UTF8_FIVE_BYTES_MASK = 0x00FC;
080
/** 1111 1000 : pattern 1111 10xx */
081 private static final int UTF8_FIVE_BYTES = 0x00F8;
082
/** 1111 1110 : the seven high bits */
083 private static final int UTF8_SIX_BYTES_MASK = 0x00FE;
084
/** 1111 1100 : pattern 1111 110x */
085 private static final int UTF8_SIX_BYTES = 0x00FC;
086
087 /**
 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A]
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for 'A'-'Z' and 'a'-'z' only.
 */
088 private static final boolean[] ALPHA =
089 {
090 false, false, false, false, false, false, false, false,
091 false, false, false, false, false, false, false, false,
092 false, false, false, false, false, false, false, false,
093 false, false, false, false, false, false, false, false,
094 false, false, false, false, false, false, false, false,
095 false, false, false, false, false, false, false, false,
096 false, false, false, false, false, false, false, false,
097 false, false, false, false, false, false, false, false,
098 false, true, true, true, true, true, true, true,
099 true, true, true, true, true, true, true, true,
100 true, true, true, true, true, true, true, true,
101 true, true, true, false, false, false, false, false,
102 false, true, true, true, true, true, true, true,
103 true, true, true, true, true, true, true, true,
104 true, true, true, true, true, true, true, true,
105 true, true, true, false, false, false, false, false
106 };
107
108 /**
 * <alpha-lower-case> ::= [0x61-0x7A]
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for 'a'-'z' only.
 */
109 private static final boolean[] ALPHA_LOWER_CASE =
110 {
111 false, false, false, false, false, false, false, false,
112 false, false, false, false, false, false, false, false,
113 false, false, false, false, false, false, false, false,
114 false, false, false, false, false, false, false, false,
115 false, false, false, false, false, false, false, false,
116 false, false, false, false, false, false, false, false,
117 false, false, false, false, false, false, false, false,
118 false, false, false, false, false, false, false, false,
119 false, false, false, false, false, false, false, false,
120 false, false, false, false, false, false, false, false,
121 false, false, false, false, false, false, false, false,
122 false, false, false, false, false, false, false, false,
123 false, true, true, true, true, true, true, true,
124 true, true, true, true, true, true, true, true,
125 true, true, true, true, true, true, true, true,
126 true, true, true, false, false, false, false, false
127 };
128
129 /**
 * <alpha-upper-case> ::= [0x41-0x5A]
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for 'A'-'Z' only.
 */
130 private static final boolean[] ALPHA_UPPER_CASE =
131 {
132 false, false, false, false, false, false, false, false,
133 false, false, false, false, false, false, false, false,
134 false, false, false, false, false, false, false, false,
135 false, false, false, false, false, false, false, false,
136 false, false, false, false, false, false, false, false,
137 false, false, false, false, false, false, false, false,
138 false, false, false, false, false, false, false, false,
139 false, false, false, false, false, false, false, false,
140 false, true, true, true, true, true, true, true,
141 true, true, true, true, true, true, true, true,
142 true, true, true, true, true, true, true, true,
143 true, true, true, false, false, false, false, false,
144 false, false, false, false, false, false, false, false,
145 false, false, false, false, false, false, false, false,
146 false, false, false, false, false, false, false, false,
147 false, false, false, false, false, false, false, false,
148 };
149
150 /**
 * <alpha-digit> ::= <alpha> | <digit>
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for '0'-'9', 'A'-'Z' and 'a'-'z'.
 */
151 private static final boolean[] ALPHA_DIGIT =
152 {
153 false, false, false, false, false, false, false, false,
154 false, false, false, false, false, false, false, false,
155 false, false, false, false, false, false, false, false,
156 false, false, false, false, false, false, false, false,
157 false, false, false, false, false, false, false, false,
158 false, false, false, false, false, false, false, false,
159 true, true, true, true, true, true, true, true,
160 true, true, false, false, false, false, false, false,
161 false, true, true, true, true, true, true, true,
162 true, true, true, true, true, true, true, true,
163 true, true, true, true, true, true, true, true,
164 true, true, true, false, false, false, false, false,
165 false, true, true, true, true, true, true, true,
166 true, true, true, true, true, true, true, true,
167 true, true, true, true, true, true, true, true,
168 true, true, true, false, false, false, false, false
169 };
170
171 /**
 * <alpha> | <digit> | '-'
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for '-', '0'-'9', 'A'-'Z' and 'a'-'z'.
 */
172 private static final boolean[] CHAR =
173 {
174 false, false, false, false, false, false, false, false,
175 false, false, false, false, false, false, false, false,
176 false, false, false, false, false, false, false, false,
177 false, false, false, false, false, false, false, false,
178 false, false, false, false, false, false, false, false,
179 false, false, false, false, false, true, false, false,
180 true, true, true, true, true, true, true, true,
181 true, true, false, false, false, false, false, false,
182 false, true, true, true, true, true, true, true,
183 true, true, true, true, true, true, true, true,
184 true, true, true, true, true, true, true, true,
185 true, true, true, false, false, false, false, false,
186 false, true, true, true, true, true, true, true,
187 true, true, true, true, true, true, true, true,
188 true, true, true, true, true, true, true, true,
189 true, true, true, false, false, false, false, false
190 };
191
192 /**
 * %01-%27 %2B-%5B %5D-%7F
 *
 * 128 flags indexed by ASCII code (8 entries per row): every entry is true except NUL (0x00),
 * '(' (0x28), ')' (0x29), '*' (0x2A) and '\' (0x5C).
 */
193 private static final boolean[] UNICODE_SUBSET =
194 {
195 false, true, true, true, true, true, true, true, // '\0'
196 true, true, true, true, true, true, true, true,
197 true, true, true, true, true, true, true, true,
198 true, true, true, true, true, true, true, true,
199 true, true, true, true, true, true, true, true,
200 false, false, false, true, true, true, true, true, // '(', ')', '*'
201 true, true, true, true, true, true, true, true,
202 true, true, true, true, true, true, true, true,
203 true, true, true, true, true, true, true, true,
204 true, true, true, true, true, true, true, true,
205 true, true, true, true, true, true, true, true,
206 true, true, true, true, false, true, true, true, // '\'
207 true, true, true, true, true, true, true, true,
208 true, true, true, true, true, true, true, true,
209 true, true, true, true, true, true, true, true,
210 true, true, true, true, true, true, true, true,
211 };
212
213 /**
 * '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for 0x30-0x39 only.
 */
214 private static final boolean[] DIGIT =
215 {
216 false, false, false, false, false, false, false, false,
217 false, false, false, false, false, false, false, false,
218 false, false, false, false, false, false, false, false,
219 false, false, false, false, false, false, false, false,
220 false, false, false, false, false, false, false, false,
221 false, false, false, false, false, false, false, false,
222 true, true, true, true, true, true, true, true,
223 true, true, false, false, false, false, false, false,
224 false, false, false, false, false, false, false, false,
225 false, false, false, false, false, false, false, false,
226 false, false, false, false, false, false, false, false,
227 false, false, false, false, false, false, false, false,
228 false, false, false, false, false, false, false, false,
229 false, false, false, false, false, false, false, false,
230 false, false, false, false, false, false, false, false,
231 false, false, false, false, false, false, false, false
232 };
233
234 /**
 * <hex> ::= [0x30-0x39] | [0x41-0x46] | [0x61-0x66]
 *
 * 128 flags indexed by ASCII code (8 entries per row): true for '0'-'9', 'A'-'F' and 'a'-'f'.
 */
235 private static final boolean[] HEX =
236 {
237 false, false, false, false, false, false, false, false,
238 false, false, false, false, false, false, false, false,
239 false, false, false, false, false, false, false, false,
240 false, false, false, false, false, false, false, false,
241 false, false, false, false, false, false, false, false,
242 false, false, false, false, false, false, false, false,
243 true, true, true, true, true, true, true, true,
244 true, true, false, false, false, false, false, false,
245 false, true, true, true, true, true, true, false,
246 false, false, false, false, false, false, false, false,
247 false, false, false, false, false, false, false, false,
248 false, false, false, false, false, false, false, false,
249 false, true, true, true, true, true, true, false,
250 false, false, false, false, false, false, false, false,
251 false, false, false, false, false, false, false, false,
252 false, false, false, false, false, false, false, false };
253
254 /**
 * A table of booleans telling whether the corresponding char is printable.
 *
 * 128 flags indexed by ASCII code (8 entries per row). The true entries are the letters, the digits,
 * the space and the characters ' ( ) + , - . / : = ? as spelled out in the per-row comments.
 */
255 private static final boolean[] IS_PRINTABLE_CHAR =
256 {
257 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
258 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
259 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
260 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
261 true, false, false, false, false, false, false, true, // ' ', ---, ---, ---, ---, ---, ---, "'"
262 true, true, false, true, true, true, true, true, // '(', ')', ---, '+', ',', '-', '.', '/'
263 true, true, true, true, true, true, true, true, // '0', '1', '2', '3', '4', '5', '6', '7',
264 true, true, true, false, false, true, false, true, // '8', '9', ':', ---, ---, '=', ---, '?'
265 false, true, true, true, true, true, true, true, // ---, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
266 true, true, true, true, true, true, true, true, // 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O'
267 true, true, true, true, true, true, true, true, // 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W'
268 true, true, true, false, false, false, false, false, // 'X', 'Y', 'Z', ---, ---, ---, ---, ---
269 false, true, true, true, true, true, true, true, // ---, 'a', 'b', 'c', 'd', 'e', 'f', 'g'
270 true, true, true, true, true, true, true, true, // 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o'
271 true, true, true, true, true, true, true, true, // 'p', 'q', 'r', 's', 't', 'u', 'v', 'w'
272 true, true, true, false, false, false, false, false // 'x', 'y', 'z', ---, ---, ---, ---, ---
273 };
274
275
276 /** <hex> ::= [0x30-0x39] | [0x41-0x46] | [0x61-0x66] */
277 private static final byte[] HEX_VALUE =
278 {
279 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00 -> 0F
280 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 -> 1F
281 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20 -> 2F
282 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30 -> 3F ( 0, 1,2, 3, 4,5, 6, 7, 8, 9 )
283 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40 -> 4F ( A, B, C, D, E, F )
284 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50 -> 5F
285 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 // 60 -> 6F ( a, b, c, d, e, f )
286 };
287
288 /**
 * lowerCase = 'a' .. 'z', '0'..'9', '-'
 *
 * 192 entries indexed by char code 0x00-0xBF (8 entries per row). 'A'-'Z' and 'a'-'z' map to the
 * lower-case letter, '0'-'9' and '-' map to themselves, and every other entry (including '.') is 0.
 */
289 private static final char[] LOWER_CASE =
290 {
291 0, 0, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 0, 0, 0, 0, 0,
293 0, 0, 0, 0, 0, 0, 0, 0,
294 0, 0, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 0, 0, 0, 0, 0,
296 0, 0, 0, 0, 0, '-', 0, 0,
297 '0', '1', '2', '3', '4', '5', '6', '7',
298 '8', '9', 0, 0, 0, 0, 0, 0,
299 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
300 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
301 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
302 'x', 'y', 'z', 0, 0, 0, 0, 0,
303 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
304 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
305 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
306 'x', 'y', 'z', 0, 0, 0, 0, 0,
307 0, 0, 0, 0, 0, 0, 0, 0,
308 0, 0, 0, 0, 0, 0, 0, 0,
309 0, 0, 0, 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0,
311 0, 0, 0, 0, 0, 0, 0, 0,
312 0, 0, 0, 0, 0, 0, 0, 0,
313 0, 0, 0, 0, 0, 0, 0, 0,
314 0, 0, 0, 0, 0, 0, 0, 0
315 };
316
/**
 * Lower-casing table with 256 entries indexed by char code 0x00-0xFF (8 entries per row).
 * 'A'-'Z' map to 'a'-'z'; every other entry holds its own index, i.e. the char is left unchanged.
 */
317 private static final char[] TO_LOWER_CASE =
318 {
319 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
320 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
321 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
322 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
323 ' ', 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, '\'',
324 '(', ')', 0x2A, '+', ',', '-', '.', '/',
325 '0', '1', '2', '3', '4', '5', '6', '7',
326 '8', '9', ':', 0x3B, 0x3C, '=', 0x3E, '?',
327 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
328 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
329 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
330 'x', 'y', 'z', 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
331 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
332 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
333 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
334 'x', 'y', 'z', 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
335 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
336 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
337 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
338 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
339 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
340 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
341 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
342 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
343 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
344 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
345 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
346 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
347 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
348 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
349 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
350 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
351 };
352
353
354 /**
 * upperCase = 'A' .. 'Z', '0'..'9', '-'
 *
 * 192 entries indexed by char code 0x00-0xBF (8 entries per row). 'A'-'Z' and 'a'-'z' map to the
 * upper-case letter, '0'-'9' and '-' map to themselves, and every other entry (including '.') is 0.
 */
355 private static final char[] UPPER_CASE =
356 {
357 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 0, 0, 0, 0, 0, 0, 0,
359 0, 0, 0, 0, 0, 0, 0, 0,
360 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, '-', 0, 0,
363 '0', '1', '2', '3', '4', '5', '6', '7',
364 '8', '9', 0, 0, 0, 0, 0, 0,
365 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
366 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
367 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
368 'X', 'Y', 'Z', 0, 0, 0, 0, 0,
369 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
370 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
371 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
372 'X', 'Y', 'Z', 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0,
374 0, 0, 0, 0, 0, 0, 0, 0,
375 0, 0, 0, 0, 0, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0
381 };
382
// Masks selecting the high-order bits of an int. NOTE(review): the code using them is not visible in
// this part of the file; presumably ( c & X_MASK ) == 0 is the test for "c fits in that many UTF-8
// bytes" — confirm at the call sites.

/** All bits above the low 7 bits */
383 private static final int CHAR_ONE_BYTE_MASK = 0xFFFFFF80;
384
/** All bits above the low 11 bits */
385 private static final int CHAR_TWO_BYTES_MASK = 0xFFFFF800;
386
/** All bits above the low 16 bits */
387 private static final int CHAR_THREE_BYTES_MASK = 0xFFFF0000;
388
/** All bits above the low 21 bits */
389 private static final int CHAR_FOUR_BYTES_MASK = 0xFFE00000;
390
/** All bits above the low 26 bits */
391 private static final int CHAR_FIVE_BYTES_MASK = 0xFC000000;
392
/** The sign bit (bit 31) only */
393 private static final int CHAR_SIX_BYTES_MASK = 0x80000000;
394
/** The value -1. NOTE(review): presumably the "no match" result of comparison helpers defined further down — confirm. */
395 public static final int NOT_EQUAL = -1;
396
397 // The following methods are taken from org.apache.commons.lang.StringUtils
398
399 /**
 * The empty String <code>""</code>, shared so that callers do not need their own literal.
 *
 * @since 2.0
 */
404 public static final String EMPTY = "";
405
406 /**
 * The empty byte[] (zero length).
 */
409 public static final byte[] EMPTY_BYTES = new byte[]
410 {};
411
412 /**
413 * Trims several consecutive characters into one.
414 *
415 * @param str
416 * the string to trim consecutive characters of
417 * @param ch
418 * the character to trim down
419 * @return the newly trimmed down string
420 */
421 public static final String trimConsecutiveToOne( String str, char ch )
422 {
423 if ( ( null == str ) || ( str.length() == 0 ) )
424 {
425 return "";
426 }
427
428 char[] buffer = str.toCharArray();
429 char[] newbuf = new char[buffer.length];
430 int pos = 0;
431 boolean same = false;
432
433 for ( int i = 0; i < buffer.length; i++ )
434 {
435 char car = buffer[i];
436
437 if ( car == ch )
438 {
439 if ( same )
440 {
441 continue;
442 }
443 else
444 {
445 same = true;
446 newbuf[pos++] = car;
447 }
448 }
449 else
450 {
451 same = false;
452 newbuf[pos++] = car;
453 }
454 }
455
456 return new String( newbuf, 0, pos );
457 }
458
459
460 /**
461 * A deep trim of a string remove whitespace from the ends as well as
462 * excessive whitespace within the inside of the string between
463 * non-whitespace characters. A deep trim reduces internal whitespace down
464 * to a single space to perserve the whitespace separated tokenization order
465 * of the String.
466 *
467 * @param string the string to deep trim.
468 * @return the trimmed string.
469 */
470 public static final String deepTrim( String string )
471 {
472 return deepTrim( string, false );
473 }
474
475
476 /**
477 * This does the same thing as a trim but we also lowercase the string while
478 * performing the deep trim within the same buffer. This saves us from
479 * having to create multiple String and StringBuffer objects and is much
480 * more efficient.
481 *
482 * @see StringTools#deepTrim( String )
483 */
484 public static final String deepTrimToLower( String string )
485 {
486 return deepTrim( string, true );
487 }
488
489
490 /**
491 * Put common code to deepTrim(String) and deepTrimToLower here.
492 *
493 * @param str the string to deep trim
494 * @param toLowerCase how to normalize for case: upper or lower
495 * @return the deep trimmed string
496 * @see StringTools#deepTrim( String )
497 *
498 * TODO Replace the toCharArray() by substring manipulations
499 */
500 public static final String deepTrim( String str, boolean toLowerCase )
501 {
502 if ( ( null == str ) || ( str.length() == 0 ) )
503 {
504 return "";
505 }
506
507 char ch;
508 char[] buf = str.toCharArray();
509 char[] newbuf = new char[buf.length];
510 boolean wsSeen = false;
511 boolean isStart = true;
512 int pos = 0;
513
514 for ( int i = 0; i < str.length(); i++ )
515 {
516 ch = buf[i];
517
518 // filter out all uppercase characters
519 if ( toLowerCase )
520 {
521 if ( Character.isUpperCase( ch ) )
522 {
523 ch = Character.toLowerCase( ch );
524 }
525 }
526
527 // Check to see if we should add space
528 if ( Character.isWhitespace( ch ) )
529 {
530 // If the buffer has had characters added already check last
531 // added character. Only append a spc if last character was
532 // not whitespace.
533 if ( wsSeen )
534 {
535 continue;
536 }
537 else
538 {
539 wsSeen = true;
540
541 if ( isStart )
542 {
543 isStart = false;
544 }
545 else
546 {
547 newbuf[pos++] = ch;
548 }
549 }
550 }
551 else
552 {
553 // Add all non-whitespace
554 wsSeen = false;
555 isStart = false;
556 newbuf[pos++] = ch;
557 }
558 }
559
560 return ( pos == 0 ? "" : new String( newbuf, 0, ( wsSeen ? pos - 1 : pos ) ) );
561 }
562
563 /**
564 * Truncates large Strings showing a portion of the String's head and tail
565 * with the center cut out and replaced with '...'. Also displays the total
566 * length of the truncated string so size of '...' can be interpreted.
567 * Useful for large strings in UIs or hex dumps to log files.
568 *
569 * @param str the string to truncate
570 * @param head the amount of the head to display
571 * @param tail the amount of the tail to display
572 * @return the center truncated string
573 */
574 public static final String centerTrunc( String str, int head, int tail )
575 {
576 StringBuffer buf = null;
577
578 // Return as-is if String is smaller than or equal to the head plus the
579 // tail plus the number of characters added to the trunc representation
580 // plus the number of digits in the string length.
581 if ( str.length() <= ( head + tail + 7 + str.length() / 10 ) )
582 {
583 return str;
584 }
585
586 buf = new StringBuffer();
587 buf.append( '[' ).append( str.length() ).append( "][" );
588 buf.append( str.substring( 0, head ) ).append( "..." );
589 buf.append( str.substring( str.length() - tail ) );
590 buf.append( ']' );
591 return buf.toString();
592 }
593
594
595 /**
596 * Gets a hex string from byte array.
597 *
598 * @param res
599 * the byte array
600 * @return the hex string representing the binary values in the array
601 */
602 public static final String toHexString( byte[] res )
603 {
604 StringBuffer buf = new StringBuffer( res.length << 1 );
605
606 for ( int ii = 0; ii < res.length; ii++ )
607 {
608 String digit = Integer.toHexString( 0xFF & res[ii] );
609
610 if ( digit.length() == 1 )
611 {
612 digit = '0' + digit;
613 }
614
615 buf.append( digit );
616 }
617 return buf.toString().toUpperCase();
618 }
619
620 /**
621 * Rewrote the toLowercase method to improve performances.
622 * In Ldap, attributesType are supposed to use ASCII chars :
623 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only.
624 *
625 * @param value The String to lowercase
626 * @return The lowercase string
627 */
628 public static final String toLowerCase( String value )
629 {
630 if ( ( null == value ) || ( value.length() == 0 ) )
631 {
632 return "";
633 }
634
635 char[] chars = value.toCharArray();
636
637 for ( int i = 0; i < chars.length; i++ )
638 {
639 chars[i] = LOWER_CASE[ chars[i] ];
640 }
641
642 return new String( chars );
643 }
644
645 /**
646 * Rewrote the toLowercase method to improve performances.
647 * In Ldap, attributesType are supposed to use ASCII chars :
648 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only.
649 *
650 * @param value The String to uppercase
651 * @return The uppercase string
652 */
653 public static final String toUpperCase( String value )
654 {
655 if ( ( null == value ) || ( value.length() == 0 ) )
656 {
657 return "";
658 }
659
660 char[] chars = value.toCharArray();
661
662 for ( int i = 0; i < chars.length; i++ )
663 {
664 chars[i] = UPPER_CASE[ chars[i] ];
665 }
666
667 return new String( chars );
668 }
669
670 /**
671 * Get byte array from hex string
672 *
673 * @param hexString
674 * the hex string to convert to a byte array
675 * @return the byte form of the hex string.
676 */
677 public static final byte[] toByteArray( String hexString )
678 {
679 int arrLength = hexString.length() >> 1;
680 byte buf[] = new byte[arrLength];
681
682 for ( int ii = 0; ii < arrLength; ii++ )
683 {
684 int index = ii << 1;
685
686 String l_digit = hexString.substring( index, index + 2 );
687 buf[ii] = ( byte ) Integer.parseInt( l_digit, 16 );
688 }
689
690 return buf;
691 }
692
693
694 /**
695 * This method is used to insert HTML block dynamically
696 *
697 * @param source the HTML code to be processes
698 * @param replaceNl if true '\n' will be replaced by <br>
699 * @param replaceTag if true '<' will be replaced by < and '>' will be replaced
700 * by >
701 * @param replaceQuote if true '\"' will be replaced by "
702 * @return the formated html block
703 */
704 public static final String formatHtml( String source, boolean replaceNl, boolean replaceTag,
705 boolean replaceQuote )
706 {
707 StringBuffer buf = new StringBuffer();
708 int len = source.length();
709
710 for ( int ii = 0; ii < len; ii++ )
711 {
712 char ch = source.charAt( ii );
713
714 switch ( ch )
715 {
716 case '\"':
717 if ( replaceQuote )
718 {
719 buf.append( """ );
720 }
721 else
722 {
723 buf.append( ch );
724 }
725 break;
726
727 case '<':
728 if ( replaceTag )
729 {
730 buf.append( "<" );
731 }
732 else
733 {
734 buf.append( ch );
735 }
736 break;
737
738 case '>':
739 if ( replaceTag )
740 {
741 buf.append( ">" );
742 }
743 else
744 {
745 buf.append( ch );
746 }
747 break;
748
749 case '\n':
750 if ( replaceNl )
751 {
752 if ( replaceTag )
753 {
754 buf.append( "<br>" );
755 }
756 else
757 {
758 buf.append( "<br>" );
759 }
760 }
761 else
762 {
763 buf.append( ch );
764 }
765 break;
766
767 case '\r':
768 break;
769
770 case '&':
771 buf.append( "&" );
772 break;
773
774 default:
775 buf.append( ch );
776 break;
777 }
778 }
779
780 return buf.toString();
781 }
782
783
784 /**
785 * Creates a regular expression from an LDAP substring assertion filter
786 * specification.
787 *
788 * @param initialPattern
789 * the initial fragment before wildcards
790 * @param anyPattern
791 * fragments surrounded by wildcards if any
792 * @param finalPattern
793 * the final fragment after last wildcard if any
794 * @return the regular expression for the substring match filter
795 * @throws RESyntaxException
796 * if a syntactically correct regular expression cannot be
797 * compiled
798 */
799 public static final Pattern getRegex( String initialPattern, String[] anyPattern, String finalPattern )
800 throws PatternSyntaxException
801 {
802 StringBuffer buf = new StringBuffer();
803
804 if ( initialPattern != null )
805 {
806 buf.append( '^' ).append( Pattern.quote( initialPattern ) );
807 }
808
809 if ( anyPattern != null )
810 {
811 for ( int i = 0; i < anyPattern.length; i++ )
812 {
813 buf.append( ".*" ).append( Pattern.quote( anyPattern[i] ) );
814 }
815 }
816
817 if ( finalPattern != null )
818 {
819 buf.append( ".*" ).append( Pattern.quote( finalPattern ) );
820 }
821 else
822 {
823 buf.append( ".*" );
824 }
825
826 return Pattern.compile( buf.toString() );
827 }
828
829
830 /**
831 * Generates a regular expression from an LDAP substring match expression by
832 * parsing out the supplied string argument.
833 *
834 * @param ldapRegex
835 * the substring match expression
836 * @return the regular expression for the substring match filter
837 * @throws RESyntaxException
838 * if a syntactically correct regular expression cannot be
839 * compiled
840 */
841 public static final Pattern getRegex( String ldapRegex ) throws PatternSyntaxException
842 {
843 if ( ldapRegex == null )
844 {
845 throw new PatternSyntaxException( "Regex was null", "null", -1 );
846 }
847
848 List<String> any = new ArrayList<String>();
849 String remaining = ldapRegex;
850 int index = remaining.indexOf( '*' );
851
852 if ( index == -1 )
853 {
854 throw new PatternSyntaxException( "Ldap regex must have wild cards!", remaining, -1 );
855 }
856
857 String initialPattern = null;
858
859 if ( remaining.charAt( 0 ) != '*' )
860 {
861 initialPattern = remaining.substring( 0, index );
862 }
863
864 remaining = remaining.substring( index + 1, remaining.length() );
865
866 while ( ( index = remaining.indexOf( '*' ) ) != -1 )
867 {
868 any.add( remaining.substring( 0, index ) );
869 remaining = remaining.substring( index + 1, remaining.length() );
870 }
871
872 String finalPattern = null;
873 if ( !remaining.endsWith( "*" ) && remaining.length() > 0 )
874 {
875 finalPattern = remaining;
876 }
877
878 if ( any.size() > 0 )
879 {
880 String[] anyStrs = new String[any.size()];
881
882 for ( int i = 0; i < anyStrs.length; i++ )
883 {
884 anyStrs[i] = any.get( i );
885 }
886
887 return getRegex( initialPattern, anyStrs, finalPattern );
888 }
889
890 return getRegex( initialPattern, null, finalPattern );
891 }
892
893
894 /**
895 * Splits apart a OS separator delimited set of paths in a string into
896 * multiple Strings. File component path strings are returned within a List
897 * in the order they are found in the composite path string. Optionally, a
898 * file filter can be used to filter out path strings to control the
899 * components returned. If the filter is null all path components are
900 * returned.
901 *
902 * @param paths
903 * a set of paths delimited using the OS path separator
904 * @param filter
905 * a FileFilter used to filter the return set
906 * @return the filter accepted path component Strings in the order
907 * encountered
908 */
909 public static final List<String> getPaths( String paths, FileFilter filter )
910 {
911 int start = 0;
912 int stop = -1;
913 String path = null;
914 List<String> list = new ArrayList<String>();
915
916 // Abandon with no values if paths string is null
917 if ( paths == null || paths.trim().equals( "" ) )
918 {
919 return list;
920 }
921
922 final int max = paths.length() - 1;
923
924 // Loop spliting string using OS path separator: terminate
925 // when the start index is at the end of the paths string.
926 while ( start < max )
927 {
928 stop = paths.indexOf( File.pathSeparatorChar, start );
929
930 // The is no file sep between the start and the end of the string
931 if ( stop == -1 )
932 {
933 // If we have a trailing path remaining without ending separator
934 if ( start < max )
935 {
936 // Last path is everything from start to the string's end
937 path = paths.substring( start );
938
939 // Protect against consecutive separators side by side
940 if ( !path.trim().equals( "" ) )
941 {
942 // If filter is null add path, if it is not null add the
943 // path only if the filter accepts the path component.
944 if ( filter == null || filter.accept( new File( path ) ) )
945 {
946 list.add( path );
947 }
948 }
949 }
950
951 break; // Exit loop no more path components left!
952 }
953
954 // There is a separator between start and the end if we got here!
955 // start index is now at 0 or the index of last separator + 1
956 // stop index is now at next separator in front of start index
957 path = paths.substring( start, stop );
958
959 // Protect against consecutive separators side by side
960 if ( !path.trim().equals( "" ) )
961 {
962 // If filter is null add path, if it is not null add the path
963 // only if the filter accepts the path component.
964 if ( filter == null || filter.accept( new File( path ) ) )
965 {
966 list.add( path );
967 }
968 }
969
970 // Advance start index past separator to start of next path comp
971 start = stop + 1;
972 }
973
974 return list;
975 }
976
977
978 // ~ Methods
979 // ------------------------------------------------------------------------------------
980
981 /**
982 * Helper function that dump a byte in hex form
983 *
984 * @param octet The byte to dump
985 * @return A string representation of the byte
986 */
987 public static final String dumpByte( byte octet )
988 {
989 return new String( new byte[]
990 { '0', 'x', HEX_CHAR[( octet & 0x00F0 ) >> 4], HEX_CHAR[octet & 0x000F] } );
991 }
992
993
994 /**
995 * Helper function that returns a char from an hex
996 *
997 * @param hex The hex to dump
998 * @return A char representation of the hex
999 */
1000 public static final char dumpHex( byte hex )
1001 {
1002 return ( char ) HEX_CHAR[hex & 0x000F];
1003 }
1004
1005
1006 /**
1007 * Helper function that dump an array of bytes in hex form
1008 *
1009 * @param buffer The bytes array to dump
1010 * @return A string representation of the array of bytes
1011 */
1012 public static final String dumpBytes( byte[] buffer )
1013 {
1014 if ( buffer == null )
1015 {
1016 return "";
1017 }
1018
1019 StringBuffer sb = new StringBuffer();
1020
1021 for ( int i = 0; i < buffer.length; i++ )
1022 {
1023 sb.append( "0x" ).append( ( char ) ( HEX_CHAR[( buffer[i] & 0x00F0 ) >> 4] ) ).append(
1024 ( char ) ( HEX_CHAR[buffer[i] & 0x000F] ) ).append( " " );
1025 }
1026
1027 return sb.toString();
1028 }
1029
1030 /**
1031 *
1032 * Helper method to render an object which can be a String or a byte[]
1033 *
1034 * @return A string representing the object
1035 */
1036 public static String dumpObject( Object object )
1037 {
1038 if ( object != null )
1039 {
1040 if ( object instanceof String )
1041 {
1042 return (String) object;
1043 }
1044 else if ( object instanceof byte[] )
1045 {
1046 return dumpBytes( ( byte[] ) object );
1047 }
1048 else if ( object instanceof ClientStringValue )
1049 {
1050 return ( ( ClientStringValue ) object ).get();
1051 }
1052 else if ( object instanceof ClientBinaryValue )
1053 {
1054 return dumpBytes( ( ( ClientBinaryValue ) object ).get() );
1055 }
1056 else
1057 {
1058 return "<unknown type>";
1059 }
1060 }
1061 else
1062 {
1063 return "";
1064 }
1065 }
1066
1067 /**
1068 * Helper function that dump an array of bytes in hex pair form,
1069 * without '0x' and space chars
1070 *
1071 * @param buffer The bytes array to dump
1072 * @return A string representation of the array of bytes
1073 */
1074 public static final String dumpHexPairs( byte[] buffer )
1075 {
1076 if ( buffer == null )
1077 {
1078 return "";
1079 }
1080
1081 char[] str = new char[buffer.length << 1];
1082
1083 for ( int i = 0, pos = 0; i < buffer.length; i++ )
1084 {
1085 str[pos++] = ( char ) ( HEX_CHAR[( buffer[i] & 0x00F0 ) >> 4] );
1086 str[pos++] = ( char ) ( HEX_CHAR[buffer[i] & 0x000F] );
1087 }
1088
1089 return new String( str );
1090 }
1091
1092 /**
1093 * Return the Unicode char which is coded in the bytes at position 0.
1094 *
1095 * @param bytes The byte[] represntation of an Unicode string.
1096 * @return The first char found.
1097 */
1098 public static final char bytesToChar( byte[] bytes )
1099 {
1100 return bytesToChar( bytes, 0 );
1101 }
1102
1103
1104 /**
1105 * Count the number of bytes needed to return an Unicode char. This can be
1106 * from 1 to 6.
1107 *
1108 * @param bytes The bytes to read
1109 * @param pos Position to start counting. It must be a valid start of a
1110 * encoded char !
1111 * @return The number of bytes to create a char, or -1 if the encoding is
1112 * wrong. TODO : Should stop after the third byte, as a char is only
1113 * 2 bytes long.
1114 */
1115 public static final int countBytesPerChar( byte[] bytes, int pos )
1116 {
1117 if ( bytes == null )
1118 {
1119 return -1;
1120 }
1121
1122 if ( ( bytes[pos] & UTF8_MULTI_BYTES_MASK ) == 0 )
1123 {
1124 return 1;
1125 }
1126 else if ( ( bytes[pos] & UTF8_TWO_BYTES_MASK ) == UTF8_TWO_BYTES )
1127 {
1128 return 2;
1129 }
1130 else if ( ( bytes[pos] & UTF8_THREE_BYTES_MASK ) == UTF8_THREE_BYTES )
1131 {
1132 return 3;
1133 }
1134 else if ( ( bytes[pos] & UTF8_FOUR_BYTES_MASK ) == UTF8_FOUR_BYTES )
1135 {
1136 return 4;
1137 }
1138 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES )
1139 {
1140 return 5;
1141 }
1142 else if ( ( bytes[pos] & UTF8_SIX_BYTES_MASK ) == UTF8_SIX_BYTES )
1143 {
1144 return 6;
1145 }
1146 else
1147 {
1148 return -1;
1149 }
1150 }
1151
1152
1153 /**
1154 * Return the number of bytes that hold an Unicode char.
1155 *
1156 * @param car The character to be decoded
1157 * @return The number of bytes to hold the char. TODO : Should stop after
1158 * the third byte, as a char is only 2 bytes long.
1159 */
1160 public static final int countNbBytesPerChar( char car )
1161 {
1162 if ( ( car & CHAR_ONE_BYTE_MASK ) == 0 )
1163 {
1164 return 1;
1165 }
1166 else if ( ( car & CHAR_TWO_BYTES_MASK ) == 0 )
1167 {
1168 return 2;
1169 }
1170 else if ( ( car & CHAR_THREE_BYTES_MASK ) == 0 )
1171 {
1172 return 3;
1173 }
1174 else if ( ( car & CHAR_FOUR_BYTES_MASK ) == 0 )
1175 {
1176 return 4;
1177 }
1178 else if ( ( car & CHAR_FIVE_BYTES_MASK ) == 0 )
1179 {
1180 return 5;
1181 }
1182 else if ( ( car & CHAR_SIX_BYTES_MASK ) == 0 )
1183 {
1184 return 6;
1185 }
1186 else
1187 {
1188 return -1;
1189 }
1190 }
1191
1192
1193 /**
1194 * Count the number of bytes included in the given char[].
1195 *
1196 * @param chars The char array to decode
1197 * @return The number of bytes in the char array
1198 */
1199 public static final int countBytes( char[] chars )
1200 {
1201 if ( chars == null )
1202 {
1203 return 0;
1204 }
1205
1206 int nbBytes = 0;
1207 int currentPos = 0;
1208
1209 while ( currentPos < chars.length )
1210 {
1211 int nbb = countNbBytesPerChar( chars[currentPos] );
1212
1213 // If the number of bytes necessary to encode a character is
1214 // above 3, we will need two UTF-16 chars
1215 currentPos += ( nbb < 4 ? 1 : 2 );
1216 nbBytes += nbb;
1217 }
1218
1219 return nbBytes;
1220 }
1221
1222
1223 /**
1224 * Return the Unicode char which is coded in the bytes at the given
1225 * position.
1226 *
1227 * @param bytes The byte[] represntation of an Unicode string.
1228 * @param pos The current position to start decoding the char
1229 * @return The decoded char, or -1 if no char can be decoded TODO : Should
1230 * stop after the third byte, as a char is only 2 bytes long.
1231 */
1232 public static final char bytesToChar( byte[] bytes, int pos )
1233 {
1234 if ( bytes == null )
1235 {
1236 return ( char ) -1;
1237 }
1238
1239 if ( ( bytes[pos] & UTF8_MULTI_BYTES_MASK ) == 0 )
1240 {
1241 return ( char ) bytes[pos];
1242 }
1243 else
1244 {
1245 if ( ( bytes[pos] & UTF8_TWO_BYTES_MASK ) == UTF8_TWO_BYTES )
1246 {
1247 // Two bytes char
1248 return ( char ) ( ( ( bytes[pos] & 0x1C ) << 6 ) + // 110x-xxyy
1249 // 10zz-zzzz
1250 // ->
1251 // 0000-0xxx
1252 // 0000-0000
1253 ( ( bytes[pos] & 0x03 ) << 6 ) + // 110x-xxyy 10zz-zzzz
1254 // -> 0000-0000
1255 // yy00-0000
1256 ( bytes[pos + 1] & 0x3F ) // 110x-xxyy 10zz-zzzz -> 0000-0000
1257 // 00zz-zzzz
1258 ); // -> 0000-0xxx yyzz-zzzz (07FF)
1259 }
1260 else if ( ( bytes[pos] & UTF8_THREE_BYTES_MASK ) == UTF8_THREE_BYTES )
1261 {
1262 // Three bytes char
1263 return ( char ) (
1264 // 1110-tttt 10xx-xxyy 10zz-zzzz -> tttt-0000-0000-0000
1265 ( ( bytes[pos] & 0x0F ) << 12 ) +
1266 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-xxxx-0000-0000
1267 ( ( bytes[pos + 1] & 0x3C ) << 6 ) +
1268 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-0000-yy00-0000
1269 ( ( bytes[pos + 1] & 0x03 ) << 6 ) +
1270 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-0000-00zz-zzzz
1271 ( bytes[pos + 2] & 0x3F )
1272 // -> tttt-xxxx yyzz-zzzz (FF FF)
1273 );
1274 }
1275 else if ( ( bytes[pos] & UTF8_FOUR_BYTES_MASK ) == UTF8_FOUR_BYTES )
1276 {
1277 // Four bytes char
1278 return ( char ) (
1279 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 000t-tt00
1280 // 0000-0000 0000-0000
1281 ( ( bytes[pos] & 0x07 ) << 18 ) +
1282 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-00uu
1283 // 0000-0000 0000-0000
1284 ( ( bytes[pos + 1] & 0x30 ) << 16 ) +
1285 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1286 // vvvv-0000 0000-0000
1287 ( ( bytes[pos + 1] & 0x0F ) << 12 ) +
1288 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1289 // 0000-xxxx 0000-0000
1290 ( ( bytes[pos + 2] & 0x3C ) << 6 ) +
1291 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1292 // 0000-0000 yy00-0000
1293 ( ( bytes[pos + 2] & 0x03 ) << 6 ) +
1294 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1295 // 0000-0000 00zz-zzzz
1296 ( bytes[pos + 3] & 0x3F )
1297 // -> 000t-ttuu vvvv-xxxx yyzz-zzzz (1FFFFF)
1298 );
1299 }
1300 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES )
1301 {
1302 // Five bytes char
1303 return ( char ) (
1304 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1305 // 0000-00tt 0000-0000 0000-0000 0000-0000
1306 ( ( bytes[pos] & 0x03 ) << 24 ) +
1307 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1308 // 0000-0000 uuuu-uu00 0000-0000 0000-0000
1309 ( ( bytes[pos + 1] & 0x3F ) << 18 ) +
1310 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1311 // 0000-0000 0000-00vv 0000-0000 0000-0000
1312 ( ( bytes[pos + 2] & 0x30 ) << 12 ) +
1313 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1314 // 0000-0000 0000-0000 wwww-0000 0000-0000
1315 ( ( bytes[pos + 2] & 0x0F ) << 12 ) +
1316 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1317 // 0000-0000 0000-0000 0000-xxxx 0000-0000
1318 ( ( bytes[pos + 3] & 0x3C ) << 6 ) +
1319 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1320 // 0000-0000 0000-0000 0000-0000 yy00-0000
1321 ( ( bytes[pos + 3] & 0x03 ) << 6 ) +
1322 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1323 // 0000-0000 0000-0000 0000-0000 00zz-zzzz
1324 ( bytes[pos + 4] & 0x3F )
1325 // -> 0000-00tt uuuu-uuvv wwww-xxxx yyzz-zzzz (03 FF FF FF)
1326 );
1327 }
1328 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES )
1329 {
1330 // Six bytes char
1331 return ( char ) (
1332 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz
1333 // ->
1334 // 0s00-0000 0000-0000 0000-0000 0000-0000
1335 ( ( bytes[pos] & 0x01 ) << 30 ) +
1336 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz
1337 // ->
1338 // 00tt-tttt 0000-0000 0000-0000 0000-0000
1339 ( ( bytes[pos + 1] & 0x3F ) << 24 ) +
1340 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1341 // 10zz-zzzz ->
1342 // 0000-0000 uuuu-uu00 0000-0000 0000-0000
1343 ( ( bytes[pos + 2] & 0x3F ) << 18 ) +
1344 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1345 // 10zz-zzzz ->
1346 // 0000-0000 0000-00vv 0000-0000 0000-0000
1347 ( ( bytes[pos + 3] & 0x30 ) << 12 ) +
1348 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1349 // 10zz-zzzz ->
1350 // 0000-0000 0000-0000 wwww-0000 0000-0000
1351 ( ( bytes[pos + 3] & 0x0F ) << 12 ) +
1352 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1353 // 10zz-zzzz ->
1354 // 0000-0000 0000-0000 0000-xxxx 0000-0000
1355 ( ( bytes[pos + 4] & 0x3C ) << 6 ) +
1356 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1357 // 10zz-zzzz ->
1358 // 0000-0000 0000-0000 0000-0000 yy00-0000
1359 ( ( bytes[pos + 4] & 0x03 ) << 6 ) +
1360 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz
1361 // ->
1362 // 0000-0000 0000-0000 0000-0000 00zz-zzzz
1363 ( bytes[pos + 5] & 0x3F )
1364 // -> 0stt-tttt uuuu-uuvv wwww-xxxx yyzz-zzzz (7F FF FF FF)
1365 );
1366 }
1367 else
1368 {
1369 return ( char ) -1;
1370 }
1371 }
1372 }
1373
1374
1375 /**
1376 * Return the Unicode char which is coded in the bytes at the given
1377 * position.
1378 *
1379 * @param car The character to be transformed to an array of bytes
1380 *
1381 * @return The byte array representing the char
1382 *
1383 * TODO : Should stop after the third byte, as a char is only 2 bytes long.
1384 */
1385 public static final byte[] charToBytes( char car )
1386 {
1387 byte[] bytes = new byte[countNbBytesPerChar( car )];
1388
1389 if ( car <= 0x7F )
1390 {
1391 // Single byte char
1392 bytes[0] = ( byte ) car;
1393 return bytes;
1394 }
1395 else if ( car <= 0x7FF )
1396 {
1397 // two bytes char
1398 bytes[0] = ( byte ) ( 0x00C0 + ( ( car & 0x07C0 ) >> 6 ) );
1399 bytes[1] = ( byte ) ( 0x0080 + ( car & 0x3F ) );
1400 }
1401 else
1402 {
1403 // Three bytes char
1404 bytes[0] = ( byte ) ( 0x00E0 + ( ( car & 0xF000 ) >> 12 ) );
1405 bytes[1] = ( byte ) ( 0x0080 + ( ( car & 0x0FC0 ) >> 6 ) );
1406 bytes[2] = ( byte ) ( 0x0080 + ( car & 0x3F ) );
1407 }
1408
1409 return bytes;
1410 }
1411
1412
1413 /**
1414 * Count the number of chars included in the given byte[].
1415 *
1416 * @param bytes The byte array to decode
1417 * @return The number of char in the byte array
1418 */
1419 public static final int countChars( byte[] bytes )
1420 {
1421 if ( bytes == null )
1422 {
1423 return 0;
1424 }
1425
1426 int nbChars = 0;
1427 int currentPos = 0;
1428
1429 while ( currentPos < bytes.length )
1430 {
1431 currentPos += countBytesPerChar( bytes, currentPos );
1432 nbChars++;
1433 }
1434
1435 return nbChars;
1436 }
1437
1438
1439 /**
1440 * Check if a text is present at the current position in a buffer.
1441 *
1442 * @param bytes The buffer which contains the data
1443 * @param index Current position in the buffer
1444 * @param text The text we want to check
1445 * @return <code>true</code> if the buffer contains the text.
1446 */
1447 public static final int areEquals( byte[] bytes, int index, String text )
1448 {
1449 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( bytes.length <= index ) || ( index < 0 )
1450 || ( text == null ) )
1451 {
1452 return NOT_EQUAL;
1453 }
1454 else
1455 {
1456 try
1457 {
1458 byte[] data = text.getBytes( "UTF-8" );
1459
1460 return areEquals( bytes, index, data );
1461 }
1462 catch ( UnsupportedEncodingException uee )
1463 {
1464 return NOT_EQUAL;
1465 }
1466 }
1467 }
1468
1469
1470 /**
1471 * Check if a text is present at the current position in a buffer.
1472 *
1473 * @param chars The buffer which contains the data
1474 * @param index Current position in the buffer
1475 * @param text The text we want to check
1476 * @return <code>true</code> if the buffer contains the text.
1477 */
1478 public static final int areEquals( char[] chars, int index, String text )
1479 {
1480 if ( ( chars == null ) || ( chars.length == 0 ) || ( chars.length <= index ) || ( index < 0 )
1481 || ( text == null ) )
1482 {
1483 return NOT_EQUAL;
1484 }
1485 else
1486 {
1487 char[] data = text.toCharArray();
1488
1489 return areEquals( chars, index, data );
1490 }
1491 }
1492
1493
1494 /**
1495 * Check if a text is present at the current position in a buffer.
1496 *
1497 * @param chars The buffer which contains the data
1498 * @param index Current position in the buffer
1499 * @param chars2 The text we want to check
1500 * @return <code>true</code> if the buffer contains the text.
1501 */
1502 public static final int areEquals( char[] chars, int index, char[] chars2 )
1503 {
1504 if ( ( chars == null ) || ( chars.length == 0 ) || ( chars.length <= index ) || ( index < 0 )
1505 || ( chars2 == null ) || ( chars2.length == 0 )
1506 || ( chars2.length > ( chars.length + index ) ) )
1507 {
1508 return NOT_EQUAL;
1509 }
1510 else
1511 {
1512 for ( int i = 0; i < chars2.length; i++ )
1513 {
1514 if ( chars[index++] != chars2[i] )
1515 {
1516 return NOT_EQUAL;
1517 }
1518 }
1519
1520 return index;
1521 }
1522 }
1523
1524 /**
1525 * Check if a text is present at the current position in another string.
1526 *
1527 * @param string The string which contains the data
1528 * @param index Current position in the string
1529 * @param text The text we want to check
1530 * @return <code>true</code> if the string contains the text.
1531 */
1532 public static final boolean areEquals( String string, int index, String text )
1533 {
1534 if ( ( string == null ) || ( text == null ) )
1535 {
1536 return false;
1537 }
1538
1539 int length1 = string.length();
1540 int length2 = text.length();
1541
1542 if ( ( length1 == 0 ) || ( length1 <= index ) || ( index < 0 )
1543 || ( length2 == 0 ) || ( length2 > ( length1 + index ) ) )
1544 {
1545 return false;
1546 }
1547 else
1548 {
1549 return string.substring( index ).startsWith( text );
1550 }
1551 }
1552
1553
1554 /**
1555 * Check if a text is present at the current position in a buffer.
1556 *
1557 * @param bytes The buffer which contains the data
1558 * @param index Current position in the buffer
1559 * @param bytes2 The text we want to check
1560 * @return <code>true</code> if the buffer contains the text.
1561 */
1562 public static final int areEquals( byte[] bytes, int index, byte[] bytes2 )
1563 {
1564
1565 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( bytes.length <= index ) || ( index < 0 )
1566 || ( bytes2 == null ) || ( bytes2.length == 0 )
1567 || ( bytes2.length > ( bytes.length + index ) ) )
1568 {
1569 return NOT_EQUAL;
1570 }
1571 else
1572 {
1573 for ( int i = 0; i < bytes2.length; i++ )
1574 {
1575 if ( bytes[index++] != bytes2[i] )
1576 {
1577 return NOT_EQUAL;
1578 }
1579 }
1580
1581 return index;
1582 }
1583 }
1584
1585
1586 /**
1587 * Test if the current character is equal to a specific character. This
1588 * function works only for character between 0 and 127, as it does compare a
1589 * byte and a char (which is 16 bits wide)
1590 *
1591 * @param byteArray
1592 * The buffer which contains the data
1593 * @param index
1594 * Current position in the buffer
1595 * @param car
1596 * The character we want to compare with the current buffer
1597 * position
1598 * @return <code>true</code> if the current character equals the given
1599 * character.
1600 */
1601 public static final boolean isCharASCII( byte[] byteArray, int index, char car )
1602 {
1603 if ( ( byteArray == null ) || ( byteArray.length == 0 ) || ( index < 0 ) || ( index >= byteArray.length ) )
1604 {
1605 return false;
1606 }
1607 else
1608 {
1609 return ( ( byteArray[index] == car ) ? true : false );
1610 }
1611 }
1612
1613
1614 /**
1615 * Test if the current character is equal to a specific character.
1616 *
1617 * @param chars
1618 * The buffer which contains the data
1619 * @param index
1620 * Current position in the buffer
1621 * @param car
1622 * The character we want to compare with the current buffer
1623 * position
1624 * @return <code>true</code> if the current character equals the given
1625 * character.
1626 */
1627 public static final boolean isCharASCII( char[] chars, int index, char car )
1628 {
1629 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
1630 {
1631 return false;
1632 }
1633 else
1634 {
1635 return ( ( chars[index] == car ) ? true : false );
1636 }
1637 }
1638
1639 /**
1640 * Test if the current character is equal to a specific character.
1641 *
1642 * @param string The String which contains the data
1643 * @param index Current position in the string
1644 * @param car The character we want to compare with the current string
1645 * position
1646 * @return <code>true</code> if the current character equals the given
1647 * character.
1648 */
1649 public static final boolean isCharASCII( String string, int index, char car )
1650 {
1651 if ( string == null )
1652 {
1653 return false;
1654 }
1655
1656 int length = string.length();
1657
1658 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1659 {
1660 return false;
1661 }
1662 else
1663 {
1664 return string.charAt( index ) == car;
1665 }
1666 }
1667
1668
1669 /**
1670 * Test if the current character is equal to a specific character.
1671 *
1672 * @param string The String which contains the data
1673 * @param index Current position in the string
1674 * @param car The character we want to compare with the current string
1675 * position
1676 * @return <code>true</code> if the current character equals the given
1677 * character.
1678 */
1679 public static final boolean isICharASCII( String string, int index, char car )
1680 {
1681 if ( string == null )
1682 {
1683 return false;
1684 }
1685
1686 int length = string.length();
1687
1688 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1689 {
1690 return false;
1691 }
1692 else
1693 {
1694 return ( ( string.charAt( index ) | 0x20 ) & car ) == car;
1695 }
1696 }
1697
1698
1699 /**
1700 * Test if the current character is equal to a specific character.
1701 *
1702 * @param string The String which contains the data
1703 * @param index Current position in the string
1704 * @param car The character we want to compare with the current string
1705 * position
1706 * @return <code>true</code> if the current character equals the given
1707 * character.
1708 */
1709 public static final boolean isICharASCII( byte[] bytes, int index, char car )
1710 {
1711 if ( bytes == null )
1712 {
1713 return false;
1714 }
1715
1716 int length = bytes.length;
1717
1718 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1719 {
1720 return false;
1721 }
1722 else
1723 {
1724 return ( ( bytes[ index ] | 0x20 ) & car ) == car;
1725 }
1726 }
1727
1728
1729 /**
1730 * Test if the current character is a bit, ie 0 or 1.
1731 *
1732 * @param string
1733 * The String which contains the data
1734 * @param index
1735 * Current position in the string
1736 * @return <code>true</code> if the current character is a bit (0 or 1)
1737 */
1738 public static final boolean isBit( String string, int index )
1739 {
1740 if ( string == null )
1741 {
1742 return false;
1743 }
1744
1745 int length = string.length();
1746
1747 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1748 {
1749 return false;
1750 }
1751 else
1752 {
1753 char c = string.charAt( index );
1754 return ( ( c == '0' ) || ( c == '1' ) );
1755 }
1756 }
1757
1758
1759 /**
1760 * Get the character at a given position in a string, checking fo limits
1761 *
1762 * @param string The string which contains the data
1763 * @param index Current position in the string
1764 * @return The character ar the given position, or '\0' if something went wrong
1765 */
1766 public static final char charAt( String string, int index )
1767 {
1768 if ( string == null )
1769 {
1770 return '\0';
1771 }
1772
1773 int length = string.length();
1774
1775 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1776 {
1777 return '\0';
1778 }
1779 else
1780 {
1781 return string.charAt( index ) ;
1782 }
1783 }
1784
1785
1786 /**
1787 * Translate two chars to an hex value. The chars must be
1788 * in [a-fA-F0-9]
1789 *
1790 * @param high The high value
1791 * @param low The low value
1792 * @return A byte representation of the two chars
1793 */
1794 public static byte getHexValue( char high, char low )
1795 {
1796 if ( ( high > 127 ) || ( low > 127 ) || ( high < 0 ) | ( low < 0 ) )
1797 {
1798 return -1;
1799 }
1800
1801 return (byte)( ( HEX_VALUE[high] << 4 ) | HEX_VALUE[low] );
1802 }
1803
1804
1805 /**
1806 * Translate two bytes to an hex value. The bytes must be
1807 * in [0-9a-fA-F]
1808 *
1809 * @param high The high value
1810 * @param low The low value
1811 * @return A byte representation of the two bytes
1812 */
1813 public static byte getHexValue( byte high, byte low )
1814 {
1815 if ( ( high > 127 ) || ( low > 127 ) || ( high < 0 ) | ( low < 0 ) )
1816 {
1817 return -1;
1818 }
1819
1820 return (byte)( ( HEX_VALUE[high] << 4 ) | HEX_VALUE[low] );
1821 }
1822
1823
1824 /**
1825 * Return an hex value from a sinle char
1826 * The char must be in [0-9a-fA-F]
1827 *
1828 * @param c The char we want to convert
1829 * @return A byte between 0 and 15
1830 */
1831 public static byte getHexValue( char c )
1832 {
1833 if ( ( c > 127 ) || ( c < 0 ) )
1834 {
1835 return -1;
1836 }
1837
1838 return HEX_VALUE[c];
1839 }
1840
1841 /**
1842 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] |
1843 * [0x41-0x46] | [0x61-0x66]
1844 *
1845 * @param bytes The buffer which contains the data
1846 * @param index Current position in the buffer
1847 * @return <code>true</code> if the current character is a Hex Char
1848 */
1849 public static final boolean isHex( byte[] bytes, int index )
1850 {
1851 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
1852 {
1853 return false;
1854 }
1855 else
1856 {
1857 byte c = bytes[index];
1858
1859 if ( ( ( c | 0x7F ) != 0x7F ) || ( HEX[c] == false ) )
1860 {
1861 return false;
1862 }
1863 else
1864 {
1865 return true;
1866 }
1867 }
1868 }
1869
1870
1871 /**
1872 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] |
1873 * [0x41-0x46] | [0x61-0x66]
1874 *
1875 * @param chars The buffer which contains the data
1876 * @param index Current position in the buffer
1877 * @return <code>true</code> if the current character is a Hex Char
1878 */
1879 public static final boolean isHex( char[] chars, int index )
1880 {
1881 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
1882 {
1883 return false;
1884 }
1885 else
1886 {
1887 char c = chars[index];
1888
1889 if ( ( c > 127 ) || ( HEX[c] == false ) )
1890 {
1891 return false;
1892 }
1893 else
1894 {
1895 return true;
1896 }
1897 }
1898 }
1899
1900 /**
1901 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] |
1902 * [0x41-0x46] | [0x61-0x66]
1903 *
1904 * @param string The string which contains the data
1905 * @param index Current position in the string
1906 * @return <code>true</code> if the current character is a Hex Char
1907 */
1908 public static final boolean isHex( String string, int index )
1909 {
1910 if ( string == null )
1911 {
1912 return false;
1913 }
1914
1915 int length = string.length();
1916
1917 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1918 {
1919 return false;
1920 }
1921 else
1922 {
1923 char c = string.charAt( index );
1924
1925 if ( ( c > 127 ) || ( HEX[c] == false ) )
1926 {
1927 return false;
1928 }
1929 else
1930 {
1931 return true;
1932 }
1933 }
1934 }
1935
1936
1937 /**
1938 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
1939 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
1940 *
1941 * @param bytes The buffer which contains the data
1942 * @return <code>true</code> if the current character is a Digit
1943 */
1944 public static final boolean isDigit( byte[] bytes )
1945 {
1946 if ( ( bytes == null ) || ( bytes.length == 0 ) )
1947 {
1948 return false;
1949 }
1950 else
1951 {
1952 return ( ( ( ( bytes[0] | 0x7F ) != 0x7F ) || !DIGIT[bytes[0]] ) ? false : true );
1953 }
1954 }
1955
1956
1957 /**
1958 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
1959 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
1960 *
1961 * @param car the character to test
1962 *
1963 * @return <code>true</code> if the character is a Digit
1964 */
1965 public static final boolean isDigit( char car )
1966 {
1967 return ( car >= '0' ) && ( car <= '9' );
1968 }
1969
1970
1971 /**
1972 * Test if the current byte is an Alpha character :
1973 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A]
1974 *
1975 * @param c The byte to test
1976 *
1977 * @return <code>true</code> if the byte is an Alpha
1978 * character
1979 */
1980 public static final boolean isAlpha( byte c )
1981 {
1982 return ( ( c > 0 ) && ( c <= 127 ) && ALPHA[c] );
1983 }
1984
1985
1986 /**
1987 * Test if the current character is an Alpha character :
1988 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A]
1989 *
1990 * @param c The char to test
1991 *
1992 * @return <code>true</code> if the character is an Alpha
1993 * character
1994 */
1995 public static final boolean isAlpha( char c )
1996 {
1997 return ( ( c > 0 ) && ( c <= 127 ) && ALPHA[c] );
1998 }
1999
2000
2001 /**
2002 * Test if the current character is an Alpha character : <alpha> ::=
2003 * [0x41-0x5A] | [0x61-0x7A]
2004 *
2005 * @param bytes The buffer which contains the data
2006 * @param index Current position in the buffer
2007 * @return <code>true</code> if the current character is an Alpha
2008 * character
2009 */
2010 public static final boolean isAlphaASCII( byte[] bytes, int index )
2011 {
2012 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
2013 {
2014 return false;
2015 }
2016 else
2017 {
2018 byte c = bytes[index];
2019
2020 if ( ( ( c | 0x7F ) != 0x7F ) || ( ALPHA[c] == false ) )
2021 {
2022 return false;
2023 }
2024 else
2025 {
2026 return true;
2027 }
2028 }
2029 }
2030
2031
2032 /**
2033 * Test if the current character is an Alpha character : <alpha> ::=
2034 * [0x41-0x5A] | [0x61-0x7A]
2035 *
2036 * @param chars The buffer which contains the data
2037 * @param index Current position in the buffer
2038 * @return <code>true</code> if the current character is an Alpha
2039 * character
2040 */
2041 public static final boolean isAlphaASCII( char[] chars, int index )
2042 {
2043 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
2044 {
2045 return false;
2046 }
2047 else
2048 {
2049 char c = chars[index];
2050
2051 if ( ( c > 127 ) || ( ALPHA[c] == false ) )
2052 {
2053 return false;
2054 }
2055 else
2056 {
2057 return true;
2058 }
2059 }
2060 }
2061
2062
2063 /**
2064 * Test if the current character is an Alpha character : <alpha> ::=
2065 * [0x41-0x5A] | [0x61-0x7A]
2066 *
2067 * @param string The string which contains the data
2068 * @param index Current position in the string
2069 * @return <code>true</code> if the current character is an Alpha
2070 * character
2071 */
2072 public static final boolean isAlphaASCII( String string, int index )
2073 {
2074 if ( string == null )
2075 {
2076 return false;
2077 }
2078
2079 int length = string.length();
2080
2081 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2082 {
2083 return false;
2084 }
2085 else
2086 {
2087 char c = string.charAt( index );
2088
2089 if ( ( c > 127 ) || ( ALPHA[c] == false ) )
2090 {
2091 return false;
2092 }
2093 else
2094 {
2095 return true;
2096 }
2097 }
2098 }
2099
2100
2101 /**
2102 * Test if the current character is a lowercased Alpha character : <br/>
2103 * <alpha> ::= [0x61-0x7A]
2104 *
2105 * @param string The string which contains the data
2106 * @param index Current position in the string
2107 * @return <code>true</code> if the current character is a lower Alpha
2108 * character
2109 */
2110 public static final boolean isAlphaLowercaseASCII( String string, int index )
2111 {
2112 if ( string == null )
2113 {
2114 return false;
2115 }
2116
2117 int length = string.length();
2118
2119 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2120 {
2121 return false;
2122 }
2123 else
2124 {
2125 char c = string.charAt( index );
2126
2127 if ( ( c > 127 ) || ( ALPHA_LOWER_CASE[c] == false ) )
2128 {
2129 return false;
2130 }
2131 else
2132 {
2133 return true;
2134 }
2135 }
2136 }
2137
2138
2139 /**
2140 * Test if the current character is a uppercased Alpha character : <br/>
2141 * <alpha> ::= [0x61-0x7A]
2142 *
2143 * @param string The string which contains the data
2144 * @param index Current position in the string
2145 * @return <code>true</code> if the current character is a lower Alpha
2146 * character
2147 */
2148 public static final boolean isAlphaUppercaseASCII( String string, int index )
2149 {
2150 if ( string == null )
2151 {
2152 return false;
2153 }
2154
2155 int length = string.length();
2156
2157 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2158 {
2159 return false;
2160 }
2161 else
2162 {
2163 char c = string.charAt( index );
2164
2165 if ( ( c > 127 ) || ( ALPHA_UPPER_CASE[c] == false ) )
2166 {
2167 return false;
2168 }
2169 else
2170 {
2171 return true;
2172 }
2173 }
2174 }
2175
2176
2177 /**
2178 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2179 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2180 *
2181 * @param bytes The buffer which contains the data
2182 * @param index Current position in the buffer
2183 * @return <code>true</code> if the current character is a Digit
2184 */
2185 public static final boolean isDigit( byte[] bytes, int index )
2186 {
2187 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
2188 {
2189 return false;
2190 }
2191 else
2192 {
2193 return ( ( ( ( bytes[index] | 0x7F ) != 0x7F ) || !DIGIT[bytes[index]] ) ? false : true );
2194 }
2195 }
2196
2197
2198 /**
2199 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2200 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2201 *
2202 * @param chars The buffer which contains the data
2203 * @param index Current position in the buffer
2204 * @return <code>true</code> if the current character is a Digit
2205 */
2206 public static final boolean isDigit( char[] chars, int index )
2207 {
2208 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
2209 {
2210 return false;
2211 }
2212 else
2213 {
2214 return ( ( ( chars[index] > 127 ) || !DIGIT[chars[index]] ) ? false : true );
2215 }
2216 }
2217
2218
2219 /**
2220 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2221 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2222 *
2223 * @param string The string which contains the data
2224 * @param index Current position in the string
2225 * @return <code>true</code> if the current character is a Digit
2226 */
2227 public static final boolean isDigit( String string, int index )
2228 {
2229 if ( string == null )
2230 {
2231 return false;
2232 }
2233
2234 int length = string.length();
2235
2236 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2237 {
2238 return false;
2239 }
2240 else
2241 {
2242 char c = string.charAt( index );
2243 return ( ( ( c > 127 ) || !DIGIT[c] ) ? false : true );
2244 }
2245 }
2246
2247
2248 /**
2249 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2250 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2251 *
2252 * @param chars The buffer which contains the data
2253 * @return <code>true</code> if the current character is a Digit
2254 */
2255 public static final boolean isDigit( char[] chars )
2256 {
2257 if ( ( chars == null ) || ( chars.length == 0 ) )
2258 {
2259 return false;
2260 }
2261 else
2262 {
2263 return ( ( ( chars[0] > 127 ) || !DIGIT[chars[0]] ) ? false : true );
2264 }
2265 }
2266
2267
2268 /**
2269 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2270 * 127).
2271 * <char> ::= <alpha> | <digit>
2272 *
2273 * @param string The string which contains the data
2274 * @param index Current position in the string
2275 * @return The position of the next character, if the current one is a CHAR.
2276 */
2277 public static final boolean isAlphaDigit( String string, int index )
2278 {
2279 if ( string == null )
2280 {
2281 return false;
2282 }
2283
2284 int length = string.length();
2285
2286 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2287 {
2288 return false;
2289 }
2290 else
2291 {
2292 char c = string.charAt( index );
2293
2294 if ( ( c > 127 ) || ( ALPHA_DIGIT[c] == false ) )
2295 {
2296 return false;
2297 }
2298 else
2299 {
2300 return true;
2301 }
2302 }
2303 }
2304
2305
2306 /**
2307 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2308 * 127). <char> ::= <alpha> | <digit> | '-'
2309 *
2310 * @param bytes The buffer which contains the data
2311 * @param index Current position in the buffer
2312 * @return The position of the next character, if the current one is a CHAR.
2313 */
2314 public static final boolean isAlphaDigitMinus( byte[] bytes, int index )
2315 {
2316 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
2317 {
2318 return false;
2319 }
2320 else
2321 {
2322 byte c = bytes[index];
2323
2324 if ( ( ( c | 0x7F ) != 0x7F ) || ( CHAR[c] == false ) )
2325 {
2326 return false;
2327 }
2328 else
2329 {
2330 return true;
2331 }
2332 }
2333 }
2334
2335
2336 /**
2337 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2338 * 127). <char> ::= <alpha> | <digit> | '-'
2339 *
2340 * @param chars The buffer which contains the data
2341 * @param index Current position in the buffer
2342 * @return The position of the next character, if the current one is a CHAR.
2343 */
2344 public static final boolean isAlphaDigitMinus( char[] chars, int index )
2345 {
2346 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
2347 {
2348 return false;
2349 }
2350 else
2351 {
2352 char c = chars[index];
2353
2354 if ( ( c > 127 ) || ( CHAR[c] == false ) )
2355 {
2356 return false;
2357 }
2358 else
2359 {
2360 return true;
2361 }
2362 }
2363 }
2364
2365
2366 /**
2367 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2368 * 127). <char> ::= <alpha> | <digit> | '-'
2369 *
2370 * @param string The string which contains the data
2371 * @param index Current position in the string
2372 * @return The position of the next character, if the current one is a CHAR.
2373 */
2374 public static final boolean isAlphaDigitMinus( String string, int index )
2375 {
2376 if ( string == null )
2377 {
2378 return false;
2379 }
2380
2381 int length = string.length();
2382
2383 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2384 {
2385 return false;
2386 }
2387 else
2388 {
2389 char c = string.charAt( index );
2390
2391 if ( ( c > 127 ) || ( CHAR[c] == false ) )
2392 {
2393 return false;
2394 }
2395 else
2396 {
2397 return true;
2398 }
2399 }
2400 }
2401
2402
2403 // Empty checks
2404 // -----------------------------------------------------------------------
2405 /**
2406 * <p>
2407 * Checks if a String is empty ("") or null.
2408 * </p>
2409 *
2410 * <pre>
2411 * StringUtils.isEmpty(null) = true
2412 * StringUtils.isEmpty("") = true
2413 * StringUtils.isEmpty(" ") = false
2414 * StringUtils.isEmpty("bob") = false
2415 * StringUtils.isEmpty(" bob ") = false
2416 * </pre>
2417 *
2418 * <p>
2419 * NOTE: This method changed in Lang version 2.0. It no longer trims the
2420 * String. That functionality is available in isBlank().
2421 * </p>
2422 *
2423 * @param str the String to check, may be null
2424 * @return <code>true</code> if the String is empty or null
2425 */
2426 public static final boolean isEmpty( String str )
2427 {
2428 return str == null || str.length() == 0;
2429 }
2430
2431
2432 /**
2433 * Checks if a bytes array is empty or null.
2434 *
2435 * @param bytes The bytes array to check, may be null
2436 * @return <code>true</code> if the bytes array is empty or null
2437 */
2438 public static final boolean isEmpty( byte[] bytes )
2439 {
2440 return bytes == null || bytes.length == 0;
2441 }
2442
2443
2444 /**
2445 * <p>
2446 * Checks if a String is not empty ("") and not null.
2447 * </p>
2448 *
2449 * <pre>
2450 * StringUtils.isNotEmpty(null) = false
2451 * StringUtils.isNotEmpty("") = false
2452 * StringUtils.isNotEmpty(" ") = true
2453 * StringUtils.isNotEmpty("bob") = true
2454 * StringUtils.isNotEmpty(" bob ") = true
2455 * </pre>
2456 *
2457 * @param str the String to check, may be null
2458 * @return <code>true</code> if the String is not empty and not null
2459 */
2460 public static final boolean isNotEmpty( String str )
2461 {
2462 return str != null && str.length() > 0;
2463 }
2464
2465
2466 /**
2467 * <p>
2468 * Removes spaces (char <= 32) from both start and ends of this String,
2469 * handling <code>null</code> by returning <code>null</code>.
2470 * </p>
2471 * Trim removes start and end characters <= 32.
2472 *
2473 * <pre>
2474 * StringUtils.trim(null) = null
2475 * StringUtils.trim("") = ""
2476 * StringUtils.trim(" ") = ""
2477 * StringUtils.trim("abc") = "abc"
2478 * StringUtils.trim(" abc ") = "abc"
2479 * </pre>
2480 *
2481 * @param str the String to be trimmed, may be null
2482 * @return the trimmed string, <code>null</code> if null String input
2483 */
2484 public static final String trim( String str )
2485 {
2486 return ( isEmpty( str ) ? "" : str.trim() );
2487 }
2488
2489
2490 /**
2491 * <p>
2492 * Removes spaces (char <= 32) from both start and ends of this bytes
2493 * array, handling <code>null</code> by returning <code>null</code>.
2494 * </p>
2495 * Trim removes start and end characters <= 32.
2496 *
2497 * <pre>
2498 * StringUtils.trim(null) = null
2499 * StringUtils.trim("") = ""
2500 * StringUtils.trim(" ") = ""
2501 * StringUtils.trim("abc") = "abc"
2502 * StringUtils.trim(" abc ") = "abc"
2503 * </pre>
2504 *
2505 * @param bytes the byte array to be trimmed, may be null
2506 *
2507 * @return the trimmed byte array
2508 */
2509 public static final byte[] trim( byte[] bytes )
2510 {
2511 if ( isEmpty( bytes ) )
2512 {
2513 return EMPTY_BYTES;
2514 }
2515
2516 int start = trimLeft( bytes, 0 );
2517 int end = trimRight( bytes, bytes.length - 1 );
2518
2519 int length = end - start + 1;
2520
2521 if ( length != 0 )
2522 {
2523 byte[] newBytes = new byte[end - start + 1];
2524
2525 System.arraycopy( bytes, start, newBytes, 0, length );
2526
2527 return newBytes;
2528 }
2529 else
2530 {
2531 return EMPTY_BYTES;
2532 }
2533 }
2534
2535
2536 /**
2537 * <p>
2538 * Removes spaces (char <= 32) from start of this String, handling
2539 * <code>null</code> by returning <code>null</code>.
2540 * </p>
2541 * Trim removes start characters <= 32.
2542 *
2543 * <pre>
2544 * StringUtils.trimLeft(null) = null
2545 * StringUtils.trimLeft("") = ""
2546 * StringUtils.trimLeft(" ") = ""
2547 * StringUtils.trimLeft("abc") = "abc"
2548 * StringUtils.trimLeft(" abc ") = "abc "
2549 * </pre>
2550 *
2551 * @param str the String to be trimmed, may be null
2552 * @return the trimmed string, <code>null</code> if null String input
2553 */
2554 public static final String trimLeft( String str )
2555 {
2556 if ( isEmpty( str ) )
2557 {
2558 return "";
2559 }
2560
2561 int start = 0;
2562 int end = str.length();
2563
2564 while ( ( start < end ) && ( str.charAt( start ) == ' ' ) )
2565 {
2566 start++;
2567 }
2568
2569 return ( start == 0 ? str : str.substring( start ) );
2570 }
2571
2572
2573 /**
2574 * <p>
2575 * Removes spaces (char <= 32) from start of this array, handling
2576 * <code>null</code> by returning <code>null</code>.
2577 * </p>
2578 * Trim removes start characters <= 32.
2579 *
2580 * <pre>
2581 * StringUtils.trimLeft(null) = null
2582 * StringUtils.trimLeft("") = ""
2583 * StringUtils.trimLeft(" ") = ""
2584 * StringUtils.trimLeft("abc") = "abc"
2585 * StringUtils.trimLeft(" abc ") = "abc "
2586 * </pre>
2587 *
2588 * @param chars the chars array to be trimmed, may be null
2589 * @return the position of the first char which is not a space, or the last
2590 * position of the array.
2591 */
2592 public static final int trimLeft( char[] chars, int pos )
2593 {
2594 if ( chars == null )
2595 {
2596 return pos;
2597 }
2598
2599 while ( ( pos < chars.length ) && ( chars[pos] == ' ' ) )
2600 {
2601 pos++;
2602 }
2603
2604 return pos;
2605 }
2606
2607
2608 /**
2609 * <p>
2610 * Removes spaces (char <= 32) from a position in this array, handling
2611 * <code>null</code> by returning <code>null</code>.
2612 * </p>
2613 * Trim removes start characters <= 32.
2614 *
2615 * <pre>
2616 * StringUtils.trimLeft(null) = null
2617 * StringUtils.trimLeft("",...) = ""
2618 * StringUtils.trimLeft(" ",...) = ""
2619 * StringUtils.trimLeft("abc",...) = "abc"
2620 * StringUtils.trimLeft(" abc ",...) = "abc "
2621 * </pre>
2622 *
2623 * @param string the string to be trimmed, may be null
2624 * @param pos The starting position
2625 */
2626 public static final void trimLeft( String string, Position pos )
2627 {
2628 if ( string == null )
2629 {
2630 return;
2631 }
2632
2633 int length = string.length();
2634
2635 while ( ( pos.start < length ) && ( string.charAt( pos.start ) == ' ' ) )
2636 {
2637 pos.start++;
2638 }
2639
2640 pos.end = pos.start;
2641
2642 return;
2643 }
2644
2645
2646 /**
2647 * <p>
2648 * Removes spaces (char <= 32) from a position in this array, handling
2649 * <code>null</code> by returning <code>null</code>.
2650 * </p>
2651 * Trim removes start characters <= 32.
2652 *
2653 * <pre>
2654 * StringUtils.trimLeft(null) = null
2655 * StringUtils.trimLeft("",...) = ""
2656 * StringUtils.trimLeft(" ",...) = ""
2657 * StringUtils.trimLeft("abc",...) = "abc"
2658 * StringUtils.trimLeft(" abc ",...) = "abc "
2659 * </pre>
2660 *
2661 * @param bytes the byte array to be trimmed, may be null
2662 * @param pos The starting position
2663 */
2664 public static final void trimLeft( byte[] bytes, Position pos )
2665 {
2666 if ( bytes == null )
2667 {
2668 return;
2669 }
2670
2671 int length = bytes.length;
2672
2673 while ( ( pos.start < length ) && ( bytes[ pos.start ] == ' ' ) )
2674 {
2675 pos.start++;
2676 }
2677
2678 pos.end = pos.start;
2679
2680 return;
2681 }
2682
2683
2684 /**
2685 * <p>
2686 * Removes spaces (char <= 32) from start of this array, handling
2687 * <code>null</code> by returning <code>null</code>.
2688 * </p>
2689 * Trim removes start characters <= 32.
2690 *
2691 * <pre>
2692 * StringUtils.trimLeft(null) = null
2693 * StringUtils.trimLeft("") = ""
2694 * StringUtils.trimLeft(" ") = ""
2695 * StringUtils.trimLeft("abc") = "abc"
2696 * StringUtils.trimLeft(" abc ") = "abc "
2697 * </pre>
2698 *
2699 * @param bytes the byte array to be trimmed, may be null
2700 * @return the position of the first byte which is not a space, or the last
2701 * position of the array.
2702 */
2703 public static final int trimLeft( byte[] bytes, int pos )
2704 {
2705 if ( bytes == null )
2706 {
2707 return pos;
2708 }
2709
2710 while ( ( pos < bytes.length ) && ( bytes[pos] == ' ' ) )
2711 {
2712 pos++;
2713 }
2714
2715 return pos;
2716 }
2717
2718
2719 /**
2720 * <p>
2721 * Removes spaces (char <= 32) from end of this String, handling
2722 * <code>null</code> by returning <code>null</code>.
2723 * </p>
2724 * Trim removes start characters <= 32.
2725 *
2726 * <pre>
2727 * StringUtils.trimRight(null) = null
2728 * StringUtils.trimRight("") = ""
2729 * StringUtils.trimRight(" ") = ""
2730 * StringUtils.trimRight("abc") = "abc"
2731 * StringUtils.trimRight(" abc ") = " abc"
2732 * </pre>
2733 *
2734 * @param str the String to be trimmed, may be null
2735 * @return the trimmed string, <code>null</code> if null String input
2736 */
2737 public static final String trimRight( String str )
2738 {
2739 if ( isEmpty( str ) )
2740 {
2741 return "";
2742 }
2743
2744 int length = str.length();
2745 int end = length;
2746
2747 while ( ( end > 0 ) && ( str.charAt( end - 1 ) == ' ' ) )
2748 {
2749 if ( ( end > 1 ) && ( str.charAt( end - 2 ) == '\\' ) )
2750 {
2751 break;
2752 }
2753
2754 end--;
2755 }
2756
2757 return ( end == length ? str : str.substring( 0, end ) );
2758 }
2759
2760 /**
2761 * <p>
2762 * Removes spaces (char <= 32) from end of this String, handling
2763 * <code>null</code> by returning <code>null</code>.
2764 * </p>
2765 * Trim removes start characters <= 32.
2766 *
2767 * <pre>
2768 * StringUtils.trimRight(null) = null
2769 * StringUtils.trimRight("") = ""
2770 * StringUtils.trimRight(" ") = ""
2771 * StringUtils.trimRight("abc") = "abc"
2772 * StringUtils.trimRight(" abc ") = " abc"
2773 * </pre>
2774 *
2775 * @param str the String to be trimmed, may be null
2776 * @param escapedSpace The last escaped space, if any
2777 * @return the trimmed string, <code>null</code> if null String input
2778 */
2779 public static final String trimRight( String str, int escapedSpace )
2780 {
2781 if ( isEmpty( str ) )
2782 {
2783 return "";
2784 }
2785
2786 int length = str.length();
2787 int end = length;
2788
2789 while ( ( end > 0 ) && ( str.charAt( end - 1 ) == ' ' ) && ( end > escapedSpace ) )
2790 {
2791 if ( ( end > 1 ) && ( str.charAt( end - 2 ) == '\\' ) )
2792 {
2793 break;
2794 }
2795
2796 end--;
2797 }
2798
2799 return ( end == length ? str : str.substring( 0, end ) );
2800 }
2801
2802
2803 /**
2804 * <p>
2805 * Removes spaces (char <= 32) from end of this array, handling
2806 * <code>null</code> by returning <code>null</code>.
2807 * </p>
2808 * Trim removes start characters <= 32.
2809 *
2810 * <pre>
2811 * StringUtils.trimRight(null) = null
2812 * StringUtils.trimRight("") = ""
2813 * StringUtils.trimRight(" ") = ""
2814 * StringUtils.trimRight("abc") = "abc"
2815 * StringUtils.trimRight(" abc ") = " abc"
2816 * </pre>
2817 *
2818 * @param chars the chars array to be trimmed, may be null
2819 * @return the position of the first char which is not a space, or the last
2820 * position of the array.
2821 */
2822 public static final int trimRight( char[] chars, int pos )
2823 {
2824 if ( chars == null )
2825 {
2826 return pos;
2827 }
2828
2829 while ( ( pos >= 0 ) && ( chars[pos - 1] == ' ' ) )
2830 {
2831 pos--;
2832 }
2833
2834 return pos;
2835 }
2836
2837
2838 /**
2839 * <p>
2840 * Removes spaces (char <= 32) from end of this string, handling
2841 * <code>null</code> by returning <code>null</code>.
2842 * </p>
2843 * Trim removes start characters <= 32.
2844 *
2845 * <pre>
2846 * StringUtils.trimRight(null) = null
2847 * StringUtils.trimRight("") = ""
2848 * StringUtils.trimRight(" ") = ""
2849 * StringUtils.trimRight("abc") = "abc"
2850 * StringUtils.trimRight(" abc ") = " abc"
2851 * </pre>
2852 *
2853 * @param string the string to be trimmed, may be null
2854 * @return the position of the first char which is not a space, or the last
2855 * position of the string.
2856 */
2857 public static final String trimRight( String string, Position pos )
2858 {
2859 if ( string == null )
2860 {
2861 return "";
2862 }
2863
2864 while ( ( pos.end >= 0 ) && ( string.charAt( pos.end - 1 ) == ' ' ) )
2865 {
2866 if ( ( pos.end > 1 ) && ( string.charAt( pos.end - 2 ) == '\\' ) )
2867 {
2868 break;
2869 }
2870
2871 pos.end--;
2872 }
2873
2874 return ( pos.end == string.length() ? string : string.substring( 0, pos.end ) );
2875 }
2876
2877
2878 /**
2879 * <p>
2880 * Removes spaces (char <= 32) from end of this string, handling
2881 * <code>null</code> by returning <code>null</code>.
2882 * </p>
2883 * Trim removes start characters <= 32.
2884 *
2885 * <pre>
2886 * StringUtils.trimRight(null) = null
2887 * StringUtils.trimRight("") = ""
2888 * StringUtils.trimRight(" ") = ""
2889 * StringUtils.trimRight("abc") = "abc"
2890 * StringUtils.trimRight(" abc ") = " abc"
2891 * </pre>
2892 *
2893 * @param bytes the byte array to be trimmed, may be null
2894 * @return the position of the first char which is not a space, or the last
2895 * position of the byte array.
2896 */
2897 public static final String trimRight( byte[] bytes, Position pos )
2898 {
2899 if ( bytes == null )
2900 {
2901 return "";
2902 }
2903
2904 while ( ( pos.end >= 0 ) && ( bytes[pos.end - 1] == ' ' ) )
2905 {
2906 if ( ( pos.end > 1 ) && ( bytes[pos.end - 2] == '\\' ) )
2907 {
2908 break;
2909 }
2910
2911 pos.end--;
2912 }
2913
2914 if ( pos.end == bytes.length )
2915 {
2916 return StringTools.utf8ToString( bytes );
2917 }
2918 else
2919 {
2920 return StringTools.utf8ToString( bytes, pos.end );
2921 }
2922 }
2923
2924
2925 /**
2926 * <p>
2927 * Removes spaces (char <= 32) from end of this array, handling
2928 * <code>null</code> by returning <code>null</code>.
2929 * </p>
2930 * Trim removes start characters <= 32.
2931 *
2932 * <pre>
2933 * StringUtils.trimRight(null) = null
2934 * StringUtils.trimRight("") = ""
2935 * StringUtils.trimRight(" ") = ""
2936 * StringUtils.trimRight("abc") = "abc"
2937 * StringUtils.trimRight(" abc ") = " abc"
2938 * </pre>
2939 *
2940 * @param bytes the byte array to be trimmed, may be null
2941 * @return the position of the first char which is not a space, or the last
2942 * position of the array.
2943 */
2944 public static final int trimRight( byte[] bytes, int pos )
2945 {
2946 if ( bytes == null )
2947 {
2948 return pos;
2949 }
2950
2951 while ( ( pos >= 0 ) && ( bytes[pos] == ' ' ) )
2952 {
2953 pos--;
2954 }
2955
2956 return pos;
2957 }
2958
2959
2960 // Case conversion
2961 // -----------------------------------------------------------------------
2962 /**
2963 * <p>
2964 * Converts a String to upper case as per {@link String#toUpperCase()}.
2965 * </p>
2966 * <p>
2967 * A <code>null</code> input String returns <code>null</code>.
2968 * </p>
2969 *
2970 * <pre>
2971 * StringUtils.upperCase(null) = null
2972 * StringUtils.upperCase("") = ""
2973 * StringUtils.upperCase("aBc") = "ABC"
2974 * </pre>
2975 *
2976 * @param str the String to upper case, may be null
2977 * @return the upper cased String, <code>null</code> if null String input
2978 */
2979 public static final String upperCase( String str )
2980 {
2981 if ( str == null )
2982 {
2983 return null;
2984 }
2985
2986 return str.toUpperCase();
2987 }
2988
2989
2990 /**
2991 * <p>
2992 * Converts a String to lower case as per {@link String#toLowerCase()}.
2993 * </p>
2994 * <p>
2995 * A <code>null</code> input String returns <code>null</code>.
2996 * </p>
2997 *
2998 * <pre>
2999 * StringUtils.lowerCase(null) = null
3000 * StringUtils.lowerCase("") = ""
3001 * StringUtils.lowerCase("aBc") = "abc"
3002 * </pre>
3003 *
3004 * @param str the String to lower case, may be null
3005 * @return the lower cased String, <code>null</code> if null String input
3006 */
3007 public static final String lowerCase( String str )
3008 {
3009 if ( str == null )
3010 {
3011 return null;
3012 }
3013
3014 return str.toLowerCase();
3015 }
3016
3017
3018 /**
3019 * Rewrote the toLowercase method to improve performances.
3020 * In Ldap, attributesType are supposed to use ASCII chars :
3021 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only. We will take
3022 * care of any other chars either.
3023 *
3024 * @param str The String to lowercase
3025 * @return The lowercase string
3026 */
3027 public static final String lowerCaseAscii( String str )
3028 {
3029 if ( str == null )
3030 {
3031 return null;
3032 }
3033
3034 char[] chars = str.toCharArray();
3035 int pos = 0;
3036
3037 for ( char c:chars )
3038 {
3039 chars[pos++] = TO_LOWER_CASE[c];
3040 }
3041
3042 return new String( chars );
3043 }
3044
3045
3046 // Equals
3047 // -----------------------------------------------------------------------
3048 /**
3049 * <p>
3050 * Compares two Strings, returning <code>true</code> if they are equal.
3051 * </p>
3052 * <p>
3053 * <code>null</code>s are handled without exceptions. Two
3054 * <code>null</code> references are considered to be equal. The comparison
3055 * is case sensitive.
3056 * </p>
3057 *
3058 * <pre>
3059 * StringUtils.equals(null, null) = true
3060 * StringUtils.equals(null, "abc") = false
3061 * StringUtils.equals("abc", null) = false
3062 * StringUtils.equals("abc", "abc") = true
3063 * StringUtils.equals("abc", "ABC") = false
3064 * </pre>
3065 *
3066 * @see java.lang.String#equals(Object)
3067 * @param str1 the first String, may be null
3068 * @param str2 the second String, may be null
3069 * @return <code>true</code> if the Strings are equal, case sensitive, or
3070 * both <code>null</code>
3071 */
3072 public static final boolean equals( String str1, String str2 )
3073 {
3074 return str1 == null ? str2 == null : str1.equals( str2 );
3075 }
3076
3077
3078 /**
3079 * Return an UTF-8 encoded String
3080 *
3081 * @param bytes The byte array to be transformed to a String
3082 * @return A String.
3083 */
3084 public static final String utf8ToString( byte[] bytes )
3085 {
3086 if ( bytes == null )
3087 {
3088 return "";
3089 }
3090
3091 try
3092 {
3093 return new String( bytes, "UTF-8" );
3094 }
3095 catch ( UnsupportedEncodingException uee )
3096 {
3097 return "";
3098 }
3099 }
3100
3101
3102 /**
3103 * Return an UTF-8 encoded String
3104 *
3105 * @param bytes The byte array to be transformed to a String
3106 * @param length The length of the byte array to be converted
3107 * @return A String.
3108 */
3109 public static final String utf8ToString( byte[] bytes, int length )
3110 {
3111 if ( bytes == null )
3112 {
3113 return "";
3114 }
3115
3116 try
3117 {
3118 return new String( bytes, 0, length, "UTF-8" );
3119 }
3120 catch ( UnsupportedEncodingException uee )
3121 {
3122 return "";
3123 }
3124 }
3125
3126
3127 /**
3128 * Return an UTF-8 encoded String
3129 *
3130 * @param bytes The byte array to be transformed to a String
3131 * @param start the starting position in the byte array
3132 * @param length The length of the byte array to be converted
3133 * @return A String.
3134 */
3135 public static final String utf8ToString( byte[] bytes, int start, int length )
3136 {
3137 if ( bytes == null )
3138 {
3139 return "";
3140 }
3141
3142 try
3143 {
3144 return new String( bytes, start, length, "UTF-8" );
3145 }
3146 catch ( UnsupportedEncodingException uee )
3147 {
3148 return "";
3149 }
3150 }
3151
3152
3153 /**
3154 * Return UTF-8 encoded byte[] representation of a String
3155 *
3156 * @param string The string to be transformed to a byte array
3157 * @return The transformed byte array
3158 */
3159 public static final byte[] getBytesUtf8( String string )
3160 {
3161 if ( string == null )
3162 {
3163 return new byte[0];
3164 }
3165
3166 try
3167 {
3168 return string.getBytes( "UTF-8" );
3169 }
3170 catch ( UnsupportedEncodingException uee )
3171 {
3172 return new byte[]
3173 {};
3174 }
3175 }
3176
3177
3178 /**
3179 * Utility method that return a String representation of a list
3180 *
3181 * @param list The list to transform to a string
3182 * @return A csv string
3183 */
3184 public static final String listToString( List<?> list )
3185 {
3186 if ( ( list == null ) || ( list.size() == 0 ) )
3187 {
3188 return "";
3189 }
3190
3191 StringBuilder sb = new StringBuilder();
3192 boolean isFirst = true;
3193
3194 Iterator<?> iter = list.iterator();
3195
3196 while ( iter.hasNext() )
3197 {
3198 if ( isFirst )
3199 {
3200 isFirst = false;
3201 }
3202 else
3203 {
3204 sb.append( ", " );
3205 }
3206
3207 sb.append( iter.next() );
3208 }
3209
3210 return sb.toString();
3211 }
3212
3213
3214 /**
3215 * Utility method that return a String representation of a list
3216 *
3217 * @param list The list to transform to a string
3218 * @param tabs The tabs to add in ffront of the elements
3219 * @return A csv string
3220 */
3221 public static final String listToString( List<?> list, String tabs )
3222 {
3223 if ( ( list == null ) || ( list.size() == 0 ) )
3224 {
3225 return "";
3226 }
3227
3228 StringBuffer sb = new StringBuffer();
3229
3230 Iterator<?> iter = list.iterator();
3231
3232 while ( iter.hasNext() )
3233 {
3234 sb.append( tabs );
3235 sb.append( iter.next() );
3236 sb.append( '\n' );
3237 }
3238
3239 return sb.toString();
3240 }
3241
3242
3243 /**
3244 * Utility method that return a String representation of a map. The elements
3245 * will be represented as "key = value"
3246 *
3247 * @param map The map to transform to a string
3248 * @return A csv string
3249 */
3250 public static final String mapToString( Map<?,?> map )
3251 {
3252 if ( ( map == null ) || ( map.size() == 0 ) )
3253 {
3254 return "";
3255 }
3256
3257 StringBuffer sb = new StringBuffer();
3258 boolean isFirst = true;
3259
3260 for ( Map.Entry<?, ?> entry:map.entrySet() )
3261 {
3262 if ( isFirst )
3263 {
3264 isFirst = false;
3265 }
3266 else
3267 {
3268 sb.append( ", " );
3269 }
3270
3271 sb.append( entry.getKey() );
3272 sb.append( " = '" ).append( entry.getValue() ).append( "'" );
3273 }
3274
3275 return sb.toString();
3276 }
3277
3278
3279 /**
3280 * Utility method that return a String representation of a map. The elements
3281 * will be represented as "key = value"
3282 *
3283 * @param map The map to transform to a string
3284 * @param tabs The tabs to add in ffront of the elements
3285 * @return A csv string
3286 */
3287 public static final String mapToString( Map<?,?> map, String tabs )
3288 {
3289 if ( ( map == null ) || ( map.size() == 0 ) )
3290 {
3291 return "";
3292 }
3293
3294 StringBuffer sb = new StringBuffer();
3295
3296 for ( Map.Entry<?, ?> entry:map.entrySet() )
3297 {
3298 sb.append( tabs );
3299 sb.append( entry.getKey() );
3300
3301 sb.append( " = '" ).append( entry.getValue().toString() ).append( "'\n" );
3302 }
3303
3304 return sb.toString();
3305 }
3306
3307
3308 /**
3309 * Get the default charset
3310 *
3311 * @return The default charset
3312 */
3313 public static final String getDefaultCharsetName()
3314 {
3315 if ( null == defaultCharset )
3316 {
3317 try
3318 {
3319 // Try with jdk 1.5 method, if we are using a 1.5 jdk :)
3320 Method method = Charset.class.getMethod( "defaultCharset", new Class[0] );
3321 defaultCharset = ((Charset) method.invoke( null, new Object[0]) ).name();
3322 }
3323 catch (Exception e)
3324 {
3325 // fall back to old method
3326 defaultCharset = new OutputStreamWriter( new ByteArrayOutputStream() ).getEncoding();
3327 }
3328 }
3329
3330 return defaultCharset;
3331 }
3332
3333
3334 /**
3335 * Decodes values of attributes in the DN encoded in hex into a UTF-8
3336 * String. RFC2253 allows a DN's attribute to be encoded in hex.
3337 * The encoded value starts with a # then is followed by an even
3338 * number of hex characters.
3339 *
3340 * @param str the string to decode
3341 * @return the decoded string
3342 */
3343 public static final String decodeHexString( String str ) throws InvalidNameException
3344 {
3345 if ( str == null || str.length() == 0 )
3346 {
3347 throw new InvalidNameException( "Expected string to start with a '#' character. " +
3348 "Invalid hex encoded string for empty or null string." );
3349 }
3350
3351 char[] chars = str.toCharArray();
3352
3353 if ( chars[0] != '#' )
3354 {
3355 throw new InvalidNameException( "Expected string to start with a '#' character. " +
3356 "Invalid hex encoded string: " + str );
3357 }
3358
3359 // the bytes representing the encoded string of hex
3360 // this should be ( length - 1 )/2 in size
3361 byte[] decoded = new byte[ ( chars.length - 1 ) >> 1 ];
3362
3363 for ( int ii = 1, jj = 0 ; ii < chars.length; ii+=2, jj++ )
3364 {
3365 int ch = ( StringTools.HEX_VALUE[chars[ii]] << 4 ) +
3366 StringTools.HEX_VALUE[chars[ii+1]];
3367 decoded[jj] = ( byte ) ch;
3368 }
3369
3370 return StringTools.utf8ToString( decoded );
3371 }
3372
3373
3374 /**
3375 * Decodes sequences of escaped hex within an attribute's value into
3376 * a UTF-8 String. The hex is decoded inline and the complete decoded
3377 * String is returned.
3378 *
3379 * @param str the string containing hex escapes
3380 * @return the decoded string
3381 */
3382 public static final String decodeEscapedHex( String str ) throws InvalidNameException
3383 {
3384 if ( str == null )
3385 {
3386 throw new InvalidNameException( "Expected string to be non-null " +
3387 "with valid index." );
3388 }
3389
3390 int length = str.length();
3391
3392 if ( length == 0 )
3393 {
3394 throw new InvalidNameException( "Expected string to be non-empty " +
3395 "with valid index." );
3396 }
3397
3398 // create buffer and add everything before start of scan
3399 StringBuffer buf = new StringBuffer();
3400 ByteBuffer bb = new ByteBuffer();
3401 boolean escaped = false;
3402
3403 // start scaning until we find an escaped series of bytes
3404 for ( int ii = 0; ii < length; ii++ )
3405 {
3406 char c = str.charAt( ii );
3407
3408 if ( !escaped && c == '\\' )
3409 {
3410 // we have the start of a hex escape sequence
3411 if ( isHex( str, ii+1 ) && isHex ( str, ii+2 ) )
3412 {
3413 bb.clear();
3414 int advancedBy = collectEscapedHexBytes( bb, str, ii );
3415 ii+=advancedBy-1;
3416 buf.append( StringTools.utf8ToString( bb.buffer(), bb.position() ) );
3417 escaped = false;
3418 continue;
3419 }
3420 else
3421 {
3422 // It may be an escaped char ( ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\' )
3423 escaped = true;
3424 continue;
3425 }
3426 }
3427
3428 if ( escaped )
3429 {
3430 if ( DNUtils.isPairCharOnly( c ) )
3431 {
3432 // It is an escaped char ( ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\' )
3433 // Stores it into the buffer without the '\'
3434 escaped = false;
3435 buf.append( c );
3436 continue;
3437 }
3438 else
3439 {
3440 throw new InvalidNameException( "The DN must contain valid escaped characters." );
3441 }
3442 }
3443 else
3444 {
3445 buf.append( str.charAt( ii ) );
3446 }
3447 }
3448
3449 if ( escaped )
3450 {
3451 // We should not have a '\' at the end of the string
3452 throw new InvalidNameException( "The DN must not ends with a '\\'." );
3453 }
3454
3455 return buf.toString();
3456 }
3457
3458
3459 /**
3460 * Convert an escaoed list of bytes to a byte[]
3461 *
3462 * @param str the string containing hex escapes
3463 * @return the converted byte[]
3464 */
3465 public static final byte[] convertEscapedHex( String str ) throws InvalidNameException
3466 {
3467 if ( str == null )
3468 {
3469 throw new InvalidNameException( "Expected string to be non-null " +
3470 "with valid index." );
3471 }
3472
3473 int length = str.length();
3474
3475 if ( length == 0 )
3476 {
3477 throw new InvalidNameException( "Expected string to be non-empty " +
3478 "with valid index." );
3479 }
3480
3481 // create buffer and add everything before start of scan
3482 byte[] buf = new byte[ str.length()/3];
3483 int pos = 0;
3484
3485 // start scaning until we find an escaped series of bytes
3486 for ( int i = 0; i < length; i++ )
3487 {
3488 char c = str.charAt( i );
3489
3490 if ( c == '\\' )
3491 {
3492 // we have the start of a hex escape sequence
3493 if ( isHex( str, i+1 ) && isHex ( str, i+2 ) )
3494 {
3495 byte value = ( byte ) ( (StringTools.HEX_VALUE[str.charAt( i+1 )] << 4 ) +
3496 StringTools.HEX_VALUE[str.charAt( i+2 )] );
3497
3498 i+=2;
3499 buf[pos++] = value;
3500 }
3501 }
3502 else
3503 {
3504 throw new InvalidNameException( "The DN must contain valid escaped characters." );
3505 }
3506 }
3507
3508 return buf;
3509 }
3510
3511
3512 /**
3513 * Collects an hex sequence from a string, and returns the value
3514 * as an integer, after having modified the initial value (the escaped
3515 * hex value is transsformed to the byte it represents).
3516 *
3517 * @param bb the buffer which will contain the unescaped byte
3518 * @param str the initial string with ecaped chars
3519 * @param index the position in the string of the escaped data
3520 * @return the byte as an integer
3521 */
3522 public static int collectEscapedHexBytes( ByteBuffer bb, String str, int index )
3523 {
3524 int advanceBy = 0;
3525
3526 for ( int ii = index; ii < str.length(); ii += 3, advanceBy += 3 )
3527 {
3528 // we have the start of a hex escape sequence
3529 if ( ( str.charAt( ii ) == '\\' ) && isHex( str, ii+1 ) && isHex ( str, ii+2 ) )
3530 {
3531 int bite = ( StringTools.HEX_VALUE[str.charAt( ii+1 )] << 4 ) +
3532 StringTools.HEX_VALUE[str.charAt( ii+2 )];
3533 bb.append( bite );
3534 }
3535 else
3536 {
3537 break;
3538 }
3539 }
3540
3541 return advanceBy;
3542 }
3543
3544
3545 /**
3546 * Thansform an array of ASCII bytes to a string. the byte array should contains
3547 * only values in [0, 127].
3548 *
3549 * @param bytes The byte array to transform
3550 * @return The resulting string
3551 */
3552 public static String asciiBytesToString( byte[] bytes )
3553 {
3554 if ( (bytes == null) || (bytes.length == 0 ) )
3555 {
3556 return "";
3557 }
3558
3559 char[] result = new char[bytes.length];
3560
3561 for ( int i = 0; i < bytes.length; i++ )
3562 {
3563 result[i] = (char)bytes[i];
3564 }
3565
3566 return new String( result );
3567 }
3568
3569
3570 /**
3571 * Build an AttributeType froma byte array. An AttributeType contains
3572 * only chars within [0-9][a-z][A-Z][-.].
3573 *
3574 * @param bytes The bytes containing the AttributeType
3575 * @return The AttributeType as a String
3576 */
3577 public static String getType( byte[] bytes)
3578 {
3579 if ( bytes == null )
3580 {
3581 return null;
3582 }
3583
3584 char[] chars = new char[bytes.length];
3585 int pos = 0;
3586
3587 for ( byte b:bytes )
3588 {
3589 chars[pos++] = (char)b;
3590 }
3591
3592 return new String( chars );
3593 }
3594
3595
3596 /**
3597 *
3598 * Check that a String is a valid IA5String. An IA5String contains only
3599 * char which values is between [0, 7F]
3600 *
3601 * @param str The String to check
3602 * @return <code>true</code> if the string is an IA5String or is empty,
3603 * <code>false</code> otherwise
3604 */
3605 public static boolean isIA5String( String str )
3606 {
3607 if ( ( str == null ) || ( str.length() == 0 ) )
3608 {
3609 return true;
3610 }
3611
3612 // All the chars must be in [0x00, 0x7F]
3613 for ( char c:str.toCharArray() )
3614 {
3615 if ( ( c < 0 ) || ( c > 0x7F ) )
3616 {
3617 return false;
3618 }
3619 }
3620
3621 return true;
3622 }
3623
3624
3625 /**
3626 *
3627 * Check that a String is a valid PrintableString. A PrintableString contains only
3628 * the following set of chars :
3629 * { ' ', ''', '(', ')', '+', '-', '.', '/', [0-9], ':', '=', '?', [A-Z], [a-z]}
3630 *
3631 * @param str The String to check
3632 * @return <code>true</code> if the string is a PrintableString or is empty,
3633 * <code>false</code> otherwise
3634 */
3635 public static boolean isPrintableString( String str )
3636 {
3637 if ( ( str == null ) || ( str.length() == 0 ) )
3638 {
3639 return true;
3640 }
3641
3642 for ( char c:str.toCharArray() )
3643 {
3644 if ( ( c > 127 ) || !IS_PRINTABLE_CHAR[ c ] )
3645 {
3646 return false;
3647 }
3648 }
3649
3650 return true;
3651 }
3652
3653
3654 /**
3655 * Check if the current char is in the unicodeSubset : all chars but
3656 * '\0', '(', ')', '*' and '\'
3657 *
3658 * @param str The string to check
3659 * @param pos Position of the current char
3660 * @return True if the current char is in the unicode subset
3661 */
3662 public static boolean isUnicodeSubset( String str, int pos )
3663 {
3664 if ( ( str == null ) || ( str.length() <= pos ) || ( pos < 0 ) )
3665 {
3666 return false;
3667 }
3668
3669 char c = str.charAt( pos );
3670
3671 return ( ( c > 127 ) || UNICODE_SUBSET[c] );
3672 }
3673
3674
3675 /**
3676 * Check if the current char is in the unicodeSubset : all chars but
3677 * '\0', '(', ')', '*' and '\'
3678 *
3679 * @param c The char to check
3680 * @return True if the current char is in the unicode subset
3681 */
3682 public static boolean isUnicodeSubset( char c )
3683 {
3684 return ( ( c > 127 ) || UNICODE_SUBSET[c] );
3685 }
3686 }