001/**
002 * Copyright 2005-2018 The Kuali Foundation
003 *
004 * Licensed under the Educational Community License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.opensource.org/licenses/ecl2.php
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016/*
017 * $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
018 */
019
020package org.kuali.rice.kew.api.util;
021
/**
 * NOTE: Copied this code into Rice when removing Xalan jar
 *
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. The <code>MASK_*</code> constants are public, but
 * the table of character properties itself is private to this class,
 * so callers must go through the convenience methods. All
 * single-character checks accept any <code>int</code>: values outside
 * the range covered by the table (negative values, or values above
 * 0xFFFF where the method does not explicitly handle supplemental
 * characters) simply yield <code>false</code>.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
 */
public class XMLChar {

    //
    // Constants
    //

    /** Character flags, one byte of MASK_* bits per index 0x0000-0xFFFF. */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    /** Flags given to every character that may start a Name / NCName. */
    private static final int NAME_START_FLAGS =
            MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;

    /** Flags given to every character that may continue a Name / NCName. */
    private static final int NAME_FLAGS = MASK_NAME | MASK_NCNAME;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        final int[] charRange = {
                0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        final int[] spaceChar = {
                0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        final int[] nameChar = {
                0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        final int[] nameStartChar = {
                0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        final int[] pubidChar = {
                0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
                0x005F
        };

        final int[] pubidRange = {
                0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        final int[] letterRange = {
                // BaseChar
                0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
                0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
                0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
                0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
                0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
                0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
                0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
                0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
                0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
                0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
                0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
                0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
                0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
                0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
                0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
                0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
                0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
                0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
                0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
                0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
                0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
                0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
                0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
                0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
                0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
                0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
                0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
                0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
                0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
                0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
                0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
                0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
                0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
                0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
                0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
                0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
                0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
                0xAC00, 0xD7A3,
                // Ideographic
                0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        final int[] letterChar = {
                // BaseChar
                0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
                0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
                0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
                0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
                0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
                0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
                0x1F5D, 0x1FBE, 0x2126, 0x212E,
                // Ideographic
                0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        final int[] combiningCharRange = {
                0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
                0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
                0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
                0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
                0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
                0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
                0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
                0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
                0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
                0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
                0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
                0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
                0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
                0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
                0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
                0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
                0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        final int[] combiningCharChar = {
                0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
                0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
                0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
                0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        final int[] digitRange = {
                0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
                0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
                0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
                0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        final int[] extenderRange = {
                0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        final int[] extenderChar = {
                0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        final int[] specialChar = {
                '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize (order matters: flags are set first, then selectively cleared)
        //

        // set valid characters; every valid character starts out as content
        markRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters from the content set (they stay valid)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] = (byte) (CHARS[specialChar[i]] & ~MASK_CONTENT);
        }

        // set space characters
        markChars(spaceChar, MASK_SPACE);

        // set name start characters
        markChars(nameStartChar, NAME_START_FLAGS);
        markRanges(letterRange, NAME_START_FLAGS);
        markChars(letterChar, NAME_START_FLAGS);

        // set name characters
        markChars(nameChar, NAME_FLAGS);
        markRanges(digitRange, NAME_FLAGS);
        markRanges(combiningCharRange, NAME_FLAGS);
        markChars(combiningCharChar, NAME_FLAGS);
        markRanges(extenderRange, NAME_FLAGS);
        markChars(extenderChar, NAME_FLAGS);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        markChars(pubidChar, MASK_PUBID);
        markRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private helpers
    //

    /** ORs <code>mask</code> into the table entry of every listed character. */
    private static void markChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    /**
     * ORs <code>mask</code> into the table entry of every character in the
     * inclusive ranges given as consecutive (first, last) pairs.
     */
    private static void markRanges(int[] pairs, int mask) {
        for (int i = 0; i < pairs.length; i += 2) {
            for (int j = pairs[i]; j <= pairs[i + 1]; j++) {
                CHARS[j] |= mask;
            }
        }
    }

    /**
     * Bounds-checked table lookup: true if <code>c</code> lies inside the
     * table (0x0000-0xFFFF) and has any bit of <code>mask</code> set.
     * Negative or too-large values yield false instead of an
     * ArrayIndexOutOfBoundsException.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /** True for the characters allowed after the first one in an encoding name. */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
                || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     * @return true if c is in the range 0x10000 to 0x10FFFF inclusive.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked for actually being
     * surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     * @return the combined supplemental character.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     * @return the high surrogate of c.
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     * @return the low surrogate of c.
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     * @return true if c is in the range 0xD800 to 0xDBFF inclusive.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     * @return true if c is in the range 0xDC00 to 0xDFFF inclusive.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also checks the supplemental character range from 0x10000 to
     * 0x10FFFF. Negative values and values above 0x10FFFF are invalid.
     *
     * @param c The character to check.
     * @return true if c is a valid XML character.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     * @return the negation of {@link #isValid(int)}.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000 to 0x10FFFF) are always content.
     *
     * @param c The character to check.
     * @return true if c is a content character.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     * @return true if c is one of the three markup characters.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     * @return true if c is a space character.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     * @return true if c may start a Name.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     * @return true if c may appear in a Name.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     * @return true if c may start an NCName.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     * @return true if c may appear in an NCName.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     * @return true if c is a Pubid character.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false for null or empty input
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        for (int i = 1; i < name.length(); i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName; false for null or empty input
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ncName.length(); i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for null or empty input
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        for (int i = 0; i < nmtoken.length(); i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return true if the name is well-formed; false for null or empty input
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        final char first = ianaEncoding.charAt(0);
        if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: every character, including the first, must
     * be an ASCII letter, digit, '.', '_' or '-'.
     *
     * @param javaEncoding The Java encoding name.
     * @return true if the name is well-formed; false for null or empty input
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // Start at index 0: unlike IANA names, the first character has no
        // extra restriction, but it still has to be a legal name character.
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     *
     * @param str The qualified name to check.
     * @return true if str is an NCName, or two NCNames joined by a single
     *         ':'; false otherwise, including for null or empty input
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // Leading or trailing colon; for the empty string both sides are -1.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            // A second colon ends up in localPart and fails the NCName check.
            return isValidNCName(prefix) && isValidNCName(localPart);
        }
        return isValidNCName(str);
    } // isValidQName(String):boolean

} // class XMLChar