http://xml.apache.org/http://www.apache.org/http://www.w3.org/

Home

Readme
Charter
Release Info

Installation
Download
Build Instructions

FAQs
Samples
API Docs

DOM C++ Binding
Programming
Migration Guide

Feedback
Bug-Reporting
PDF Document

Source Repository
User Mail Archive
Devel Mail Archive

API Docs for SAX and DOM
 

XMLChar.hpp

Go to the documentation of this file.
00001 /*
00002  * Copyright 2002-2005 The Apache Software Foundation.
00003  * 
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  * 
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  * 
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00017 /*
00018  * $Id: XMLChar.hpp 191054 2005-06-17 02:56:35Z jberry $
00019  */
00020 
00021 #if !defined(XMLCHAR_HPP)
00022 #define XMLCHAR_HPP
00023 
00024 #include <xercesc/util/XMLUniDefs.hpp>
00025 
00026 XERCES_CPP_NAMESPACE_BEGIN
00027 
00028 // ---------------------------------------------------------------------------
00029 //  This file defines character classification utilities that conform to XML 1.0 and XML 1.1
00030 // ---------------------------------------------------------------------------
00031 // Masks for the fgCharCharsTable1_0 and fgCharCharsTable1_1 arrays
      // Each mask is a distinct single bit (0x1 through 0x80). A character has
      // a property when (table[ch] & mask) != 0, which is how the inline
      // predicates in this header test it.
00032 const XMLByte   gNCNameCharMask             = 0x1;   // tested by isNCNameChar
00033 const XMLByte   gFirstNameCharMask          = 0x2;   // tested by isFirstNameChar, isFirstNCNameChar, isXMLLetter
00034 const XMLByte   gNameCharMask               = 0x4;   // tested by isNameChar
00035 const XMLByte   gPlainContentCharMask       = 0x8;   // tested by isPlainContentChar
00036 const XMLByte   gSpecialStartTagCharMask    = 0x10;  // tested by isSpecialStartTagChar
00037 const XMLByte   gControlCharMask            = 0x20;  // tested by isControlChar
00038 const XMLByte   gXMLCharMask                = 0x40;  // tested by isXMLChar
00039 const XMLByte   gWhitespaceCharMask         = 0x80;  // tested by isWhitespace
00040 
00041 // ---------------------------------------------------------------------------
00042 //  This class is for XML 1.0
00043 // ---------------------------------------------------------------------------
00044 class  XMLChar1_0
00045 {
          // Static-only utility class for XML 1.0 character classification: every
          // member below is static and the only declared constructor is private.
00046 public:
00047     // -----------------------------------------------------------------------
00048     //  Public, static methods, check the string
          //  Each takes a pointer to XMLCh data (toCheck) and an element count;
          //  their definitions are not part of this header.
00049     // -----------------------------------------------------------------------
00050     static bool isAllSpaces
00051     (
00052         const   XMLCh* const    toCheck
00053         , const unsigned int    count
00054     );
00055 
00056     static bool containsWhiteSpace
00057     (
00058         const   XMLCh* const    toCheck
00059         , const unsigned int    count
00060     );
00061 
00062     static bool isValidNmtoken
00063     (
00064         const   XMLCh*        const    toCheck
00065       , const   unsigned int           count
00066     );
00067 
00068     static bool isValidName
00069     (
00070         const   XMLCh* const    toCheck
00071         , const unsigned int    count
00072     );
00073 
00074     static bool isValidNCName
00075     (
00076         const   XMLCh* const    toCheck
00077         , const unsigned int    count
00078     );
00079 
00080     static bool isValidQName
00081     (
00082         const   XMLCh* const    toCheck
00083         , const unsigned int    count
00084     );
00085 
00086     // -----------------------------------------------------------------------
00087     //  Public, static methods, check the XMLCh
00088     //  A surrogate pair is assumed if the second parameter is non-zero
          //  (toCheck is then treated as the high surrogate and toCheck2 as the
          //  low surrogate by the inline definitions in this header).
00089     // -----------------------------------------------------------------------
00090     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00091     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00092     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00093     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00094     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00095     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
          // isWhitespace is declared as two overloads instead of one function
          // with a defaulted second argument; the one-argument form is a bare
          // table lookup.
00096     static bool isWhitespace(const XMLCh toCheck);
00097     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
00098     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00099 
          // isPublicIdChar has no inline definition in this header.
00100     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00101     static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00102     static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00103 
00104     // -----------------------------------------------------------------------
00105     //  Special Non-conformant Public, static methods
00106     // -----------------------------------------------------------------------
          // Returns the current value of the private enableNEL flag.
00110     static bool isNELRecognized();
00111 
          // Defined outside this header; presumably switches on NEL/LSEP
          // recognition (see enableNEL below) -- TODO confirm against the .cpp.
00115     static void enableNELWS();
00116 
00117 private:
00118     // -----------------------------------------------------------------------
00119     //  Unimplemented constructors and operators
          //  (private, so client code cannot instantiate the class)
00120     // -----------------------------------------------------------------------
00121     XMLChar1_0();
00122 
00123     // -----------------------------------------------------------------------
00124     //  Static data members
00125     //
00126     //  fgCharCharsTable1_0
00127     //      The character characteristics table. The bits in each byte represent
00128     //      the characteristics of each character. It is generated via some
00129     //      code and then hard coded into the cpp file for speed.
          //      It has 0x10000 entries and is indexed directly by the XMLCh being
          //      tested; the g*Mask constants select the individual bits.
00130     //
00131     //  enableNEL
00132     //      Flag that represents whether NEL and LSEP newline recognition is enabled
00133     //      or disabled; its value is what isNELRecognized() returns
00134     // -----------------------------------------------------------------------
00135     static XMLByte  fgCharCharsTable1_0[0x10000];
00136     static bool     enableNEL;
00137 
          // XMLReader is granted access to the private table and flag above.
00138     friend class XMLReader;
00139 };
00140 
00141 
00142 // ---------------------------------------------------------------------------
00143 //  XMLChar1_0: Public, static methods
00144 // ---------------------------------------------------------------------------
00145 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00146 {
          // Returns true when toCheck, taken on its own, is an XML 1.0 letter.
          // Any non-zero toCheck2 (i.e. a surrogate pair) yields false.
00147     // An XML letter is a FirstNameChar minus ':' and '_'.
00148     if (!toCheck2) {
00149         return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0)
00150                 && (toCheck != chColon) && (toCheck != chUnderscore));
00151     }
00152     return false;
00153 }
00154 
00155 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00156 {
          // True when toCheck2 is zero and the 1.0 table entry for toCheck has
          // gFirstNameCharMask set. A surrogate pair (non-zero toCheck2) is
          // always rejected.
00157     if (!toCheck2)
00158         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
00159     return false;
00160 }
00161 
00162 inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00163 {
          // Same test as isFirstNameChar, except that chColon is additionally
          // excluded. A surrogate pair (non-zero toCheck2) is always rejected.
00164     if (!toCheck2) {
00165         return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
00166     }
00167 
00168     return false;
00169 }
00170 
00171 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00172 {
          // True when toCheck2 is zero and the 1.0 table entry for toCheck has
          // gNameCharMask set. A surrogate pair (non-zero toCheck2) is always
          // rejected.
00173     if (!toCheck2)
00174         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
00175     return false;
00176 }
00177 
00178 inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00179 {
          // True when toCheck2 is zero and the 1.0 table entry for toCheck has
          // gNCNameCharMask set. A surrogate pair (non-zero toCheck2) is always
          // rejected.
00180     if (!toCheck2)
00181         return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0);
00182     return false;
00183 }
00184 
00185 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00186 {
          // Single code unit (toCheck2 == 0): look up gPlainContentCharMask in
          // the 1.0 table.
00187     if (!toCheck2)
00188         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
00189     else {
              // Pair: accepted only when toCheck lies in 0xD800-0xDBFF (high
              // surrogate) and toCheck2 lies in 0xDC00-0xDFFF (low surrogate).
00190         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00191            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00192                return true;
00193     }
          // Reached for a non-zero toCheck2 that does not form a valid pair.
00194     return false;
00195 }
00196 
00197 
00198 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00199 {
          // True when toCheck2 is zero and the 1.0 table entry for toCheck has
          // gSpecialStartTagCharMask set. A surrogate pair (non-zero toCheck2)
          // is always rejected.
00200     if (!toCheck2)
00201         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
00202     return false;
00203 }
00204 
00205 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00206 {
          // Single code unit (toCheck2 == 0): look up gXMLCharMask in the 1.0
          // table.
00207     if (!toCheck2)
00208         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
00209     else {
              // Pair: accepted only when toCheck lies in 0xD800-0xDBFF (high
              // surrogate) and toCheck2 lies in 0xDC00-0xDFFF (low surrogate).
00210         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00211            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00212                return true;
00213     }
          // Reached for a non-zero toCheck2 that does not form a valid pair.
00214     return false;
00215 }
00216 
00217 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
00218 {
          // One-argument overload: a bare lookup of gWhitespaceCharMask in the
          // 1.0 table, with no surrogate-pair parameter to test.
00219     return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00220 }
00221 
00222 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00223 {
          // Two-argument overload: performs the same table lookup as the
          // one-argument form when toCheck2 is zero. A surrogate pair (non-zero
          // toCheck2) is always rejected.
00224     if (!toCheck2)
00225         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00226     return false;
00227 }
00228 
00229 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00230 {
          // True when toCheck2 is zero and the 1.0 table entry for toCheck has
          // gControlCharMask set. A surrogate pair (non-zero toCheck2) is always
          // rejected.
00231     if (!toCheck2)
00232         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
00233     return false;
00234 }
00235 
00236 inline bool XMLChar1_0::isNELRecognized() {
00237 
          // Reports the current value of the static enableNEL flag.
00238     return enableNEL;
00239 }
00240 
00241 
00242 // ---------------------------------------------------------------------------
00243 //  This class is for XML 1.1
00244 // ---------------------------------------------------------------------------
00245 class  XMLChar1_1
00246 {
          // Static-only utility class for XML 1.1 character classification: every
          // member below is static and the only declared constructor is private.
00247 public:
00248     // -----------------------------------------------------------------------
00249     //  Public, static methods, check the string
          //  Each takes a pointer to XMLCh data (toCheck) and an element count;
          //  their definitions are not part of this header.
00250     // -----------------------------------------------------------------------
00251     static bool isAllSpaces
00252     (
00253         const   XMLCh* const    toCheck
00254         , const unsigned int    count
00255     );
00256 
00257     static bool containsWhiteSpace
00258     (
00259         const   XMLCh* const    toCheck
00260         , const unsigned int    count
00261     );
00262 
00263     static bool isValidNmtoken
00264     (
00265         const   XMLCh*        const    toCheck
00266       , const   unsigned int           count
00267     );
00268 
00269     static bool isValidName
00270     (
00271         const   XMLCh* const    toCheck
00272         , const unsigned int    count
00273     );
00274 
00275     static bool isValidNCName
00276     (
00277         const   XMLCh* const    toCheck
00278         , const unsigned int    count
00279     );
00280 
00281     static bool isValidQName
00282     (
00283         const   XMLCh* const    toCheck
00284         , const unsigned int    count
00285     );
00286 
00287     // -----------------------------------------------------------------------
00288     //  Public, static methods, check the XMLCh
          //  A non-zero toCheck2 means toCheck/toCheck2 are treated as a
          //  high/low surrogate pair by the inline definitions in this header.
00289     // -----------------------------------------------------------------------
00290     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00291     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00292     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00293     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00294     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00295     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00296     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00297     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00298 
          // isPublicIdChar has no inline definition in this header.
00299     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00300     static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00301     static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00302 
00303 private:
00304     // -----------------------------------------------------------------------
00305     //  Unimplemented constructors and operators
          //  (private, so client code cannot instantiate the class)
00306     // -----------------------------------------------------------------------
00307     XMLChar1_1();
00308 
00309     // -----------------------------------------------------------------------
00310     //  Static data members
00311     //
00312     //  fgCharCharsTable1_1
00313     //      The character characteristics table. The bits in each byte represent
00314     //      the characteristics of each character. It is generated via some
00315     //      code and then hard coded into the cpp file for speed.
          //      It has 0x10000 entries and is indexed directly by the XMLCh being
          //      tested; the g*Mask constants select the individual bits.
00316     //
00317     // -----------------------------------------------------------------------
00318     static XMLByte  fgCharCharsTable1_1[0x10000];
00319 
          // XMLReader is granted access to the private table above.
00320     friend class XMLReader;
00321 };
00322 
00323 
00324 // ---------------------------------------------------------------------------
00325 //  XMLChar1_1: Public, static methods
00326 // ---------------------------------------------------------------------------
00327 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00328 {
          // Delegates to the XML 1.0 implementation, so the answer comes from
          // fgCharCharsTable1_0 (not the 1.1 table) and any non-zero toCheck2
          // yields false.
00330     return XMLChar1_0::isXMLLetter(toCheck, toCheck2);
00331 }
00332 
00333 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00334 {
          // Single code unit (toCheck2 == 0): look up gFirstNameCharMask in the
          // 1.1 table.
00335     if (!toCheck2)
00336         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
00337     else {
              // Pair: the high surrogate is capped at 0xDB7F rather than 0xDBFF.
              // 0xD800-0xDB7F combined with any low surrogate decodes to
              // U+10000-U+EFFFF, which looks like the supplementary range of the
              // XML 1.1 NameStartChar production -- confirm against the spec.
00338         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00339            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00340                return true;
00341     }
          // Reached for a non-zero toCheck2 that fails either range check.
00342     return false;
00343 }
00344 
00345 inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00346 {
          // Single code unit (toCheck2 == 0): gFirstNameCharMask must be set in
          // the 1.1 table and the character must not be chColon.
00347     if (!toCheck2) {
00348         return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
00349     }
00350     else {
              // Pair: accepted when the high surrogate is in 0xD800-0xDB7F and
              // the low surrogate is in 0xDC00-0xDFFF (U+10000-U+EFFFF).
00351         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00352            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00353                return true;
00354     }
          // Reached for a non-zero toCheck2 that fails either range check.
00355     return false;
00356 }
00357 
00358 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00359 {
          // Single code unit (toCheck2 == 0): look up gNameCharMask in the 1.1
          // table.
00360     if (!toCheck2)
00361         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
00362     else {
              // Pair: accepted when the high surrogate is in 0xD800-0xDB7F and
              // the low surrogate is in 0xDC00-0xDFFF (U+10000-U+EFFFF).
00363         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00364            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00365                return true;
00366     }
          // Reached for a non-zero toCheck2 that fails either range check.
00367     return false;
00368 }
00369 
00370 inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00371 {
          // Single code unit (toCheck2 == 0): look up gNCNameCharMask in the 1.1
          // table.
00372     if (!toCheck2)
00373         return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0);
00374     else {
              // Pair: accepted when the high surrogate is in 0xD800-0xDB7F and
              // the low surrogate is in 0xDC00-0xDFFF (U+10000-U+EFFFF).
00375         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00376            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00377                return true;
00378     }
          // Reached for a non-zero toCheck2 that fails either range check.
00379     return false;
00380 }
00381 
00382 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00383 {
          // Single code unit (toCheck2 == 0): look up gPlainContentCharMask in
          // the 1.1 table.
00384     if (!toCheck2)
00385         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
00386     else {
              // Pair: accepted only when toCheck lies in 0xD800-0xDBFF (high
              // surrogate) and toCheck2 lies in 0xDC00-0xDFFF (low surrogate).
00387         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00388            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00389                return true;
00390     }
          // Reached for a non-zero toCheck2 that does not form a valid pair.
00391     return false;
00392 }
00393 
00394 
00395 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00396 {
          // True when toCheck2 is zero and the 1.1 table entry for toCheck has
          // gSpecialStartTagCharMask set. A surrogate pair (non-zero toCheck2)
          // is always rejected.
00397     if (!toCheck2)
00398         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
00399     return false;
00400 }
00401 
00402 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00403 {
          // Single code unit (toCheck2 == 0): look up gXMLCharMask in the 1.1
          // table.
00404     if (!toCheck2)
00405         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
00406     else {
              // Pair: accepted only when toCheck lies in 0xD800-0xDBFF (high
              // surrogate) and toCheck2 lies in 0xDC00-0xDFFF (low surrogate).
00407         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00408            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00409                return true;
00410     }
          // Reached for a non-zero toCheck2 that does not form a valid pair.
00411     return false;
00412 }
00413 
00414 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00415 {
          // True when toCheck2 is zero and the 1.1 table entry for toCheck has
          // gWhitespaceCharMask set. A surrogate pair (non-zero toCheck2) is
          // always rejected.
00416     if (!toCheck2)
00417         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
00418     return false;
00419 }
00420 
00421 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00422 {
          // True when toCheck2 is zero and the 1.1 table entry for toCheck has
          // gControlCharMask set. A surrogate pair (non-zero toCheck2) is always
          // rejected.
00423     if (!toCheck2)
00424         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
00425     return false;
00426 }
00427 
00428 
00429 XERCES_CPP_NAMESPACE_END
00430 
00431 #endif


Copyright © 1994-2004 The Apache Software Foundation. All Rights Reserved.