• Main Page
  • Related Pages
  • Classes
  • Files
  • File List
  • File Members

public/ITextParsers.h

Go to the documentation of this file.
00001 /**
00002  * vim: set ts=4 :
00003  * =============================================================================
00004  * SourceMod
00005  * Copyright (C) 2004-2008 AlliedModders LLC.  All rights reserved.
00006  * =============================================================================
00007  *
00008  * This program is free software; you can redistribute it and/or modify it under
00009  * the terms of the GNU General Public License, version 3.0, as published by the
00010  * Free Software Foundation.
00011  * 
00012  * This program is distributed in the hope that it will be useful, but WITHOUT
00013  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00015  * details.
00016  *
00017  * You should have received a copy of the GNU General Public License along with
00018  * this program.  If not, see <http://www.gnu.org/licenses/>.
00019  *
00020  * As a special exception, AlliedModders LLC gives you permission to link the
00021  * code of this program (as well as its derivative works) to "Half-Life 2," the
00022  * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
00023  * by the Valve Corporation.  You must obey the GNU General Public License in
00024  * all respects for all other code used.  Additionally, AlliedModders LLC grants
00025  * this exception to all derivative works.  AlliedModders LLC defines further
00026  * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
00027  * or <http://www.sourcemod.net/license.php>.
00028  *
00029  * Version: $Id$
00030  */
00031 
00032 #ifndef _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
00033 #define _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
00034 
00035 /**
00036  * @file ITextParsers.h
00037  * @brief Defines various text/file parsing functions, as well as UTF-8 support code.
00038  */
00039 
00040 #include <IShareSys.h>
00041 
00042 namespace SourceMod
00043 {
00044 
/** @brief Interface name string, as returned by ITextParsers::GetInterfaceName(). */
#define SMINTERFACE_TEXTPARSERS_NAME    "ITextParsers"
/** @brief Interface version number, as returned by ITextParsers::GetInterfaceVersion(). */
#define SMINTERFACE_TEXTPARSERS_VERSION 4
00047 
00048           /**
00049            * The INI file format is defined as:
00050            * WHITESPACE: 0x20, \n, \t, \r
00051            * IDENTIFIER: A-Z a-z 0-9 _ - , + . $ ? / 
00052            * STRING: Any set of symbols
00053            * 
00054            * Basic syntax is comprised of SECTIONs.
00055            * A SECTION is defined as:
00056            * [SECTIONNAME]
00057            * OPTION
00058            * OPTION
00059            * OPTION...
00060            *
00061            * SECTIONNAME is an IDENTIFIER.
00062            * OPTION can be repeated any number of times, once per line.
00063            * OPTION is defined as one of:
00064            *  KEY = "VALUE"
00065            *  KEY = VALUE
00066            *  KEY
00067            * Where KEY is an IDENTIFIER and VALUE is a STRING.
00068            * 
00069            * WHITESPACE should always be omitted.
00070            * COMMENTS should be stripped, and are defined as text occurring in:
00071            * ;<TEXT>
00072            * 
00073            * Example file below.  Note that
00074            * the second line is technically invalid.  The event handler
00075            * must decide whether this should be allowed.
00076            * --FILE BELOW--
00077            * [gaben]
00078            * hi = clams
00079            * bye = "NO CLAMS"
00080            *
00081            * [valve]
00082            * cannot
00083            * maintain
00084            * products
00085            */
00086 
00087           /**
00088            * @brief Contains parse events for INI files.
00089            */
00090           class ITextListener_INI
00091           {
00092           public:
00093                     /** 
00094                      * @brief Returns version number.
00095                      */
00096                     virtual unsigned int GetTextParserVersion1()
00097                     {
00098                               return SMINTERFACE_TEXTPARSERS_VERSION;
00099                     }
00100           public:
00101                     /**
00102                      * @brief Called when a new section is encountered in an INI file.
00103                      * 
00104                      * @param section             Name of section in between the [ and ] characters.
00105                      * @param invalid_tokens True if invalid tokens were detected in the name.
00106                      * @param close_bracket       True if a closing bracket was detected, false otherwise.
00107                      * @param extra_tokens        True if extra tokens were detected on the line.
00108                      * @param curtok              Contains current token in the line where the section name starts.
00109                      *                                                          You can add to this offset when failing to point to a token.
00110                      * @return                                        True to keep parsing, false otherwise.
00111                      */
00112                     virtual bool ReadINI_NewSection(const char *section,
00113                                                                                                     bool invalid_tokens,
00114                                                                                                     bool close_bracket,
00115                                                                                                     bool extra_tokens,
00116                                                                                                     unsigned int *curtok)
00117                     {
00118                               return true;
00119                     }
00120 
00121                     /**
00122                      * @brief Called when encountering a key/value pair in an INI file.
00123                      * 
00124                      * @param key                           Name of key.
00125                      * @param value                         String containing value (with quotes stripped, if any).
00126                      * @param invalid_tokens Whether or not the key contained invalid tokens.
00127                      * @param equal_token         There was an '=' sign present (in case the value is missing).
00128                      * @param quotes              Whether value was enclosed in quotes.
00129                      * @param curtok              Contains the token index of the start of the value string.  
00130                      *                                                          This can be changed when returning false.
00131                      * @return                                        True to keep parsing, false otherwise.
00132                      */
00133                     virtual bool ReadINI_KeyValue(const char *key, 
00134                                                                                             const char *value, 
00135                                                                                             bool invalid_tokens,
00136                                                                                             bool equal_token,
00137                                                                                             bool quotes,
00138                                                                                             unsigned int *curtok)
00139                     {
00140                               return true;
00141                     }
00142 
00143                     /**
00144                      * @brief Called after a line has been preprocessed, if it has text.
00145                      *
00146                      * @param line                          Contents of line.
00147                      * @param curtok              Pointer to optionally store failed position in string.
00148                      * @return                                        True to keep parsing, false otherwise.
00149                      */
00150                     virtual bool ReadINI_RawLine(const char *line, unsigned int *curtok)
00151                     {
00152                               return true;
00153                     }
00154           };
00155 
00156           /**
00157            * :TODO: write this in CFG (context free grammar) format so it makes sense
00158            * 
00159            * The SMC file format is defined as:
00160            * WHITESPACE: 0x20, \n, \t, \r
00161            * IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, / *, or WHITESPACE.
00162            * STRING: Any set of symbols enclosed in quotes.
00163            * Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER.
00164            *
00165            * Basic syntax is comprised of SECTIONBLOCKs.
00166            * A SECTIONBLOCK is defined as:
00167            *
00168            * SECTIONNAME
00169            * {
00170            *    OPTION
00171            * }
00172            * 
00173            * OPTION can be repeated any number of times inside a SECTIONBLOCK.
00174            * A new line will terminate an OPTION, but there can be more than one OPTION per line.
00175            * OPTION is defined as any of:
00176            *          "KEY"  "VALUE"
00177            *    SECTIONBLOCK
00178            *
00179            * SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings
00180            * SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed.
00181            * If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace).
00182            * If KEY is not enclosed in quotes, the key is terminated at first whitespace.
00183            * If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace).
00184            * The VALUE may have inner quotes, but the key string may not.
00185            *
00186            * For an example, see configs/permissions.cfg
00187            *
00188            * WHITESPACE should be ignored.
00189            * Comments are text occurring inside the following tokens, and should be stripped
00190            * unless they are inside literal strings:
00191            *  ;<TEXT>
00192            *  //<TEXT>
00193            *  / *<TEXT> */
00194 
/**
 * @brief Directives an SMC parse hook can return to tell the parser what to do next.
 */
enum SMCResult
{
    SMCResult_Continue = 0,     /**< Keep parsing */
    SMCResult_Halt     = 1,     /**< Stop parsing at this point */
    SMCResult_HaltFail = 2      /**< Stop parsing and return SMCError_Custom */
};
00204 
/**
 * @brief Error codes that parsing an SMC file can produce.
 *
 * Enumerators are numbered consecutively starting from SMCError_Okay (0).
 * The last enumerator deliberately has no trailing comma: a trailing comma in
 * an enumerator list is not valid before C++11.
 */
enum SMCError
{
    SMCError_Okay = 0,          /**< No error */
    SMCError_StreamOpen,        /**< Stream failed to open */
    SMCError_StreamError,       /**< The stream died... somehow */
    SMCError_Custom,            /**< A custom handler threw an error */
    SMCError_InvalidSection1,   /**< A section was declared without quotes, and had extra tokens */
    SMCError_InvalidSection2,   /**< A section was declared without any header */
    SMCError_InvalidSection3,   /**< A section ending was declared with too many unknown tokens */
    SMCError_InvalidSection4,   /**< A section ending has no matching beginning */
    SMCError_InvalidSection5,   /**< A section beginning has no matching ending */
    SMCError_InvalidTokens,     /**< There were too many unidentifiable strings on one line */
    SMCError_TokenOverflow,     /**< The token buffer overflowed */
    SMCError_InvalidProperty1   /**< A property was declared outside of any section */
};
00223 
/**
 * @brief Line/column position reported by the SMC parser.
 */
struct SMCStates
{
    unsigned int line;      /**< Line currently being parsed */
    unsigned int col;       /**< Column currently being parsed */
};
00232 
00233           /**
00234            * @brief Describes the events available for reading an SMC stream.
00235            */
00236           class ITextListener_SMC
00237           {
00238           public:
00239                     /** 
00240                      * @brief Returns version number.
00241                      */
00242                     virtual unsigned int GetTextParserVersion2()
00243                     {
00244                               return SMINTERFACE_TEXTPARSERS_VERSION;
00245                     }
00246           public:
00247                     /**
00248                      * @brief Called when starting parsing.
00249                      */
00250                     virtual void ReadSMC_ParseStart()
00251                     {
00252                     };
00253 
00254                     /**
00255                      * @brief Called when ending parsing.
00256                      *
00257                      * @param halted                        True if abnormally halted, false otherwise.
00258                      * @param failed                        True if parsing failed, false otherwise.
00259                      */
00260                     virtual void ReadSMC_ParseEnd(bool halted, bool failed)
00261                     {
00262                     }
00263 
00264                     /**
00265                      * @brief Called when entering a new section
00266                      *
00267                      * @param states              Parsing states.
00268                      * @param name                          Name of section, with the colon omitted.
00269                      * @return                                        SMCResult directive.
00270                      */
00271                     virtual SMCResult ReadSMC_NewSection(const SMCStates *states, const char *name)
00272                     {
00273                               return SMCResult_Continue;
00274                     }
00275 
00276                     /**
00277                      * @brief Called when encountering a key/value pair in a section.
00278                      * 
00279                      * @param states              Parsing states.
00280                      * @param key                           Key string.
00281                      * @param value                         Value string.  If no quotes were specified, this will be NULL, 
00282                      *                                                          and key will contain the entire string.
00283                      * @return                                        SMCResult directive.
00284                      */
00285                     virtual SMCResult ReadSMC_KeyValue(const SMCStates *states, const char *key, const char *value)
00286                     {
00287                               return SMCResult_Continue;
00288                     }
00289 
00290                     /**
00291                      * @brief Called when leaving the current section.
00292                      *
00293                      * @param states              Parsing states.
00294                      * @return                                        SMCResult directive.
00295                      */
00296                     virtual SMCResult ReadSMC_LeavingSection(const SMCStates *states)
00297                     {
00298                               return SMCResult_Continue;
00299                     }
00300 
00301                     /**
00302                      * @brief Called after an input line has been preprocessed.
00303                      *
00304                      * @param states              Parsing states.
00305                      * @param line                          Contents of the line, null terminated at the position 
00306                      *                                                          of the newline character (thus, no newline will exist).
00307                      * @return                                        SMCResult directive.
00308                      */
00309                     virtual SMCResult ReadSMC_RawLine(const SMCStates *states, const char *line)
00310                     {
00311                               return SMCResult_Continue;
00312                     }
00313           };        
00314 
00315           /**
00316            * @brief Contains various text stream parsing functions.
00317            */
00318           class ITextParsers : public SMInterface
00319           {
00320           public:
00321                     virtual const char *GetInterfaceName()
00322                     {
00323                               return SMINTERFACE_TEXTPARSERS_NAME;
00324                     }
00325                     virtual unsigned int GetInterfaceVersion()
00326                     {
00327                               return SMINTERFACE_TEXTPARSERS_VERSION;
00328                     }
00329                     virtual bool IsVersionCompatible(unsigned int version)
00330                     {
00331                               if (version < 2)
00332                               {
00333                                         return false;
00334                               }
00335                               return SMInterface::IsVersionCompatible(version);
00336                     }
00337           public:
00338                     /**
00339                      * @brief Parses an INI-format file.
00340                      *
00341                      * @param file                          Path to file.
00342                      * @param ini_listener        Event handler for reading file.
00343                      * @param line                          If non-NULL, will contain last line parsed (0 if file could not be opened).
00344                      * @param col                           If non-NULL, will contain last column parsed (undefined if file could not be opened).
00345                      * @return                                        True if parsing succeeded, false if file couldn't be opened or there was a syntax error.
00346                      */
00347                     virtual bool ParseFile_INI(const char *file, 
00348                                                                                           ITextListener_INI *ini_listener,
00349                                                                                           unsigned int *line,
00350                                                                                           unsigned int *col) =0;
00351 
00352                     /**
00353                      * @brief Parses an SMC-format text file.
00354                      * Note that the parser makes every effort to obey broken syntax.
00355                      * For example, if an open brace is missing, but the section name has a colon,
00356                      * it will let you know.  It is up to the event handlers to decide whether to be strict or not.
00357                      *
00358                      * @param file                          Path to file.
00359                      * @param smc_listener        Event handler for reading file.
00360                      * @param states              Optional pointer to store last known states.
00361                      * @return                                        An SMCError result code.
00362                      */
00363                     virtual SMCError ParseFile_SMC(const char *file, 
00364                                                                                           ITextListener_SMC *smc_listener, 
00365                                                                                           SMCStates *states) =0;
00366 
00367                     /**
00368                      * @brief Converts an SMCError to a string.
00369                      *
00370                      * @param err                           SMCError.
00371                      * @return                                        String error message, or NULL if none.
00372                      */
00373                     virtual const char *GetSMCErrorString(SMCError err) =0;
00374 
00375           public:
00376                     /**
00377                      * @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
00378                      * If the current character is not multi-byte, the function returns 1.
00379                      *
00380                      * @param stream              Pointer to multi-byte ANSI character string.
00381                      * @return                                        Number of bytes in current character.
00382                      */
00383                     virtual unsigned int GetUTF8CharBytes(const char *stream) =0;
00384 
00385                     /**
00386                      * @brief Returns whether the first multi-byte character in the given stream
00387                      * is a whitespace character.
00388                      *
00389                      * @param stream              Pointer to multi-byte character string.
00390                      * @return                                        True if first character is whitespace, false otherwise.
00391                      */
00392                     virtual bool IsWhitespace(const char *stream) =0;
00393 
00394                     /**
00395                      * @brief Same as ParseFile_SMC, but with an extended error buffer.
00396                      *
00397                      * @param file                          Path to file.
00398                      * @param smc_listener        Event handler for reading file.
00399                      * @param states              Optional pointer to store last known states.
00400                      * @param buffer              Error message buffer.
00401                      * @param maxsize             Maximum size of the error buffer.
00402                      * @return                                        Error code.
00403                      */
00404                     virtual SMCError ParseSMCFile(const char *file,
00405                               ITextListener_SMC *smc_listener,
00406                               SMCStates *states,
00407                               char *buffer,
00408                               size_t maxsize) =0;
00409 
00410                     /**
00411                      * @brief Parses a raw UTF8 stream as an SMC file.
00412                      *
00413                      * @param stream              Memory containing data.
00414                      * @param length              Number of bytes in the stream.
00415                      * @param smc_listener        Event handler for reading file.
00416                      * @param states              Optional pointer to store last known states.
00417                      * @param buffer              Error message buffer.
00418                      * @param maxsize             Maximum size of the error buffer.
00419                      * @return                                        Error code.
00420                      */
00421                     virtual SMCError ParseSMCStream(const char *stream,
00422                               size_t length,
00423                               ITextListener_SMC *smc_listener,
00424                               SMCStates *states,
00425                               char *buffer,
00426                               size_t maxsize) =0;
00427           };
00428 
/**
 * @brief Returns the number of bytes occupied by the UTF-8 character that
 * starts at the given position, judged from its first byte only.
 *
 * Only the lead byte is inspected; the following bytes are never read, so the
 * result is not a guarantee that the sequence is complete or well formed.
 *
 * A byte that cannot start a character (a stray continuation byte 10xxxxxx or
 * an invalid lead byte 11111xxx) is reported as a single byte.  This lets a
 * caller that advances by the returned amount resynchronise on the next byte
 * instead of skipping over data (possibly the terminating NUL) that does not
 * belong to the current character.
 *
 * @param stream        Pointer to the first byte of the character.  Must be
 *                      non-NULL and point to at least one readable byte.
 * @return              1 for single-byte characters and invalid lead bytes,
 *                      otherwise 2, 3 or 4 according to the lead byte.
 */
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
    /* Convert the value rather than casting the pointer, so constness is kept. */
    const unsigned char lead = static_cast<unsigned char>(*stream);

    if ((lead & 0x80) == 0x00)
    {
        return 1;   /* 0xxxxxxx: single byte */
    }
    if ((lead & 0xE0) == 0xC0)
    {
        return 2;   /* 110xxxxx */
    }
    if ((lead & 0xF0) == 0xE0)
    {
        return 3;   /* 1110xxxx */
    }
    if ((lead & 0xF8) == 0xF0)
    {
        return 4;   /* 11110xxx */
    }

    /* 10xxxxxx (continuation) or 11111xxx (invalid): not a valid lead byte. */
    return 1;
}
00446 }
00447 
00448 extern SourceMod::ITextParsers *textparsers;
00449 
00450 #endif //_INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
00451 

Generated on Wed Dec 7 2011 18:50:02 for SourceMod SDK by  doxygen 1.7.1