00001 #ifndef IONFLUX_TOOLS_TOKENIZER 00002 #define IONFLUX_TOOLS_TOKENIZER 00003 /* ========================================================================== 00004 * Ionflux Tools 00005 * Copyright (c) 2004 Joern P. Meier 00006 * mail@ionflux.org 00007 * -------------------------------------------------------------------------- 00008 * Tokenizer.hpp Generic byte string tokenizer. 00009 * ========================================================================== 00010 * 00011 * This file is part of Ionflux Tools. 00012 * 00013 * Ionflux Tools is free software; you can redistribute it and/or modify it 00014 * under the terms of the GNU General Public License as published by the Free 00015 * Software Foundation; either version 2 of the License, or (at your option) 00016 * any later version. 00017 * 00018 * Ionflux Tools is distributed in the hope that it will be useful, but 00019 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 00020 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 00021 * for more details. 00022 * 00023 * You should have received a copy of the GNU General Public License 00024 * along with Ionflux Tools; if not, write to the Free Software Foundation, 00025 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00026 * 00027 * ========================================================================== */ 00028 00029 #include <string> 00030 #include <iostream> 00031 #include <vector> 00032 #include "ionflux/tools.hpp" 00033 #include "ionflux/TokenTypeMap.hpp" 00034 00035 namespace Ionflux 00036 { 00037 00038 namespace Tools 00039 { 00040 00048 00049 struct TokenType 00050 { 00052 int typeID; 00054 std::string validChars; 00060 bool invert; 00065 int maxChars; 00066 00073 static const int USERTYPE_ID; 00074 }; 00075 00077 struct Token 00078 { 00080 int typeID; 00082 std::string value; 00083 }; 00084 00085 class TokenTypeMap; 00086 00103 class Tokenizer 00104 { 00105 protected: 00107 std::string theInput; 00109 unsigned int currentPos; 00111 unsigned int currentTokenPos; 00113 Token currentToken; 00115 bool extractQuoted; 00117 char currentQuoteChar; 00119 bool extractEscaped; 00121 TokenTypeMap *typeMap; 00122 00123 public: 00125 TokenType TT_ANYTHING; 00127 static const TokenType TT_INVALID; 00129 static const TokenType TT_NONE; 00131 static const TokenType TT_QUOTED; 00133 static const TokenType TT_ESCAPED; 00135 static const TokenType TT_WHITESPACE; 00137 static const TokenType TT_LINETERM; 00139 static const TokenType TT_NUMBER; 00141 static const TokenType TT_ALPHA; 00143 static const TokenType TT_DEFAULT_SEP; 00145 static const TokenType TT_IDENTIFIER; 00147 static const Token TOK_INVALID; 00149 static const Token TOK_NONE; 00151 static const int TT_ANYTHING_TYPE_ID; 00153 static const std::string QUOTE_CHARS; 00155 static const char ESCAPE_CHAR; 00156 00161 Tokenizer(); 00162 00169 Tokenizer(const std::string& initInput); 00170 00177 Tokenizer(const std::vector<TokenType>& initTokenTypes); 00178 00186 Tokenizer(const std::vector<TokenType>& initTokenTypes, 00187 const std::string& initInput); 00188 00193 virtual ~Tokenizer(); 00194 00205 virtual void clearTokenTypes(); 00206 00212 virtual void useDefaultTokenTypes(); 00213 00223 virtual void setTokenTypes(const std::vector<TokenType>& newTokenTypes); 00224 00232 virtual void addTokenType(const TokenType& newTokenType); 00233 00241 virtual void addTokenTypes(const std::vector<TokenType>& newTokenTypes); 00242 00249 virtual void setInput(const std::string& newInput); 00250 00263 virtual Token nextToken(); 00264 00274 virtual Token getNextToken(const TokenTypeMap &otherMap); 00275 00282 virtual Token getNextToken(); 00283 00290 virtual Token getCurrentToken(); 00291 00298 virtual int getCurrentTokenType(); 00299 00305 virtual void reset(); 00306 00316 virtual void setTokenTypeAnything(); 00317 00330 virtual void setExtractQuoted(bool newExtractQuoted); 00331 00344 virtual void setExtractEscaped(bool newExtractEscaped); 00345 00353 virtual unsigned int getCurrentPos(); 00354 00362 virtual unsigned int getCurrentTokenPos(); 00363 00371 virtual char getQuoteChar(); 00372 00393 static bool isOneOf(char c, const std::string& testChars, bool invert); 00394 00402 static bool isValid(Token& token); 00403 }; 00404 00406 00407 } 00408 00409 } 00410 00414 #endif