Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members  

parser.cc

00001 /* Parser.cc
00002  *
00003  * Copyright (C) 2002 Christian Neumann
00004  *
00005  * This file is part of Libxmlight
00006  * 
00007  * Libxmlight is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2 of the License, or (at your option) any later version.
00011  *
00012  * Libxmlight is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public
00018  * License along with Libxmlight; if not, write to the Free
00019  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00020  */
00021 
00022 #include <Parser.h>
00023 #include <cctype>
00024 #include <Exception.h>
00025 
00026 using namespace xmlight;
00027 using namespace std;
00028 
//! Tells whether a character may appear in an element or attribute name.
/*!
 * Accepted characters are letters, digits, ':', '-', '_' and '.'.
 *
 * \param c character to classify
 * \return true if \a c is a name character, false otherwise
 */
bool iselementchar(const char& c)
{
        // The <cctype> classifiers are only defined for values representable
        // as unsigned char (or EOF). A plain char holding a byte >= 0x80 may
        // be negative, so convert before classifying.
        const unsigned char uc = static_cast<unsigned char>(c);
        return std::isalnum(uc) || c == ':' || c == '-' || c == '_' || c == '.';
}
00034 
00036 bool notelementchar(const char& c)
00037 {
00038         return ! iselementchar(c);
00039 }
00040 
00041 inline void Parser::parse_tag() {
00042 
00043         string::const_iterator begin, mark, data_end;
00044         begin = data.begin();
00045         data_end = data.end();
00046         
00047         if(*begin == '/') {
00048                 // end element
00049                 mark = find_if(begin + 1, data_end, notelementchar);
00050                 if(mark != data_end) { 
00051                         _on_error(string("Invalid char ") + *mark + string("in end-element <") + data + string(">"));
00052                 }else {
00053                         string name(begin + 1, data_end);
00054                         on_end_element(name);
00055                         element_count--;
00056                 }
00057         }else if(*begin == '!') {
00058                 // External DTD Declaration
00059                 return;
00060         }else if(*begin == '?') {
00061                 // xml declaration / processor statement
00062                 if(string(begin, begin + 14) == "?xml version=\"") {
00063                         if(has_xmldecl) {
00064                                 _on_error("Document allready declared!");
00065                         }else {
00066                                 //cout << "XML Declaration found" << endl;
00067                                 has_xmldecl = true;
00068                         }
00069                 }else {
00070                         //cout << "Prozessor Anweisung" << endl;
00071                 }
00072         }else {
00073                 // element 
00074                 if(! has_root_element) has_root_element = true;
00075                 bool without_end_element = false;
00076                 if(data_end[-1] == '/') without_end_element = true;
00077                 mark = find_if(begin, data_end, notelementchar);
00078                 if(mark != data_end) {
00079                         // element with attributes
00080                         string name = string(begin, mark);
00081                         TAttributes attributes;
00082                         begin = mark;
00083                         
00084                         for(;;) {
00085                                 string attr_name, attr_value;
00086                                 mark = find_if(begin, data_end, iselementchar);
00087                                 if(mark == data_end) {
00088                                         break;
00089                                 }else if(*mark == '/' && (mark + 1) == data_end) {
00090                                         break;
00091                                 }else {
00092                                         begin = mark;
00093                                         
00094                                         mark = find_if(begin, data_end, notelementchar);
00095                                         if(mark == data_end) {
00096                                                 _on_error(string("Unexpected end in tag <") + data + string(">"));
00097                                                 return;
00098                                         }else if(*mark == '=' && (mark[1] == '"' || mark[1] == '\'')) {
00099                                                 ++mark;
00100                                                 attr_name = string(begin, mark - 1);
00101                                                 
00102                                                 // get value of attribute
00103                                                 
00104                                                 begin = mark + 1;       
00105 
00106                                                 mark = find(begin, data_end, *mark);
00107                                                 if(mark == data_end) {
00108                                                         _on_error(string("Unexpected end in tag <") + data + string(">"));
00109                                                         return;
00110                                                 }else {
00111                                                         attr_value = string(begin, mark);
00112                                                         begin = mark;
00113                                                 }
00114 
00115                                         }else {
00116                                                 _on_error(string("Invalid char ") + *mark + string(" in tag <") + data + string(">"));
00117                                                 return;
00118                                         }
00119                                                 
00120                                 }
00121                                 attributes.insert(make_pair(attr_name, attr_value));
00122                         }
00123                         
00124                         on_start_element(name, attributes);
00125                         if(without_end_element) {
00126                                 on_end_element(name);
00127                         }else {
00128                                 ++element_count;
00129                         }
00130                         
00131                 }else {
00132                         // element without attributes
00133                         on_start_element(data, TAttributes());
00134                         ++element_count;
00135                 }
00136         }
00137 }
00138 
00139 void Parser::parse_string(const string& xmldata) 
00140 {
00141         string::const_iterator position;
00142 
00143         for(position = xmldata.begin(); position != xmldata.end(); ++position) {
00144                 if(parsing_tag) {
00145                         if(*position == '>') {
00146                                 // tag ends
00147                                 parsing_tag = false;
00148                                 parse_tag();
00149                                 data.clear();
00150                         }else if(*position == '[' && data == "![CDATA") {
00151                                 //cout << "CDATA" << endl;
00152                                 // CDATA tag, ignore
00153                                 parsing_tag = false;
00154                                 parsing_cdata = true;
00155                         }else if(*position == '-' && data == "!-") {
00156                                 // comment
00157                                 parsing_tag = false;
00158                                 parsing_comment = true;
00159                                 data.clear();
00160                         }else if(*position == '[' && string(data.begin(), data.begin() + 9) == "!DOCTYPE ") {
00161                                 // DTD
00162                                 //cout << "DTD" << endl;
00163                                 parsing_tag = false;
00164                                 parsing_dtd = true;
00165                                 data.clear();
00166                         }else {
00167                                 data.push_back(*position);
00168                         }
00169                 }else if(parsing_comment) {
00170                         if(*position == '>' && data.size() >= 3 && string(data.end() - 2, data.end()) == "--") {
00171                                 // end of comment
00172                                 parsing_comment = false;
00173                                 string comment = string(data.begin(), data.end() - 2);
00174                                 on_comment(comment);
00175                                 data.clear();
00176                         }else{
00177                                 data.push_back(*position);
00178                         }
00179                 }else if(parsing_cdata) {
00180                         if(*position == '>' && data.size() >= 2 && string(data.end() - 2, data.end()) == "]]") {
00181                                 // end of cdata
00182                                 parsing_cdata = false;
00183                                 data.clear();
00184                         }else{
00185                                 data.push_back(*position);
00186                         }
00187                 }else if(parsing_dtd) {
00188                         if(*position == '<') {
00189                                 ++dtd_count; 
00190                         }else if(*position == '>') {
00191                                 if(dtd_count > 0) {
00192                                         dtd_count--;
00193                                 }else {
00194                                         parsing_dtd = false;
00195                                 }
00196                         }
00197                 }else if(*position == '<') {
00198                         // new tag begins
00199                         if(has_root_element) {
00200                                 on_data(data);
00201                                 data.clear();
00202                         }
00203                         parsing_tag = true;
00204                 }else if(has_root_element) {
00205                         // data
00206                         data.push_back(*position);
00207 
00208                 }
00209                         
00210         } // for()
00211 
00212 }

Generated on Mon Oct 13 02:35:23 2003 for MPCL by doxygen1.2.18