Dom.cxx Source File

00001 // $Header: /nfs/slac/g/glast/ground/cvs/xml/src/Dom.cxx,v 1.19 2001/11/01 20:01:35 jrb Exp $
00002 // Author:  J. Bogart
00003 //
00004 // Implementation of xml::Dom, a convenient place to put static
00005 // utilities which enhance DOM api.
00006 
00007 #include "xml/Dom.h"
00008 #include "dom/DOM_Element.hpp"
00009 #include "dom/DOM_NodeList.hpp"
00010 #include "dom/DOM_CharacterData.hpp"
00011 #include "dom/DOM_NamedNodeMap.hpp"
00012 #include "util/TransService.hpp"
00013 #include "util/PlatformUtils.hpp"
00014 #include <strstream>
00015 #include <string>
00016 #include <cstring>
00017 
00018 namespace xml {
00019   XMLLCPTranscoder*  Dom::transcoder = 0;
00020   char*              Dom::transBuf = 0;
00021   int                Dom::transBufSize = 1000;
00022   XMLCh*             Dom::xmlchBuf = 0;
00023   int                Dom::xmlchBufSize = 200;
00024 
00025   DOM_Element Dom::findFirstChildByName(const DOM_Element& parent, 
00026                                         const char* const name) {
00027     
00028     DOMString domstr = DOMString(name);
00029     DOM_NodeList list = parent.getElementsByTagName(domstr);
00030     int len = list.getLength();
00031     int iChild;
00032     
00033     for (iChild = 0; iChild < len; iChild++) {
00034       if (  ((domstr.equals("*"))   ||
00035              (domstr.equals((list.item(iChild)).getNodeName()) ) ) &&
00036             (list.item(iChild).getParentNode() == (DOM_Node) parent) ) {
00037         DOM_Node    child_tmp = list.item(iChild);
00038         DOM_Element& child = static_cast<DOM_Element&>(child_tmp);
00039         return DOM_Element(child);
00040       }
00041     }
00042     return DOM_Element();
00043   }
00044 
00045   DOM_Element Dom::findFirstChildByName(const DOM_Element& parent, 
00046                                         const std::string name) {
00047     return findFirstChildByName(parent, name.c_str());
00048   }
00049 
00050   DOM_Element Dom::getSiblingElement(const DOM_Node& child) {
00051     if (child == DOM_Node()) return DOM_Element();
00052 
00053     DOM_Node sib = child.getNextSibling();
00054 
00055     while (sib != DOM_Node()) {
00056       if (sib.getNodeType() == DOM_Node::ELEMENT_NODE) {
00057         return (DOM_Element(static_cast<DOM_Element &> (sib)) );
00058       }
00059       sib = sib.getNextSibling();
00060     }
00061     return DOM_Element();
00062   }
00063     
00064   DOM_Element  Dom::getFirstChildElement(const DOM_Node& parent) {
00065     DOM_Node childNode = parent.getFirstChild();
00066 
00067     if (childNode == DOM_Node()) return DOM_Element();
00068     else if (childNode.getNodeType() == DOM_Node::ELEMENT_NODE) {
00069       return (DOM_Element(static_cast<DOM_Element &>(childNode)) );
00070     }
00071     else return getSiblingElement(childNode);
00072   }
00073 
00074   std::string   Dom::getAttribute(const DOM_Element& elt, char* attName) {
00075     DOMString attValue = elt.getAttribute(DOMString(attName));
00076     if (attValue == DOMString()) return std::string("");
00077 
00078     std::string strValue = std::string(transToChar(attValue));
00079     return strValue;
00080 
00081   }
00082 
00083   std::string   Dom::getAttribute(const DOM_Element& elt, 
00084                                   std::string attName) {
00085     return getAttribute(elt, attName.c_str());
00086   }
00087   std::string  Dom::getAttribute(const DOM_Node& elt, 
00088                             char* attName) {
00089     if (elt.getNodeType() != DOM_Node::ELEMENT_NODE) {
00090       return std::string("");
00091     }
00092     return getAttribute(static_cast<const DOM_Element&>(elt), attName);
00093   }
00094 
00095   std::string  Dom::getAttribute(const DOM_Node& elt, 
00096                             std::string attName) {
00097     return getAttribute(elt, attName.c_str());
00098   }
00099 
00100   void  Dom::addAttribute(DOM_Element& elt, std::string name, 
00101                                  double value) {
00102     std::strstream s;
00103     s << value << '\0';
00104 
00105     elt.setAttribute(DOMString(name.c_str()), DOMString(s.str()));
00106   }
00107 
00108   void  Dom::addAttribute(DOM_Element& elt, const DOMString& name, 
00109                                  double value) {
00110     std::strstream s;
00111     s << value << '\0';
00112 
00113     elt.setAttribute(name, DOMString(s.str()));
00114   }
00115 
00116   void  Dom::addAttribute(DOM_Element& elt, std::string name, 
00117                                  int value) {
00118     std::strstream s;
00119     s << value << '\0';
00120 
00121     elt.setAttribute(DOMString(name.c_str()), DOMString(s.str()));
00122   }
00123 
00124   void  Dom::addAttribute(DOM_Element& elt, std::string name, 
00125                                  unsigned int value) {
00126     std::strstream s;
00127     s << value << '\0';
00128 
00129     elt.setAttribute(DOMString(name.c_str()), DOMString(s.str()));
00130   }
00131 
00132   void  Dom::addAttribute(DOM_Element& elt, std::string name, 
00133                                  const char* value) {
00134     elt.setAttribute(DOMString(name.c_str()), DOMString(value));
00135   }
00136 
00137   void  Dom::addAttribute(DOM_Element& elt, std::string name, 
00138                                  std::string value) {
00139     elt.setAttribute(DOMString(name.c_str()), DOMString(value.c_str()));
00140   }
00141 
00142   /* 
00143       Serialize a node (and any children) to the specified ostream.
      printElement takes no prefix (indentation) argument and adds no
      white space of its own; see the note following.
 
      The only node types which actually do get written are
           element (and its attributes and children)
           text
           CDATA section
           comment
00150 
00151       NB:  For this first pass, printElement outputs all the
00152            supported node types just as they appeared in the
00153            serialization before parsing *if* the parse was
00154            non-validating.  If it *was* validating (or if
00155            the DOM representation was built programmatically
00156            rather than by parsing a file) then ignorable 
00157            white space will have been thrown away (in the
00158            validated case) or there won't have been any to
00159            begin with (programmatically-built case)
00160            so the printed version will be a horrific single
00161            line except that line breaks appearing within
00162            comments or text nodes will be preserved.
00163 
00164            Ideally would like to be able to query the DOM
00165            or have an argument passed in to tell us whether
00166            ignorable white space has been thrown away, in which
00167            case we should attempt to pretty print by putting
00168            newlines in in reasonable places and keeping track
00169            of a sensible indentation level.
00170 
00171            For now, make two different functions.  See 
00172            prettyPrintElement below.
00173   */
00174   void Dom::printElement(DOM_Node& node, std::ostream& out) {
00175 
00176     switch(node.getNodeType()) {
00177     case DOM_Node::ELEMENT_NODE:
00178       {
00179         // output start tag
00180         {
00181           DOMString tagName = node.getNodeName();
00182           char *pName = transToChar(tagName);
00183           out << '<' << pName;
00184         }
00185         // ...with attributes
00186         DOM_NamedNodeMap attMap = node.getAttributes();
00187         int   nAtt = attMap.getLength();
00188         int   iAtt;
00189         for (iAtt = 0; iAtt <nAtt; iAtt++) {
00190           DOM_Node att = attMap.item(iAtt);
00191           DOMString  attString = att.getNodeName();
00192           char *pAtt = transToChar(attString);
00193           out << ' ' << pAtt << '=';
00194           attString = att.getNodeValue();
00195           pAtt = transToChar(attString);
00196           out << '"' << pAtt << '"';
00197         }
00198       
00199         // iterate through children
00200         DOM_Node child = node.getFirstChild();
00201         if (child != 0) {  // there are children
00202           out << '>';
00203           while (child != 0) {
00204             Dom::printElement(child, out);
00205             child = child.getNextSibling();
00206           }
00207           // output end tag, long form
00208           {
00209             DOMString endName = node.getNodeName();
00210             char *    pEnd = transToChar(endName);
00211             out << "</" << pEnd << ">";
00212           }
00213         }
00214         else {  // no children; use short form for the empty tag
00215           out << " />";
00216         } 
00217       }
00218       break;
00219     case DOM_Node::TEXT_NODE:
00220       // just put it out as is
00221       {
00222         DOMString txtValue = node.getNodeValue();
00223         char *    pTxt = transToChar(txtValue);
00224         out << pTxt;
00225       }
00226       break;
00227 
00228     case DOM_Node::CDATA_SECTION_NODE:
00229       {
00230         DOMString txtValue = node.getNodeValue();
00231         char *    pTxt = transToChar(txtValue);
00232         // Probably need to put opening and closing sequences in by hand..
00233         out << "<![CDATA[" << pTxt << "]]>";
00234       }
00235       break;
00236       
00237 
00238     case DOM_Node::COMMENT_NODE :
00239       // glast.prs doesn't have any comments (but should!)
00240       {
00241         DOMString commentValue = node.getNodeValue();
00242         char *   pComment = transToChar(commentValue);
00243         out << "<!-- " << pComment << "-->";
00244       }
00245       break;
00246     default:
00247       // ignore anything else
00248       break;
00249     }
00250   }
00251 
00252   // Assume we need to do the indenting and line breaks
00253   void Dom::prettyPrintElement(DOM_Node& node, std::ostream& out,
00254                                std::string prefix) {
00255     
00256     out << prefix;
00257     switch(node.getNodeType()) {
00258 
00259     case DOM_Node::ELEMENT_NODE:
00260       {
00261         // output start tag
00262         DOMString tagName = node.getNodeName();
00263         char *pName   = transToChar(tagName);
00264         out << '<' << pName;
00265 
00266         // ...with attributes
00267         DOM_NamedNodeMap attMap = node.getAttributes();
00268         int   nAtt = attMap.getLength();
00269         int   iAtt;
00270         for (iAtt = 0; iAtt <nAtt; iAtt++) {
00271           DOM_Node att = attMap.item(iAtt);
00272           DOMString  attString = att.getNodeName();
00273           char      *pAtt = transToChar(attString);
00274           out << ' ' << pAtt << '=';
00275 
00276           attString = att.getNodeValue();
00277           pAtt = transToChar(attString);
00278           out << '"' << pAtt << '"';
00279         }
00280       
00281         // iterate through children
00282         DOM_Node child = node.getFirstChild();
00283         if (child != 0) {  // there are children
00284           out << '>' << std::endl;
00285           while (child != 0) {
00286             // new indent level
00287             Dom::prettyPrintElement(child, out, prefix + "  ");
00288             child = child.getNextSibling();
00289           }
00290           // output end tag, long form
00291           {
00292             DOMString endName = node.getNodeName();
00293             char *    pEnd = transToChar(endName);
00294             out << prefix << "</" << pEnd << ">" << std::endl;
00295           }
00296         }
00297         else {  // no children; use short form for the empty tag
00298           out << " />" << std::endl;
00299         } 
00300       }
00301       break;
00302     case DOM_Node::TEXT_NODE:
00303       // just put it out as is
00304       // Note this won't indent correctly if the text node
00305       // contains multiple lines.
00306       // Similarly, it's too much work to avoid sometimes putting out
00307       // an "extra" blank line in the vicinity of text.
00308       // Current code puts the extra <cr> before
00309       // the text node.
00310       {
00311         DOMString txtValue = node.getNodeValue();
00312         char *    pTxt = transToChar(txtValue);
00313         out << pTxt;
00314       }
00315 
00316       break;
00317 
00318     case DOM_Node::CDATA_SECTION_NODE:
00319       {
00320         DOMString txtValue = node.getNodeValue();
00321         char *    pTxt = transToChar(txtValue);
00322         // Probably need to put opening and closing sequences in by hand..
00323         out << "<![CDATA[" << pTxt << "]]>";
00324       }
00325       break;
00326       
00327     case DOM_Node::COMMENT_NODE :
00328       // glast.prs doesn't have any comments (but should!)
00329       // Note this won't indent correctly if the text node
00330       // contains multiple lines.  Could have utility routine
00331       // to do this, to be called for comments and text nodes
00332       {
00333         DOMString commentValue = node.getNodeValue();
00334         char *   pComment = transToChar(commentValue);
00335         out << "<!-- " << pComment << "-->" << std::endl;
00336       }
00337 
00338       break;
00339     default:
00340       // ignore anything else
00341       break;
00342     }
00343   }
00344 
00345   void Dom::prune(DOM_Element elt) {
00346     DOM_Element child = findFirstChildByName(elt, "*");
00347     while (child != DOM_Element()) {
00348       DOM_Element sib = getSiblingElement(child);
00349       prune(child);
00350       elt.removeChild(child);
00351       child = sib;
00352     }
00353   }
00354 
00355   char *Dom::transToChar(const XMLCh* const str, int len) {
00356     if (!transcoder) {
00357       int status = initTrans();
00358       if (!status) return 0;
00359     }
00360 
00361     // Find length of str to pass to transcode(..) rather than
00362     // just passing output buffer size.  This is important because 
00363     // (for Xerces 1.3 anyway) the transcode routine will try to read 
00364     // this many bytes from the input buffer, conceivably causing
00365     // an access violation if it's more than the actual input
00366     // buffer length
00367     
00368     if (len + 1 > transBufSize) { // return old buffer; allocate a bigger one
00369       char * tBuf = new char[len + 1];
00370       if (!tBuf) return 0;
00371       transBufSize = len + 1;
00372       delete [] transBuf;
00373       transBuf = tBuf;
00374     }
00375 
00376     bool ok;
00377     ok = Dom::transcoder->transcode(str, transBuf, 
00378                                len);
00379     return ( ok ? transBuf : 0);
00380   }
00381 
00382 
00383   char *Dom::transToChar(DOMString str) {
00384 
00385     return transToChar(str.rawBuffer(), str.length());
00386   }
00387 
00388   XMLCh* Dom::transToXMLCh(const char* const src) {
00389     if (!transcoder) {
00390       int status = initTrans();
00391       if (!status) return 0;
00392     }
00393     // as with transToChar above, find actual length of char*
00394     // and pass that to Xerces utility for 3rd (maxChars) argument.
00395     int len = strlen(src) + 1;
00396     if (len  > xmlchBufSize) {
00397       XMLCh * tBuf = new XMLCh[len];
00398       if (!tBuf) return 0;
00399       xmlchBufSize = len;
00400       delete [] xmlchBuf;
00401       xmlchBuf = tBuf;
00402     }
00403 
00404     bool ok;
00405 
00406     ok = transcoder->transcode(src, xmlchBuf, len);
00407     return (ok ? xmlchBuf : 0);
00408   }
00409 
00410   int Dom::initTrans() {
00411     transcoder = XMLPlatformUtils::fgTransService->makeNewLCPTranscoder();
00412     if (!transcoder) return 0;   // and complain?!? Shouldn't ever happen
00413     transBuf = new char[transBufSize];
00414     if (!transBuf) {
00415       delete transcoder;
00416       return 0;
00417     }
00418     xmlchBuf = new XMLCh[xmlchBufSize];
00419     if (!xmlchBuf) {
00420       delete [] transBuf;
00421       delete transcoder;
00422       return 0;
00423     }
00424     return 1;
00425   }
00426 
00427 }  // end namespace xml