1 #include "otsdaq/XmlUtilities/HttpXmlDocument.h"
2 #include "otsdaq/Macros/CoutMacros.h"
3 #include "otsdaq/Macros/StringMacros.h"
4 #include "otsdaq/MessageFacility/MessageFacility.h"
5 #include "otsdaq/XmlUtilities/ConvertFromXML.h"
6 #include "otsdaq/XmlUtilities/ConvertToXML.h"
10 #include <xercesc/dom/DOM.hpp>
11 #include <xercesc/dom/DOMDocument.hpp>
12 #include <xercesc/dom/DOMDocumentType.hpp>
13 #include <xercesc/dom/DOMElement.hpp>
14 #include <xercesc/dom/DOMImplementation.hpp>
15 #include <xercesc/dom/DOMImplementationLS.hpp>
16 #include <xercesc/dom/DOMImplementationRegistry.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
28 #include <xercesc/framework/LocalFileFormatTarget.hpp>
29 #include <xercesc/util/OutOfMemoryException.hpp>
37 #include <sys/types.h>
// HttpXmlDocument constructor taking a cookie code and a display name
// (listing excerpt: the signature line and several body lines are not shown).
// The member initializers below set the tag names used for the header, data,
// cookie-code and display-name elements.
60 , headerTagName_(
"HEADER")
61 , dataTagName_(
"DATA")
62 , cookieCodeTagName_(
"CookieCode")
63 , displayNameTagName_(
"DisplayName")
// Header creation is tied to a non-empty cookie code or display name
// (the braces delimiting this branch are on lines not shown).
67 if(cookieCode !=
"" || displayName !=
"")
69 headerElement_ = theDocument_->createElement(CONVERT_TO_XML(headerTagName_));
70 rootElement_->appendChild(headerElement_);
// Create the data element and attach it to the root element.
78 dataElement_ = theDocument_->createElement(CONVERT_TO_XML(dataTagName_));
79 rootElement_->appendChild(dataElement_);
// Constructor that builds this document from another document `doc`
// (listing excerpt: the signature line and several body lines are not shown).
// The tag names are taken from the source document.
88 , headerTagName_(doc.headerTagName_)
89 , dataTagName_(doc.dataTagName_)
90 , cookieCodeTagName_(doc.cookieCodeTagName_)
91 , displayNameTagName_(doc.displayNameTagName_)
// recursiveElementCopy is given the source root and this document's root;
// presumably it replicates the source tree here -- helper not shown, confirm.
102 recursiveElementCopy(doc.rootElement_, rootElement_);
// The header/data element pointers are looked up by tag name under this
// document's own root rather than copied from `doc`. The tail of each lookup
// expression is on lines not shown.
104 if(doc.headerElement_ != 0)
105 headerElement_ = (xercesc::DOMElement*)rootElement_
106 ->getElementsByTagName(CONVERT_TO_XML(headerTagName_))
109 dataElement_ = (xercesc::DOMElement*)rootElement_
110 ->getElementsByTagName(CONVERT_TO_XML(dataTagName_))
// Destructor: the body is empty, so no cleanup is performed in this function itself.
117 HttpXmlDocument::~HttpXmlDocument(
void) {}
// Adds a header element to this document (listing excerpt: braces and several
// body lines are not shown).
//   cookieCode  - cookie code for the header; an empty string means "none".
//   displayName - display name for the header; an empty string means "none".
// The visible error text states that this is only allowed for documents that
// do not already have a header; the check itself and what is done with the
// message are on lines not shown.
119 void HttpXmlDocument::setHeader(std::string cookieCode, std::string displayName)
123 std::stringstream ss;
124 ss << __COUT_HDR_FL__
125 <<
"Can NOT set header to doc with a header! Only allowed for docs without "
// Header creation is tied to at least one of the two values being non-empty.
130 if(cookieCode !=
"" || displayName !=
"")
132 headerElement_ = theDocument_->createElement(CONVERT_TO_XML(headerTagName_));
133 rootElement_->appendChild(headerElement_);
// The display name is handled separately; the statement it guards is not shown.
136 if(displayName !=
"")
// Only the signature is visible in this listing; the body is not shown.
// NOTE(review): presumably creates a child named childName with text
// childValue under the data element and returns it -- confirm against the
// full source.
142 xercesc::DOMElement* HttpXmlDocument::addTextElementToData(
const std::string& childName,
143 const std::string& childValue)
// Builds a hex-text version of `binary` (listing excerpt: the declaration of
// hexStr and the lines after the loop are not shown).
//   childName - name for the child; how it is used is on lines not shown.
//   binary    - raw bytes; each byte becomes two uppercase hex digits.
150 xercesc::DOMElement* HttpXmlDocument::addBinaryStringToData(
const std::string& childName,
151 const std::string& binary)
153 std::string convertStr =
"";
155 for(
unsigned int i = 0; i < binary.length(); ++i)
// "%2.2X" with the unsigned char cast yields exactly two hex digits per byte.
// NOTE(review): hexStr's size is not visible here -- it must hold at least
// 3 chars (two digits plus the terminator); snprintf would make that explicit.
158 sprintf(hexStr,
"%2.2X", ((
unsigned char)binary[i]));
160 convertStr += hexStr;
// getChildrenCount body (listing excerpt: the signature and the statements
// guarded by the conditions are not shown).
// NOTE(review): the fallback to dataElement_ presumably applies only when no
// parent was supplied -- the guarding condition is not visible.
172 parent = dataElement_;
174 xercesc::DOMNodeList* nodeList =
175 parent->getChildNodes();
176 unsigned int count = 0;
// Walk the direct children and single out the nodes that are not text nodes;
// the statement executed for them (presumably the count increment) is not shown.
178 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
180 if(nodeList->item(i)->getNodeType() !=
181 xercesc::DOMNode::TEXT_NODE)
// removeDataElement body (listing excerpt: the signature and the statements
// after the text-node test are not shown).
// Iterates over the direct children of the data element and tests each for
// being a text node; what happens on a match is on lines not shown.
194 xercesc::DOMNodeList* nodeList =
195 dataElement_->getChildNodes();
197 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
199 if(nodeList->item(i)->getNodeType() ==
200 xercesc::DOMNode::TEXT_NODE)
// copyDataChildren body (listing excerpt: the signature and some lines are
// not shown).
// Walks the direct children of the other document's data element and hands
// them to recursiveAddElementToParent with this document's data element as the
// parent and `true` as the last argument.
222 xercesc::DOMNodeList* nodeList =
223 document.dataElement_->getChildNodes();
224 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
// Text nodes are tested for here; the statement this guards (presumably a
// skip) is on a line not shown.
226 if(nodeList->item(i)->getNodeType() ==
227 xercesc::DOMNode::TEXT_NODE)
230 recursiveAddElementToParent(
231 (xercesc::DOMElement*)(nodeList->item(i)), dataElement_,
true );
// outputXmlDocument (listing excerpt: most of the signature and the remaining
// call arguments are not shown).
// Output starts at the document's root element via recursiveOutputXmlDocument.
241 bool allowWhiteSpace ,
244 recursiveOutputXmlDocument(theDocument_->getDocumentElement(),
// Emits the XML text for currEl and, recursively, for its non-text children
// (listing excerpt: part of the signature, the braces, and the conditions that
// choose between std::cout and *out are not shown).
//   currEl          - element to output.
//   out             - stream written to in the branches that use *out.
//   allowWhiteSpace - how it is applied is on lines not shown.
// The output lines use tabStr as a prefix; children get tabStr plus one tab.
// Elapsed time since entry is logged through __COUTT__ at several points.
255 void HttpXmlDocument::recursiveOutputXmlDocument(xercesc::DOMElement* currEl,
256 std::ostringstream* out,
259 bool allowWhiteSpace,
// Reference point for the timing log lines below.
263 auto start = std::chrono::high_resolution_clock::now();
268 std::cout << tabStr <<
"<" << XML_TO_CHAR(currEl->getNodeName());
270 *out << tabStr <<
"<" << XML_TO_CHAR(currEl->getNodeName());
// An element whose first child is a text node has that text written as a
// value='...' attribute.
273 if(currEl->getFirstChild() != NULL &&
274 currEl->getFirstChild()->getNodeType() ==
275 xercesc::DOMNode::TEXT_NODE)
279 std::cout <<
" value='"
281 XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),
287 XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),
// With printErrors set, an element named "Error" is also reported through
// __COUT_ERR__.
291 if(printErrors && strcmp(XML_TO_CHAR(currEl->getNodeName()),
"Error") == 0)
292 __COUT_ERR__ <<
"xml field 'Error' encountered:\n"
293 << XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()) << __E__;
298 auto end = std::chrono::high_resolution_clock::now();
300 std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
301 __COUTT__ << tabStr << XML_TO_CHAR(currEl->getNodeName()) <<
" -- Time taken to call recurse xml out = " << duration <<
" milliseconds." << std::endl;
// Direct children of the current element; used for the length checks and
// the recursion below.
305 xercesc::DOMNodeList* nodeList = currEl->getChildNodes();
309 auto end = std::chrono::high_resolution_clock::now();
311 std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
312 __COUTT__ << tabStr << XML_TO_CHAR(currEl->getNodeName()) <<
" -- Time taken to call recurse xml out = " << duration <<
" milliseconds." << std::endl;
318 std::cout << ((nodeList->getLength() == 0 ||
319 (nodeList->getLength() == 1 &&
320 currEl->getFirstChild()->getNodeType() ==
321 xercesc::DOMNode::TEXT_NODE))
325 <<
" len:" << nodeList->getLength() << std::endl;
330 !(std::string(XML_TO_CHAR(currEl->getNodeName())) ==
"ROOT" ||
331 std::string(XML_TO_CHAR(currEl->getNodeName())) ==
"HEADER" ||
332 std::string(XML_TO_CHAR(currEl->getNodeName())) ==
"DATA" ||
333 std::string(XML_TO_CHAR(currEl->getNodeName())) ==
"node" ||
334 std::string(XML_TO_CHAR(currEl->getNodeName())) ==
"nodes"))
338 XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),
340 <<
"</" << XML_TO_CHAR(currEl->getNodeName()) <<
">" << std::endl;
344 *out << ((nodeList->getLength() == 0 ||
345 (nodeList->getLength() == 1 &&
346 currEl->getFirstChild()->getNodeType() ==
347 xercesc::DOMNode::TEXT_NODE))
// NOTE(review): getFirstChild() is dereferenced in this error log; whether a
// null first child has been ruled out at this point is not visible in this
// listing -- confirm.
352 if(printErrors && strcmp(XML_TO_CHAR(currEl->getNodeName()),
"Error") == 0)
353 __COUT_ERR__ <<
"xml field 'Error' encountered:\n"
354 << XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())
361 auto end = std::chrono::high_resolution_clock::now();
363 std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
364 __COUTT__ << tabStr << XML_TO_CHAR(currEl->getNodeName()) <<
" " << nodeList->getLength() <<
365 " -- Time taken to call recurse xml out = " << duration <<
" milliseconds." << std::endl;
// Recurse into every child that is not a text node, one tab deeper.
370 std::string newTabStr = tabStr +
"\t";
371 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
372 if(nodeList->item(i)->getNodeType() !=
373 xercesc::DOMNode::TEXT_NODE)
374 recursiveOutputXmlDocument(
375 (xercesc::DOMElement*)(nodeList->item(i)),
383 auto end = std::chrono::high_resolution_clock::now();
385 std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
386 __COUTT__ << tabStr << XML_TO_CHAR(currEl->getNodeName()) <<
" -- Time taken to call recurse xml out = " << duration <<
" milliseconds." << std::endl;
// For the data element, any text held in dataSs_ is written out as well.
390 if(currEl == dataElement_ &&
391 dataSs_.str().length())
394 std::cout << dataSs_.str() << std::endl;
396 *out << dataSs_.str() << std::endl;
// A separate closing tag is tied to the element having more than one child,
// or exactly one child that is not a text node.
400 if(nodeList->getLength() > 1 ||
401 (nodeList->getLength() == 1 &&
402 currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
405 std::cout << tabStr <<
"</" << XML_TO_CHAR(currEl->getNodeName()) <<
">"
408 *out << tabStr <<
"</" << XML_TO_CHAR(currEl->getNodeName()) <<
">"
414 auto end = std::chrono::high_resolution_clock::now();
416 std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
417 __COUTT__ << tabStr << XML_TO_CHAR(currEl->getNodeName()) <<
" -- DONE Time taken to call recurse xml out = " << duration <<
" milliseconds." << std::endl;
// getMatchingValue (listing excerpt: the start of the signature is not shown).
// Starts the search at the document's root element with a match counter of
// zero and returns whatever recursiveFindElementValue yields.
427 const unsigned int occurance)
429 unsigned int count = 0;
430 return recursiveFindElementValue(
431 theDocument_->getDocumentElement(), field, occurance, count);
// Depth-first search for an element named `field` (listing excerpt: the last
// parameter, the braces and the return statements are not shown).
//   currEl    - element to examine; its non-text children are searched too.
//   field     - element name to match.
//   occurance - zero-based index of the match wanted.
// `count` is passed on unchanged to the recursive calls and tracks how many
// name matches have been seen so far.
437 std::string HttpXmlDocument::recursiveFindElementValue(xercesc::DOMElement* currEl,
438 const std::string& field,
439 const unsigned int occurance,
// Because of the short-circuit &&, count is only incremented when the name
// matches.
442 if(XML_TO_CHAR(currEl->getNodeName()) == field &&
443 occurance == count++)
// The value read for a match is the node value of a leading text child.
445 if(currEl->getFirstChild() != NULL &&
446 currEl->getFirstChild()->getNodeType() ==
447 xercesc::DOMNode::TEXT_NODE)
450 XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
// Otherwise continue the search in each child that is not a text node.
457 xercesc::DOMNodeList* nodeList = currEl->getChildNodes();
458 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
459 if(nodeList->item(i)->getNodeType() !=
460 xercesc::DOMNode::TEXT_NODE)
462 retStr = recursiveFindElementValue(
463 (xercesc::DOMElement*)(nodeList->item(i)), field, occurance, count);
// getAllMatchingValues (listing excerpt: the start of the signature is not
// shown). Starts the collection at the document's root element; results are
// appended to retVec by recursiveFindAllElements.
476 std::vector<std::string>& retVec)
478 recursiveFindAllElements(theDocument_->getDocumentElement(), field, &retVec);
// Collects the text values of all elements named `field` in the subtree
// rooted at currEl (listing excerpt: braces are not shown).
//   currEl - element to examine; its non-text children are visited too.
//   field  - element name to match.
//   retVec - receives the node value of the leading text child of each match.
484 void HttpXmlDocument::recursiveFindAllElements(xercesc::DOMElement* currEl,
485 const std::string& field,
486 std::vector<std::string>* retVec)
// Only elements whose first child is a text node contribute a value.
488 if(XML_TO_CHAR(currEl->getNodeName()) == field && currEl->getFirstChild() != NULL &&
489 currEl->getFirstChild()->getNodeType() ==
490 xercesc::DOMNode::TEXT_NODE)
492 retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
// Visit every child that is not a text node.
495 xercesc::DOMNodeList* nodeList = currEl->getChildNodes();
496 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
497 if(nodeList->item(i)->getNodeType() !=
498 xercesc::DOMNode::TEXT_NODE)
499 recursiveFindAllElements(
500 (xercesc::DOMElement*)(nodeList->item(i)), field, retVec);
// getMatchingElement (listing excerpt: the start of the signature and the
// name of the function being called are not shown).
// The search is started from the document's root element.
508 const unsigned int occurance)
511 theDocument_->getDocumentElement(), field, occurance);
// getMatchingElementInSubtree (listing excerpt: the start of the signature is
// not shown).
//   parentEl  - element at which the search starts.
//   field     - element name to match.
//   occurance - zero-based index of the match wanted.
// Starts recursiveFindElement with a match counter of zero and returns its
// result.
520 xercesc::DOMElement* parentEl,
const std::string& field,
const unsigned int occurance)
522 unsigned int count = 0;
523 return recursiveFindElement(parentEl, field, occurance, count);
// Depth-first search for an element named `field` (listing excerpt: the last
// parameter, the braces and the return statements are not shown).
//   currEl    - element to examine; its non-text children are searched too.
//   field     - element name to match.
//   occurance - zero-based index of the match wanted.
// `count` is passed on unchanged to the recursive calls and tracks how many
// name matches have been seen so far.
529 xercesc::DOMElement* HttpXmlDocument::recursiveFindElement(xercesc::DOMElement* currEl,
530 const std::string& field,
531 const unsigned int occurance,
// Because of the short-circuit &&, count is only incremented when the name
// matches.
534 if(XML_TO_CHAR(currEl->getNodeName()) == field &&
535 occurance == count++)
// A leading text child is tested for on a match; the statement this guards is
// on a line not shown.
537 if(currEl->getFirstChild() != NULL &&
538 currEl->getFirstChild()->getNodeType() ==
539 xercesc::DOMNode::TEXT_NODE)
// NOTE(review): retEl is declared without an initializer; it is assigned
// inside the loop below. Confirm no path reads it before that assignment.
546 xercesc::DOMElement* retEl;
548 xercesc::DOMNodeList* nodeList = currEl->getChildNodes();
549 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
550 if(nodeList->item(i)->getNodeType() !=
551 xercesc::DOMNode::TEXT_NODE)
553 retEl = recursiveFindElement(
554 (xercesc::DOMElement*)(nodeList->item(i)), field, occurance, count);
// Adds `child` and its element descendants under `parent` (listing excerpt:
// the last parameter, the braces, the escaping step and the creation of
// newParent are not shown).
//   child  - source element; its name and leading text are read here.
//   parent - destination element; how it is used is on lines not shown.
// `html` is forwarded unchanged to the recursive calls.
566 void HttpXmlDocument::recursiveAddElementToParent(xercesc::DOMElement* child,
567 xercesc::DOMElement* parent,
570 std::string childText =
"";
572 std::string childName =
573 XML_TO_CHAR(child->getNodeName());
// childText stays empty unless it is taken from the child's first node.
575 if(child->getFirstChild() != NULL &&
576 child->getFirstChild()->getNodeType() ==
580 childText = XML_TO_CHAR(child->getFirstChild()->getNodeValue());
// Debug traces around an escaping step that is on a line not shown.
584 __COUTS__(20) <<
"pre escape childText " << childText << std::endl;
586 __COUTS__(20) <<
"post escape childText " << childText << std::endl;
589 __COUTS__(20) <<
"childName " << childName <<
" childText " << childText << std::endl;
// Process the child's own children, with newParent as their parent.
595 xercesc::DOMNodeList* nodeList = child->getChildNodes();
596 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
// Text nodes are tested for here; the statement this guards (presumably a
// skip) is on a line not shown.
598 if(nodeList->item(i)->getNodeType() ==
599 xercesc::DOMNode::TEXT_NODE)
602 recursiveAddElementToParent(
603 (xercesc::DOMElement*)(nodeList->item(i)), newParent, html);
// getAllMatchingElements (listing excerpt: the start of the signature is not
// shown). Starts the collection at the document's root element; matching
// elements are appended to retVec by recursiveFindAllElements.
612 std::vector<xercesc::DOMElement*>& retVec)
614 recursiveFindAllElements(theDocument_->getDocumentElement(), field, &retVec);
// Collects pointers to all elements named `field` in the subtree rooted at
// currEl (listing excerpt: braces are not shown).
//   currEl - element to examine; its non-text children are visited too.
//   field  - element name to match.
//   retVec - receives each matching element.
620 void HttpXmlDocument::recursiveFindAllElements(xercesc::DOMElement* currEl,
621 const std::string& field,
622 std::vector<xercesc::DOMElement*>* retVec)
// An element is only collected when its first child is a text node, so a
// name match without leading text is not added.
624 if(XML_TO_CHAR(currEl->getNodeName()) == field && currEl->getFirstChild() != NULL &&
625 currEl->getFirstChild()->getNodeType() ==
626 xercesc::DOMNode::TEXT_NODE)
628 retVec->push_back(currEl);
// Visit every child that is not a text node.
631 xercesc::DOMNodeList* nodeList = currEl->getChildNodes();
632 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
633 if(nodeList->item(i)->getNodeType() !=
634 xercesc::DOMNode::TEXT_NODE)
635 recursiveFindAllElements(
636 (xercesc::DOMElement*)(nodeList->item(i)), field, retVec);
// loadXmlDocument body (listing excerpt: the signature, the braces, the try
// keyword and the return statements are not shown).
// Parses the file at filePath with a Xerces DOM parser, takes over the parsed
// document, and re-resolves the root, header and data element pointers.
647 struct stat fileStatus;
// A non-zero stat() result means the path could not be examined; the handling
// is on lines not shown.
649 if(stat(filePath.c_str(), &fileStatus) != 0)
// NOTE(review): the parser is allocated with a raw `new`; its release is not
// visible in this listing -- confirm it is deleted on every path, including
// the throw and catch paths below.
660 xercesc::XercesDOMParser* parser =
new xercesc::XercesDOMParser;
662 parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
663 parser->setDoNamespaces(
true);
664 parser->setDoSchema(
true);
665 parser->useCachedGrammarInParse(
false);
669 parser->parse(filePath.c_str());
// adoptDocument() hands the parsed document over to the caller (see the Xerces
// parser documentation), so theDocument_ refers to it from here on.
672 theDocument_ = parser->adoptDocument();
677 rootElement_ = theDocument_->getDocumentElement();
// NOTE(review): the message built with __SS__ includes the file path, but the
// exception thrown below carries only the fixed literal -- confirm that is
// intended.
680 __SS__ <<
"empty XML theDocument_: " << filePath << std::endl;
682 throw(std::runtime_error(
"empty XML theDocument_"));
685 recursiveFixTextFields(
// The header element is looked up only when at least one element with the
// header tag name exists.
688 xercesc::DOMNodeList* nodeList =
689 theDocument_->getElementsByTagName(CONVERT_TO_XML(headerTagName_));
690 if(nodeList->getLength())
692 (xercesc::DOMElement*)(theDocument_
693 ->getElementsByTagName(
694 CONVERT_TO_XML(headerTagName_))
699 dataElement_ = (xercesc::DOMElement*)(theDocument_
700 ->getElementsByTagName(
701 CONVERT_TO_XML(dataTagName_))
// Xerces XML exceptions raised in the guarded region are reported here; what
// is done after logging is on lines not shown.
704 catch(xercesc::XMLException& e)
706 __SS__ <<
"Error parsing file: " << filePath << std::endl;
708 __COUT__ <<
"Error parsing file." << std::endl;
// Rewrites the content of the text nodes under currEl and recurses into its
// children (listing excerpt: the braces, any else keyword, and the function
// applied to the text value are not shown).
//   currEl - element whose children are processed.
719 void HttpXmlDocument::recursiveFixTextFields(xercesc::DOMElement* currEl)
721 xercesc::DOMNodeList* nodeList = currEl->getChildNodes();
724 for(
unsigned int i = 0; i < nodeList->getLength(); ++i)
// NOTE(review): text nodes are C-style cast to DOMElement* below. Only
// setTextContent and getNodeValue are called through the cast; both are
// declared on DOMNode in Xerces-C, so calling them on the DOMNode* directly
// would avoid the cast -- confirm.
725 if(nodeList->item(i)->getNodeType() ==
726 xercesc::DOMNode::TEXT_NODE)
727 ((xercesc::DOMElement*)(nodeList->item(i)))
728 ->setTextContent(CONVERT_TO_XML(
730 ((xercesc::DOMElement*)(nodeList->item(i)))->getNodeValue()))));
732 recursiveFixTextFields((xercesc::DOMElement*)(nodeList->item(i)));
xercesc::DOMElement * getMatchingElement(const std::string &field, const unsigned int occurance=0)
void getAllMatchingValues(const std::string &field, std::vector< std::string > &retVec)
HttpXmlDocument(std::string cookieCode="", std::string displayName="")
xercesc::DOMElement * getMatchingElementInSubtree(xercesc::DOMElement *currEl, const std::string &field, const unsigned int occurance=0)
void copyDataChildren(HttpXmlDocument &document)
void removeDataElement(unsigned int dataChildIndex=0)
default to first child
bool loadXmlDocument(const std::string &filePath)
unsigned int getChildrenCount(xercesc::DOMElement *parent=0)
std::string getMatchingValue(const std::string &field, const unsigned int occurance=0)
void outputXmlDocument(std::ostringstream *out, bool dispStdOut=false, bool allowWhiteSpace=false, bool printErrors=false)
void getAllMatchingElements(const std::string &field, std::vector< xercesc::DOMElement * > &retVec)
Note that XmlDocument functionality is extended by HttpXmlDocument class.
xercesc::DOMElement * addTextElementToParent(const std::string &childName, const std::string &childText, xercesc::DOMElement *parent)
void recursiveRemoveChild(xercesc::DOMElement *childEl, xercesc::DOMElement *parentEl)
static std::string escapeString(std::string inString, bool allowWhiteSpace=false)