-
Notifications
You must be signed in to change notification settings - Fork 83
/
Copy pathxml2_xpath.cpp
95 lines (81 loc) · 2.64 KB
/
xml2_xpath.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <Rcpp.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/tree.h>
#include "xml2_types.h"
using namespace Rcpp;
class XmlSeeker {
xmlXPathContext* context_;
xmlXPathObject* result_;
std::string xpath_;
XPtrDoc doc_;
public:
XmlSeeker(XPtrDoc doc, xmlNode* node) : result_(NULL), doc_(doc) {
context_ = xmlXPathNewContext(doc.checked_get());
// Set context to current node
context_->node = node;
}
void registerNamespace(CharacterVector nsMap) {
if (nsMap.size() == 0)
return;
CharacterVector prefix = as<CharacterVector>(nsMap.attr("names"));
for (int i = 0; i < nsMap.size(); ++i) {
xmlChar* prefixI = (xmlChar*) CHAR(STRING_ELT(prefix, i));
xmlChar* urlI = (xmlChar*) CHAR(STRING_ELT(nsMap, i));
if (xmlXPathRegisterNs(context_, prefixI, urlI) != 0)
stop("Failed to register namespace (%s <-> %s)", prefixI, urlI);
}
}
RObject search(std::string xpath, int num_results) {
xpath_ = xpath;
result_ = xmlXPathEval((xmlChar*) xpath.c_str(), context_);
if (result_ == NULL) {
List ret = List();
ret.attr("class") = "xml_missing";
return ret;
}
switch (result_->type) {
case XPATH_NODESET:
{
xmlNodeSet* nodes = result_->nodesetval;
if (nodes == NULL || nodes->nodeNr == 0) {
List ret = List();
ret.attr("class") = "xml_missing";
return ret;
}
int n = std::min(result_->nodesetval->nodeNr, num_results);
List out(n);
for (int i = 0; i < n; i++) {
List ret;
ret["node"] = XPtrNode(nodes->nodeTab[i]);
ret["doc"] = doc_;
ret.attr("class") = "xml_node";
out[i] = ret;
}
return out;
}
case XPATH_NUMBER: { return NumericVector::create(result_->floatval); }
case XPATH_BOOLEAN: { return LogicalVector::create(result_->boolval); }
case XPATH_STRING: { return CharacterVector::create(Rf_mkCharCE((char *) result_->stringval, CE_UTF8)); }
default:
stop("XPath result type: %d not supported", result_->type);
}
return R_NilValue;
}
~XmlSeeker() {
try {
xmlXPathFreeContext(context_);
if (result_ != NULL)
xmlXPathFreeObject(result_);
} catch (...) {}
}
};
// Runs an XPath query relative to `node` inside `doc`.
//
// `nsMap` is a named character vector of namespace prefixes (names) and URIs
// (values) registered before evaluation. `num_results` caps the number of
// nodes returned for a node-set result; R's Inf means "no limit".
//
// Returns whatever XmlSeeker::search() produces for the expression. Raises an
// R error if `num_results` is NA/NaN.
// [[Rcpp::export]]
RObject xpath_search(XPtrNode node, XPtrDoc doc, std::string xpath, CharacterVector nsMap, double num_results) {
  // NaN compares unequal to itself; it has no meaningful integer value.
  if (num_results != num_results)
    stop("num_results must not be NA or NaN");

  // Converting a double outside the int range is undefined behaviour, so
  // saturate at both ends (this also covers +Inf and -Inf) before narrowing.
  int limit;
  if (num_results >= INT_MAX) {
    limit = INT_MAX;
  } else if (num_results <= INT_MIN) {
    limit = INT_MIN;
  } else {
    limit = static_cast<int>(num_results);
  }

  XmlSeeker seeker(doc, node.checked_get());
  seeker.registerNamespace(nsMap);
  return seeker.search(xpath, limit);
}