Shaka Player Embedded
xml_document_parser.cc
Go to the documentation of this file.
1 // Copyright 2016 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
16 
17 #include <libxml/parser.h>
18 
19 #include <utility>
20 
21 #include "src/js/dom/comment.h"
22 #include "src/js/dom/document.h"
23 #include "src/js/dom/element.h"
24 #include "src/js/dom/text.h"
25 #include "src/js/js_error.h"
26 #include "src/util/utils.h"
27 
28 namespace shaka {
29 namespace js {
30 namespace dom {
31 
32 // Read the following article for how the libxml SAX interface works:
33 // http://www.jamesh.id.au/articles/libxml-sax/libxml-sax.html
34 
35 namespace {
36 
37 // xmlChar == unsigned char. We treat these as UTF-8 strings of chars.
38 static_assert(sizeof(xmlChar) == sizeof(char), "Must be raw characters");
39 
40 
41 XMLDocumentParser* GetParser(void* context) {
42  return reinterpret_cast<XMLDocumentParser*>(context);
43 }
44 
45 std::string ToString(const xmlChar* data) {
46  return reinterpret_cast<const char*>(data);
47 }
48 
49 std::string ToString(const xmlChar* data, int length) {
50  auto* text = reinterpret_cast<const char*>(data);
51  return std::string(text, text + length);
52 }
53 
54 
55 void SaxEndDocument(void* context) {
56  GetParser(context)->EndDocument();
57 }
58 
59 void SaxStartElementNS(void* context, const xmlChar* local_name,
60  const xmlChar* prefix, const xmlChar* namespace_uri,
61  int /* nb_namespaces */,
62  const xmlChar** /* namespaces */, int nb_attributes,
63  int /* nb_defaulted */, const xmlChar** attributes) {
64  GetParser(context)->StartElement(
65  ToString(local_name),
66  namespace_uri ? optional<std::string>(ToString(namespace_uri)) : nullopt,
67  prefix ? optional<std::string>(ToString(prefix)) : nullopt, nb_attributes,
68  reinterpret_cast<const char**>(attributes));
69 }
70 
71 void SaxEndElementNS(void* context, const xmlChar* /* localname */,
72  const xmlChar* /* prefix */, const xmlChar* /* URI */) {
73  GetParser(context)->EndElement();
74 }
75 
76 void SaxCharacters(void* context, const xmlChar* raw_data, int size) {
77  GetParser(context)->Text(ToString(raw_data, size));
78 }
79 
80 void SaxProcessingInstruction(void* context, const xmlChar* /* target */,
81  const xmlChar* /* data */) {
82  GetParser(context)->SetException(JsError::DOMException(NotSupportedError));
83 }
84 
85 void SaxComment(void* context, const xmlChar* raw_data) {
86  GetParser(context)->Comment(ToString(raw_data));
87 }
88 
89 PRINTF_FORMAT(2, 3)
90 void SaxWarning( // NOLINT(cert-dcl50-cpp)
91  void* /* context */, const char* format, ...) {
92  va_list args;
93  va_start(args, format);
94  std::string message = util::StringPrintfV(format, args);
95  LOG(WARNING) << "XML parsing warning: " << message;
96  va_end(args);
97 }
98 
99 PRINTF_FORMAT(2, 3)
100 void SaxError( // NOLINT(cert-dcl50-cpp)
101  void* context, const char* format, ...) {
102  va_list args;
103  va_start(args, format);
104  std::string message = util::StringPrintfV(format, args);
105  va_end(args);
106 
107  GetParser(context)->SetException(
108  JsError::DOMException(UnknownError, message));
109 }
110 
111 void SaxCdata(void* context, const xmlChar* value, int len) {
112  // We do not have a separate CDATA type, so treat as text.
113  GetParser(context)->Text(ToString(value, len));
114 }
115 
116 } // namespace
117 
119  : document_(document), current_node_(document) {}
120 
122 
124  const std::string& source) {
125  // TODO: libxml says we should call xmlInitParser in case of multithreaded
126  // programs; however it works without it. We may not want to change global
127  // state of libxml so embedders can use it without us changing it.
128 
129  xmlSAXHandler sax;
130  memset(&sax, 0, sizeof(sax));
131  sax.initialized = XML_SAX2_MAGIC;
132  sax.endDocument = &SaxEndDocument;
133  sax.startElementNs = &SaxStartElementNS;
134  sax.endElementNs = &SaxEndElementNS;
135  sax.characters = &SaxCharacters;
136  sax.processingInstruction = &SaxProcessingInstruction;
137  sax.comment = &SaxComment;
138  sax.warning = &SaxWarning;
139  sax.error = &SaxError;
140  sax.fatalError = &SaxError;
141  sax.cdataBlock = &SaxCdata;
142 
143  int code = xmlSAXUserParseMemory(&sax, this, source.c_str(), source.size());
144  if (code < 0) {
145  LOG(ERROR) << "Error parsing XML document, code=" << code;
146  return error_ ? std::move(*error_) : JsError::DOMException(UnknownError);
147  }
148  if (error_)
149  return std::move(*error_);
150 
151  return document_;
152 }
153 
155  FinishTextNode();
156 }
157 
158 void XMLDocumentParser::StartElement(const std::string& local_name,
159  optional<std::string> namespace_uri,
160  optional<std::string> namespace_prefix,
161  size_t attribute_count,
162  const char** attributes) {
163  FinishTextNode();
164 
165  RefPtr<Element> child =
166  new Element(document_, local_name, namespace_uri, namespace_prefix);
167  for (size_t i = 0; i < attribute_count; i++) {
168  // Each attribute has the following values in |attributes|.
169  const char* local_name = attributes[i * 5];
170  const char* namespace_prefix = attributes[i * 5 + 1];
171  const char* namespace_uri = attributes[i * 5 + 2];
172  const char* value_begin = attributes[i * 5 + 3];
173  const char* value_end = attributes[i * 5 + 4];
174 
175  if (namespace_uri) {
176  std::string qualified_name;
177  if (namespace_prefix)
178  qualified_name = std::string(namespace_prefix) + ":" + local_name;
179  else
180  qualified_name = local_name;
181 
182  child->SetAttributeNS(namespace_uri, qualified_name,
183  std::string(value_begin, value_end));
184  } else {
185  child->SetAttribute(local_name, std::string(value_begin, value_end));
186  }
187  }
188 
189  current_node_->AppendChild(child);
190  current_node_ = child;
191 }
192 
194  FinishTextNode();
195  current_node_ = current_node_->parent_node();
196  DCHECK(!current_node_.empty());
197 }
198 
199 void XMLDocumentParser::Text(const std::string& text) {
200  current_text_.append(text);
201 }
202 
203 void XMLDocumentParser::Comment(const std::string& text) {
204  FinishTextNode();
205  current_node_->AppendChild(document_->CreateComment(text));
206 }
207 
209  error_.reset(new JsError(std::move(error)));
210 }
211 
212 void XMLDocumentParser::FinishTextNode() {
213  if (!current_text_.empty()) {
214  current_node_->AppendChild(document_->CreateTextNode(current_text_));
215  current_text_.clear();
216  }
217 }
218 
219 } // namespace dom
220 } // namespace js
221 } // namespace shaka
XMLDocumentParser(RefPtr< Document > document)
const nullopt_t nullopt
Definition: optional.cc:22
const char * source
Definition: media_utils.cc:30
void Text(const std::string &text)
void StartElement(const std::string &local_name, optional< std::string > namespace_uri, optional< std::string > namespace_prefix, size_t attribute_count, const char **attributes)
std::string StringPrintfV(const char *format, va_list va)
Definition: utils.cc:58
ExceptionOr< RefPtr< Document > > Parse(const std::string &source)
static JsError DOMException(ExceptionCode code)
Definition: js_error.cc:115
const char * message
#define PRINTF_FORMAT(format, dots)
Definition: macros.h:39
void Comment(const std::string &text)