1 from __future__ import absolute_import, division, unicode_literals
3 from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
4 COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
8 from ..constants import voidElements
# NOTE(review): this listing looks like a lossy extract of the real module --
# every line carries an embedded source line number, indentation is gone, and
# the numbering has gaps, so some statements (the `def` header of the first
# method, the initialisation of `previous` / `ignore_until` / `attrs`, the
# unpacking of `event`, several `else:` / `yield` lines) are not visible here.
# The code is left exactly as found; the comments describe only what the
# visible lines show and flag everything else as an assumption.
#
# Tree walker that turns a stream of (event_type, node)-style events, using the
# xml.dom.pulldom event constants, into tokens produced by the base-class
# helpers (startTag, endTag, emptyTag, text, comment, unknown).
11 class TreeWalker(_base.TreeWalker):
# Walk self.tree one event behind: each event is tokenized as `previous`
# together with the event that follows it, so tokens() gets a lookahead.
# presumably this loop is the body of __iter__ -- header not visible, confirm.
15 for event in self.tree:
# Tokenize `previous` only if there is one and we are either not skipping
# (ignore_until is None) or `previous` refers to the very node we are
# skipping until (identity comparison on previous[1]).
16 if previous is not None and \
17 (ignore_until is None or previous[1] is ignore_until):
# Reached the node we were waiting for.
# presumably ignore_until is reset here -- the statement is not visible.
18 if previous[1] is ignore_until:
20 for token in self.tokens(previous, event):
# An "EmptyTag" token starts a skip: remember its node so later events are
# not tokenized until an event carrying that same node object shows up.
22 if token["type"] == "EmptyTag":
23 ignore_until = previous[1]
# Flush the last pending event, with no lookahead (next=None).
# NOTE(review): presumably this runs after the loop; if `previous` starts as
# None and self.tree yields nothing, previous[1] would fail -- verify.
25 if ignore_until is None or previous[1] is ignore_until:
26 for token in self.tokens(previous, None):
# NOTE(review): when this branch is reached the `if` above was false, which
# already implies ignore_until is not None, so the condition is redundant.
28 elif ignore_until is not None:
# The stream ended while still skipping for a node that never reappeared.
29 raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
# Generator: yield the token(s) for one event.
#   event -- the event to tokenize; `type` and `node` used below are
#            presumably unpacked from it (unpacking line not visible).
#   next  -- the following event, or None when there is none; only next[1]
#            is read. Shadows the builtin, but is part of the signature.
31 def tokens(self, event, next):
33 if type == START_ELEMENT:
35 namespace = node.namespaceURI
# Build the attribute mapping keyed by (namespaceURI, localName) -> value.
# `attr` starts as the attribute name and is rebound to the attribute node.
37 for attr in list(node.attributes.keys()):
38 attr = node.getAttributeNode(attr)
39 attrs[(attr.namespaceURI, attr.localName)] = attr.value
# Elements listed in voidElements are emitted via emptyTag(); the last
# argument is true when there is no following event or the following event
# belongs to a different node.
# presumably it is emptyTag's "has children" flag -- confirm against _base.
40 if name in voidElements:
41 for token in self.emptyTag(namespace,
44 not next or next[1] is not node):
# Non-void element (the `else:` header is not visible): plain start tag.
47 yield self.startTag(namespace, name, attrs)
49 elif type == END_ELEMENT:
51 namespace = node.namespaceURI
# No end tag is produced for names listed in voidElements.
52 if name not in voidElements:
53 yield self.endTag(namespace, name)
# presumably the COMMENT branch -- its `elif` header is not visible.
56 yield self.comment(node.nodeValue)
# Both whitespace and character-data events are emitted as text tokens.
58 elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
59 for token in self.text(node.nodeValue):
# presumably the final `else:` fallback for any other event type.
63 yield self.unknown(type)