1 from __future__ import absolute_import, division, unicode_literals
2 from pip._vendor.six import text_type
5 from ..treebuilders.etree import tag_regexp
7 from gettext import gettext
12 from .. import ihatexml
18 elif isinstance(s, text_type):
21 return s.decode("utf-8", "strict")
25 def __init__(self, et):
28 if et.docinfo.internalDTD:
29 self.children.append(Doctype(self,
30 ensure_str(et.docinfo.root_name),
31 ensure_str(et.docinfo.public_id),
32 ensure_str(et.docinfo.system_url)))
36 while node.getprevious() is not None:
37 node = node.getprevious()
38 while node is not None:
39 self.children.append(node)
def __getitem__(self, key):
    """Return the entry of ``self.children`` selected by ``key``.

    ``key`` is handed unchanged to the ``children`` container, so the
    accepted key types and the error raised for a bad key are whatever that
    container defines.
    """
    children = self.children
    return children[key]
55 class Doctype(object):
56 def __init__(self, root_node, name, public_id, system_id):
57 self.root_node = root_node
59 self.public_id = public_id
60 self.system_id = system_id
66 return self.root_node.children[1]
class FragmentRoot(Root):
    def __init__(self, children):
        """Build a root whose children are wrapped fragment items.

        Every item of ``children`` is wrapped in a ``FragmentWrapper`` that
        receives this root as its first argument.  The base-class
        initializer is not called; ``children``, ``text`` and ``tail`` are
        set directly here.
        """
        # Collect into a local list first so ``self.children`` is only bound
        # once every wrapper has been constructed.
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped

        # The fragment root itself carries no text content.
        self.text = None
        self.tail = None
78 class FragmentWrapper(object):
79 def __init__(self, fragment_root, obj):
80 self.root_node = fragment_root
82 if hasattr(self.obj, 'text'):
83 self.text = ensure_str(self.obj.text)
86 if hasattr(self.obj, 'tail'):
87 self.tail = ensure_str(self.obj.tail)
90 self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
91 # Support for bytes here is Py2
93 self.obj = ensure_str(self.obj)
def __getattr__(self, name):
    """Delegate attribute lookups that fail on the wrapper to ``self.obj``.

    Python calls this hook only after normal attribute lookup has failed,
    so attributes set on the wrapper itself never reach it.

    Returns the attribute ``name`` of the wrapped object.

    Raises AttributeError if the wrapped object has no such attribute, or
    if the wrapper has no ``obj`` attribute at all.
    """
    if name == "obj":
        # Getting here for "obj" means the wrapper has no ``obj`` attribute
        # (e.g. an instance created without running ``__init__``, as copy
        # and pickle do).  Reading ``self.obj`` below would re-enter this
        # hook without end, so report a normal missing attribute instead.
        raise AttributeError(name)
    return getattr(self.obj, name)
99 siblings = self.root_node.children
100 idx = siblings.index(self)
101 if idx < len(siblings) - 1:
102 return siblings[idx + 1]
106 def __getitem__(self, key):
110 return bool(self.obj)
118 def __unicode__(self):
125 class TreeWalker(_base.NonRecursiveTreeWalker):
126 def __init__(self, tree):
127 if hasattr(tree, "getroot"):
129 elif isinstance(tree, list):
130 tree = FragmentRoot(tree)
131 _base.NonRecursiveTreeWalker.__init__(self, tree)
132 self.filter = ihatexml.InfosetFilter()
134 def getNodeDetails(self, node):
135 if isinstance(node, tuple): # Text node
137 assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
138 return _base.TEXT, ensure_str(getattr(node, key))
140 elif isinstance(node, Root):
141 return (_base.DOCUMENT,)
143 elif isinstance(node, Doctype):
144 return _base.DOCTYPE, node.name, node.public_id, node.system_id
146 elif isinstance(node, FragmentWrapper) and node.isstring:
147 return _base.TEXT, node.obj
149 elif node.tag == etree.Comment:
150 return _base.COMMENT, ensure_str(node.text)
152 elif node.tag == etree.Entity:
153 return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
156 # This is assumed to be an ordinary element
157 match = tag_regexp.match(ensure_str(node.tag))
159 namespace, tag = match.groups()
162 tag = ensure_str(node.tag)
164 for name, value in list(node.attrib.items()):
165 name = ensure_str(name)
166 value = ensure_str(value)
167 match = tag_regexp.match(name)
169 attrs[(match.group(1), match.group(2))] = value
171 attrs[(None, name)] = value
172 return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
173 attrs, len(node) > 0 or node.text)
175 def getFirstChild(self, node):
176 assert not isinstance(node, tuple), _("Text nodes have no children")
178 assert len(node) or node.text, "Node has no children"
180 return (node, "text")
184 def getNextSibling(self, node):
185 if isinstance(node, tuple): # Text node
187 assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
189 # XXX: we cannot use a "bool(node) and node[0] or None" construct here
190 # because node[0] might evaluate to False if it has no child element
196 return node.getnext()
198 return (node, "tail") if node.tail else node.getnext()
200 def getParentNode(self, node):
201 if isinstance(node, tuple): # Text node
203 assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
206 # else: fallback to "normal" processing
208 return node.getparent()