4373383c574febca0bc17dc3ee43018190830359
[sdc/sdc-distribution-client.git] /
1 from __future__ import absolute_import, division, unicode_literals
2 from pip._vendor.six import text_type
3
4 from lxml import etree
5 from ..treebuilders.etree import tag_regexp
6
7 from gettext import gettext
8 _ = gettext
9
10 from . import _base
11
12 from .. import ihatexml
13
14
def ensure_str(s):
    """Return ``s`` as text.

    ``None`` and values that are already ``text_type`` are handed back
    unchanged; anything else is decoded as strict UTF-8.
    """
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("utf-8", "strict")
22
23
class Root(object):
    """Document-level node wrapping a parsed tree.

    ``children`` holds an optional ``Doctype`` (only when the tree reports an
    internal DTD) followed by every top-level sibling of the root element,
    in document order.
    """

    def __init__(self, et):
        """Collect the top-level nodes of ``et``.

        ``et`` must expose ``docinfo`` and ``getroot()``.
        """
        self.elementtree = et
        self.children = []

        docinfo = et.docinfo
        if docinfo.internalDTD:
            doctype = Doctype(self,
                              ensure_str(docinfo.root_name),
                              ensure_str(docinfo.public_id),
                              ensure_str(docinfo.system_url))
            self.children.append(doctype)

        # Rewind to the first top-level sibling that precedes the root
        # element (nodes reachable through getprevious()).
        current = et.getroot()
        earlier = current.getprevious()
        while earlier is not None:
            current = earlier
            earlier = current.getprevious()

        # Then walk forward, recording every top-level node in order.
        while current is not None:
            self.children.append(current)
            current = current.getnext()

        # The document node itself carries no text.
        self.text = self.tail = None

    def __getitem__(self, key):
        """Index into the collected top-level children."""
        return self.children[key]

    def getnext(self):
        """The document node has no following sibling."""
        return None

    def __len__(self):
        """Always report a length of 1, regardless of how many children exist."""
        return 1
53
54
class Doctype(object):
    """Doctype node: a name plus public and system identifiers."""

    def __init__(self, root_node, name, public_id, system_id):
        """Store the identifiers and the owning root node."""
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id

        # A doctype carries no text content of its own.
        self.text = self.tail = None

    def getnext(self):
        """Return the second entry of the root node's children.

        ``Root`` appends the doctype to ``children`` first, so index 1 is the
        node that follows it.
        """
        siblings = self.root_node.children
        return siblings[1]
67
68
class FragmentRoot(Root):
    """Root for a fragment given as a list of nodes rather than a full tree."""

    def __init__(self, children):
        """Wrap each entry of ``children`` in a ``FragmentWrapper``."""
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        """A fragment root has no following sibling."""
        return None
76
77
class FragmentWrapper(object):
    """Wrap one top-level item of a fragment (an element-like object or a string).

    The wrapper supplies sibling navigation through the owning fragment root
    and forwards every attribute it does not define to the wrapped object.
    """

    def __init__(self, fragment_root, obj):
        """Wrap ``obj`` as a child of ``fragment_root``.

        ``text`` and ``tail`` are copied from ``obj`` (as text) when it has
        them and are ``None`` otherwise. ``isstring`` is True when ``obj`` is
        itself a text or byte string, in which case ``obj`` is stored as text.
        """
        self.root_node = fragment_root
        self.obj = obj
        self.text = ensure_str(obj.text) if hasattr(obj, 'text') else None
        self.tail = ensure_str(obj.tail) if hasattr(obj, 'tail') else None
        # text_type is ``unicode`` on Py2 and ``str`` on Py3; testing plain
        # ``str`` would miss Py2 unicode strings. bytes covers Py2 ``str``.
        self.isstring = isinstance(obj, (text_type, bytes))
        if self.isstring:
            self.obj = ensure_str(obj)

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped object."""
        # If ``obj`` itself is missing (instance created without __init__,
        # e.g. by copy/pickle), ``self.obj`` below would re-enter this method
        # forever. Fail with a normal AttributeError instead.
        if name == "obj":
            raise AttributeError(name)
        return getattr(self.obj, name)

    def getnext(self):
        """Return the wrapper that follows this one in the fragment, or None."""
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        return None

    def __getitem__(self, key):
        """Index into the wrapped object."""
        return self.obj[key]

    def __bool__(self):
        """Truthiness of the wrapped object."""
        return bool(self.obj)

    def getparent(self):
        """Fragment items have no parent node."""
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        # Only consulted on Py2, where str() would yield bytes and fail for
        # non-ASCII text; text_type gives a real unicode string.
        return text_type(self.obj)

    def __len__(self):
        """Length of the wrapped object."""
        return len(self.obj)
123
124
class TreeWalker(_base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over the node wrappers defined in this module.

    Text is represented as ``(node, "text")`` or ``(node, "tail")`` tuples
    referring to the element that owns the string.
    """

    def __init__(self, tree):
        """Wrap ``tree`` as needed and initialise the walker.

        Objects with a ``getroot`` attribute are wrapped in ``Root``, lists in
        ``FragmentRoot``; anything else is passed to the base class unchanged.
        """
        if hasattr(tree, "getroot"):
            wrapped = Root(tree)
        elif isinstance(tree, list):
            wrapped = FragmentRoot(tree)
        else:
            wrapped = tree
        _base.NonRecursiveTreeWalker.__init__(self, wrapped)
        self.filter = ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        """Return the details tuple for ``node``; its first item is the node kind.

        Kinds produced here are TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY and
        ELEMENT (constants from ``_base``).
        """
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            return _base.TEXT, ensure_str(getattr(owner, key))

        if isinstance(node, Root):
            return (_base.DOCUMENT,)

        if isinstance(node, Doctype):
            return _base.DOCTYPE, node.name, node.public_id, node.system_id

        if isinstance(node, FragmentWrapper) and node.isstring:
            return _base.TEXT, node.obj

        if node.tag == etree.Comment:
            return _base.COMMENT, ensure_str(node.text)

        if node.tag == etree.Entity:
            return _base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        # Anything left is assumed to be an ordinary element.
        qualified = ensure_str(node.tag)
        tag_match = tag_regexp.match(qualified)
        if tag_match:
            namespace, tag = tag_match.groups()
        else:
            namespace, tag = None, qualified

        # Attributes are keyed by (namespace, local name).
        attrs = {}
        for raw_name, raw_value in node.attrib.items():
            attr_name = ensure_str(raw_name)
            attr_value = ensure_str(raw_value)
            attr_match = tag_regexp.match(attr_name)
            if attr_match:
                attr_key = (attr_match.group(1), attr_match.group(2))
            else:
                attr_key = (None, attr_name)
            attrs[attr_key] = attr_value

        element_name = self.filter.fromXmlName(tag)
        has_children = len(node) > 0 or node.text
        return (_base.ELEMENT, namespace, element_name, attrs, has_children)

    def getFirstChild(self, node):
        """Return the first child of ``node``: its leading text if any, else ``node[0]``."""
        assert not isinstance(node, tuple), _("Text nodes have no children")

        assert len(node) or node.text, "Node has no children"
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        """Return the node that follows ``node`` at the same level, or None."""
        if not isinstance(node, tuple):
            # A node's tail text, when present, comes right after it.
            if node.tail:
                return (node, "tail")
            return node.getnext()

        # Text node
        owner, key = node
        assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
        if key == "text":
            # Test len() explicitly rather than the truthiness of owner[0]:
            # a first child without children of its own may evaluate false.
            return owner[0] if len(owner) else None
        # tail
        return owner.getnext()

    def getParentNode(self, node):
        """Return the parent of ``node``.

        Leading text belongs to the element that carries it; tail text shares
        the parent of the element it trails.
        """
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                return owner
            # tail: fall through to the owner's own parent
            node = owner

        return node.getparent()
207
208         return node.getparent()