1 """A collection of modules for iterating through different kinds of
2 tree, generating tokens identical to those produced by the tokenizer
To create a tree walker for a new type of tree, you need to
implement a tree walker object (called TreeWalker by convention) that
7 implements a 'serialize' method taking a tree as sole argument and
8 returning an iterator generating tokens.
11 from __future__ import absolute_import, division, unicode_literals
15 from ..utils import default_etree
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are:

                "dom" - The xml.dom.minidom DOM implementation
                "pulldom" - The xml.dom.pulldom event stream
                "etree" - A generic walker for tree implementations exposing an
                          elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "lxml" - Optimized walker for lxml.etree
                "genshi" - a Genshi stream

    implementation - (Currently applies to the "etree" tree type only). A module
                     implementing the tree type e.g. xml.etree.ElementTree.
                     When None, default_etree is used.

    kwargs - extra keyword arguments; they are only used for the "etree" tree
             type, where they are forwarded to etree.getETreeModule.

    Returns the TreeWalker class for the requested tree type, or None when
    treeType is not one of the supported names."""
    treeType = treeType.lower()
    if treeType not in treeWalkerCache:
        if treeType in ("dom", "pulldom"):
            # The walker lives in a submodule named after the tree type.
            # import_module both loads the submodule and returns it, so the
            # lookup cannot fail just because nothing imported it earlier.
            import importlib
            name = "%s.%s" % (__name__, treeType)
            mod = importlib.import_module(name)
            treeWalkerCache[treeType] = mod.TreeWalker
        elif treeType == "genshi":
            # Imported lazily so the dependency is only needed on request.
            from . import genshistream
            treeWalkerCache[treeType] = genshistream.TreeWalker
        elif treeType == "lxml":
            from . import lxmletree
            treeWalkerCache[treeType] = lxmletree.TreeWalker
        elif treeType == "etree":
            from . import etree
            if implementation is None:
                implementation = default_etree
            # XXX: NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeWalker
    # Unknown tree types were never added to the cache, so this yields None.
    return treeWalkerCache.get(treeType)