xml.dom (the DOM API in the standard library):
Advantages: Built into Python ... err ... that's it
Disadvantages: Too numerous to mention
xml.etree.ElementTree:
Advantages: Built into Python, simple, fast
Disadvantages: Geared toward processing rather than construction of XML/HTML
genshi.builder (Genshi's ElementFactory):
Advantages: Beautiful code, insanely simple, fast
Disadvantages: Geared toward construction rather than processing of XML/HTML
By way of demonstration, let's create the following document manually in each:
<movie>
<title>Serenity</title>
<genre>science-fiction</genre>
<imdb href="http://www.imdb.com/title/tt0379786/" />
</movie>
# Build the sample <movie> document with the standard library's DOM API.
import xml.dom
impl = xml.dom.getDOMImplementation()
# Create the document; 'movie' is the name of its root element
doc = impl.createDocument('', 'movie', '')
# <title>Serenity</title>: the text has to be wrapped in its own text node
title = doc.createElement('title')
title.appendChild(doc.createTextNode('Serenity'))
doc.documentElement.appendChild(title)
# <genre>science-fiction</genre>
genre = doc.createElement('genre')
genre.appendChild(doc.createTextNode('science-fiction'))
doc.documentElement.appendChild(genre)
# <imdb href="..." />: an element with an attribute and no content
imdb = doc.createElement('imdb')
imdb.setAttribute('href', 'http://www.imdb.com/title/tt0379786/')
doc.documentElement.appendChild(imdb)
# Serialize the whole document, encoded as UTF-8, and print it
print doc.toxml('UTF-8')
# Build the same <movie> document with ElementTree.
import xml.etree.ElementTree as et
# The root element
doc = et.Element('movie')
# SubElement() creates a child of doc and returns it, so its text or
# attributes can be set in the same statement
et.SubElement(doc, 'title').text = 'Serenity'
et.SubElement(doc, 'genre').text = 'science-fiction'
et.SubElement(doc, 'imdb').attrib['href'] = 'http://www.imdb.com/title/tt0379786/'
# Serialize the tree and print it
print et.tostring(doc)
# Build the same <movie> document with Genshi's element factory.
import genshi.builder
tag = genshi.builder.ElementFactory()
# Attribute access on the factory names the element; calling the result
# takes content as positional arguments and attributes as keyword arguments
print tag.movie(
    tag.title('Serenity'),
    tag.genre('science-fiction'),
    tag.imdb(href='http://www.imdb.com/title/tt0379786/')
)
Question 1: What if we could construct an "ElementTreeFactory" combining the post-processing advantages of ElementTree with the simple interface of Genshi's ElementFactory (which is clearly the most elegant of the three examples we've seen)?
Question 2: How does Genshi know about my made-up <movie> element?
Genshi's ElementFactory overrides __getattr__ to generate Element instances. The code (simplified) looks like this:
class ElementFactory(object):
def __getattr__(self, name):
return Element(name)
# ...
Meanwhile, the Element class overrides __call__ to set attributes and append content based on its parameters. The code (again simplified) is as follows:
class Element(object):
def __call__(self, *args, **kwargs):
self.attrib |= kwargs_to_attrs(kwargs)
for arg in args:
self.append(arg)
def append(self, node):
# ...
First, a utility routine to append content to Elements. This turns out to be a little complex, due to ElementTree's separation of .text and .tail:
import xml.etree.ElementTree as et
class ElementTreeFactory(object):
    """Factory that builds ``xml.etree.ElementTree`` elements.

    This part of the class provides ``_append``, the routine that adds
    content (text, elements, or iterables of either) to an element.
    """

    # ``basestring`` only exists on Python 2; on Python 3 every text
    # string is a ``str``.  Resolve the right type once, at class creation.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def _append(self, node, contents):
        """Append *contents* to the element *node*.

        *contents* may be:

        * a non-empty string -- added to ``node.text`` when *node* has no
          children yet, otherwise to the ``tail`` of its last child;
        * an element -- appended as a child of *node*; its ``tail`` is
          reset to the empty string first;
        * any other iterable -- each item is appended recursively.

        An empty string adds nothing.  Content that is neither a string,
        an element nor iterable raises ``TypeError``.
        """
        # If the content is a string, decide which element's
        # text or tail property it gets assigned to
        if isinstance(contents, self._string_types) and contents:
            if len(node) == 0:
                # No children yet: the text belongs to the node itself
                node.text = (node.text or '') + contents
            else:
                # Text following a child lives in that child's tail
                last = node[-1]
                last.tail = (last.tail or '') + contents
        # If the content is an element, simply append it to this
        elif et.iselement(contents):
            # Drop any tail the element carried so that text appended
            # afterwards starts from a known, non-None value
            contents.tail = ''
            node.append(contents)
        # Otherwise, try treating it as an iterable (an empty string ends
        # up here too and simply yields nothing)
        else:
            for content in contents:
                self._append(node, content)
Next, something to construct an Element with content from positional arguments and attributes from keyword arguments:
import xml.etree.ElementTree as et
class ElementTreeFactory(object):
def _append(self, node, contents):
# ...
def _element(self, _name, *args, **kwargs):
    """Create an element called *_name* with content and attributes.

    Positional arguments are the element's content and are handed, in
    order, to ``self._append``.  Keyword arguments become attributes;
    any whose value is ``None`` or ``False`` is left out.  The tag
    parameter is spelled ``_name`` so that ``name=...`` remains usable
    as an attribute.

    Returns the new element.
    """
    # Construct the Element with all attributes from kwargs
    # (``items()`` works on both Python 2 and Python 3)
    attributes = dict(
        (key, value) for key, value in kwargs.items()
        if value is not None and value is not False
    )
    e = et.Element(_name, attributes)
    # Append the content from args
    for arg in args:
        self._append(e, arg)
    return e
Finally, the magic __getattr__ bit from Genshi, with some added caching for performance:
import xml.etree.ElementTree as et
class ElementTreeFactory(object):
def _append(self, node, contents):
# ...
def _element(self, _name, *args, **kwargs):
# ...
def __getattr__(self, name):
    """Return a function that builds ``<name>`` elements.

    Python only calls ``__getattr__`` when normal attribute lookup has
    failed, so methods defined on the class or a subclass take
    precedence over the generated function.  The function forwards its
    positional and keyword arguments to ``self._element``.
    """
    # Generate a method to deal with this tag
    def create_elem(*args, **kwargs):
        return self._element(name, *args, **kwargs)
    # Add the method to the factory: once it is stored on the instance,
    # later lookups of the same tag find it directly and skip __getattr__
    setattr(self, name, create_elem)
    return create_elem
The resulting class allows us to construct documents like this:
# Build a small HTML report with the factory and print it serialized.
# NOTE(review): ``dataset`` is not defined in this snippet; it is assumed
# to be an iterable of rows exposing ``quarter`` and ``sales`` attributes.
tag = ElementTreeFactory()
print et.tostring(
    tag.html(
        tag.head(
            tag.title('Boring Management Report')
        ),
        tag.body(
            tag.h1('Boring Management Report'),
            tag.hr(),
            tag.table(
                tag.thead(
                    tag.tr(tag.th('Quarter'), tag.th('Sales'))
                ),
                # A generator expression is valid content: anything that
                # is neither a string nor an element is iterated
                tag.tbody(
                    tag.tr(tag.td(row.quarter), tag.td(row.sales))
                    for row in dataset
                )
            )
        )
    )
)
Okay, that's all very nice, but how is it any better than Genshi's ElementFactory?
Remember that __getattr__ is only called when normal attribute lookup fails? That means a subclass can supply its own versions of tag methods to customize the output (this is an actual example, complete with genuine corporate stupidity):
class W3ElementTreeFactory(ElementTreeFactory):
def _add_class(self, elem, cls):
    """Add the CSS class *cls* to the ``class`` attribute of *elem*.

    Classes already on the element keep their relative order, duplicates
    are dropped, and *cls* is added at the end if it is not there yet.
    Keeping the order (rather than joining a ``set``) makes the
    serialized attribute reproducible from run to run.
    """
    classes = []
    for candidate in elem.attrib.get('class', '').split() + [cls]:
        if candidate not in classes:
            classes.append(candidate)
    elem.attrib['class'] = ' '.join(classes)
def hr(self, *args, **kwargs):
    """Stand-in for ``<hr>``: emit ``<div class="hrule-dots">`` instead.

    Keyword arguments become attributes of the ``<div>``.  Positional
    arguments are accepted but ignored; the content is always a single
    space (presumably so the div is never serialized as an empty element).
    """
    rule = self._element('div', ' ', **kwargs)
    self._add_class(rule, 'hrule-dots')
    return rule
Here's a more complex example (again from the real world):
class W3ElementTreeFactory(ElementTreeFactory):
def _add_class(self, elem, cls):
# ...
def hr(self, *args, **kwargs):
# ...
def table(self, *args, **kwargs):
    """Build a ``<table>`` and apply the house CSS classes to its rows.

    The element is created by ``self._element`` from the given content
    and attributes.  Rows directly inside the first ``<thead>`` child get
    the ``blue-dark`` class; rows directly inside the first ``<tbody>``
    child get ``odd`` and ``even`` alternately, starting with ``odd``.

    Returns the table element.
    """
    table = self._element('table', *args, **kwargs)
    # If there is a <thead> element in content, apply the
    # 'blue-dark' CSS class to all rows in it.  find() returns None when
    # there is no match; compare with None explicitly, because the truth
    # value of an Element depends on whether it has children (and testing
    # it is deprecated).
    thead = table.find('thead')
    if thead is not None:
        for tr in thead.findall('tr'):
            self._add_class(tr, 'blue-dark')
    # Find the <tbody> element and apply even and odd classes
    # to the rows within it
    tbody = table.find('tbody')
    if tbody is not None:
        for n, tr in enumerate(tbody.findall('tr')):
            # n counts from zero, so the first row is 'odd'
            self._add_class(tr, ['odd', 'even'][n % 2])
    return table
Some more examples of things I've done with the ElementTreeFactory:
In conclusion:
/
#