[shared] xml_parser: added parser XhtmlToText that produces a plain-text version of XHTML content.
This commit is contained in:
parent
abe56a5add
commit
ae69509354
|
@ -1177,4 +1177,28 @@ class XhtmlCleaner(XmlParser):
|
||||||
toAdd = content
|
toAdd = content
|
||||||
# Re-transform XML special chars to entities.
|
# Re-transform XML special chars to entities.
|
||||||
self.env.currentContent += cgi.escape(toAdd)
|
self.env.currentContent += cgi.escape(toAdd)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
class XhtmlToText(XmlParser):
    '''Converts XHTML content into plain text. Character data is kept (with
       its line breaks removed); a "br" tag and the end of any tag listed in
       "paraTags" each produce a newline; all other markup is ignored.'''

    # Tags whose closing is rendered as a newline in the textual result.
    paraTags = ('p', 'li', 'center', 'div')

    def startDocument(self):
        '''Lets the base class initialise itself, then resets the result to an
           empty list that will collect the text fragments.'''
        XmlParser.startDocument(self)
        self.res = []

    def endDocument(self):
        '''Merges the collected fragments into a single string stored in
           "self.res", then returns what the base class returns.'''
        fragments = self.res
        self.res = ''.join(fragments)
        return XmlParser.endDocument(self)

    def characters(self, content):
        '''Stores a chunk of character data, stripped of its newline
           characters: line breaks in the result only come from tags.'''
        text = content.replace('\n', '')
        self.res.append(text)

    def startElement(self, elem, attrs):
        '''Emits a newline for every "br" tag; other start tags are
           ignored.'''
        if elem != 'br':
            return
        self.res.append('\n')

    def endElement(self, elem):
        '''Emits a newline every time a paragraph-like tag (see "paraTags")
           is closed; other end tags are ignored.'''
        if elem not in self.paraTags:
            return
        self.res.append('\n')
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in a new issue