diff --git a/shared/xml_parser.py b/shared/xml_parser.py
index 7d4e73e..50d5f00 100644
--- a/shared/xml_parser.py
+++ b/shared/xml_parser.py
@@ -19,9 +19,11 @@
# ------------------------------------------------------------------------------
import xml.sax, difflib, types, cgi
+from xml.parsers.expat import XML_PARAM_ENTITY_PARSING_NEVER
from xml.sax.handler import ContentHandler, ErrorHandler, feature_external_ges
from xml.sax.xmlreader import InputSource
from xml.sax import SAXParseException
+
from appy.shared import UnicodeBuffer
from appy.shared.errors import AppyError
from appy.shared.utils import sequenceTypes
@@ -176,7 +178,9 @@ class XmlParser(ContentHandler, ErrorHandler):
# ContentHandler methods ---------------------------------------------------
def startDocument(self):
- self.parser._parser.UseForeignDTD(True)
+ parser = self.parser._parser
+ parser.UseForeignDTD(True)
+ parser.SetParamEntityParsing(XML_PARAM_ENTITY_PARSING_NEVER)
def setDocumentLocator(self, locator):
self.locator = locator
return self.env
@@ -311,6 +315,7 @@ class XmlUnmarshaller(XmlParser):
return res
def startDocument(self):
+ XmlParser.startDocument(self)
self.res = None # The resulting web of Python objects (Object instances)
self.env.containerStack = [] # The stack of current "containers" where
# to store the next parsed element. A container can be a list, a tuple,
@@ -978,17 +983,6 @@ class XhtmlCleaner(XmlParser):
# Tags that required a line break to be inserted after them.
lineBreakTags = ('p', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'td')
- # A pre-cleaning phase consists in performing some replacements before
- # running the XML SAX parsing. The dict below contains such repls.
- preCleanRepls = {' ': ' '}
-
- def preClean(self, s):
- '''Before true XHTML cleaning, this method performs pre-cleaning by
- performing, on p_s, replacements as defined in self.preCleanRepls.'''
- for item, repl in self.preCleanRepls.iteritems():
- if item in s:
- s = s.replace(item, repl)
- return s
def clean(self, s, keepStyles=True):
'''Cleaning XHTML code is done for 2 reasons:
@@ -1017,7 +1011,7 @@ class XhtmlCleaner(XmlParser):
# also ignore its content.
self.env.ignoreContent = False
try:
- res = self.parse('%s' % self.preClean(s)).encode('utf-8')
+ res = self.parse('%s' % s).encode('utf-8')
except SAXParseException, e:
raise self.Error(str(e))
return res
@@ -1033,6 +1027,7 @@ class XhtmlCleaner(XmlParser):
def startDocument(self):
# The result will be cleaned XHTML, joined from self.res.
+ XmlParser.startDocument(self)
self.res = []
def endDocument(self):