[bin] backup.py: remove Data.fs.old before packing the ZODB to avoid disk space problems; [checkldap] added a param to define the scope of the LDAP query (base, onelevel or subtree); [shared] xml_parser: convert nbsp entity to the equivalent utf-8 char.

2013-09-05 10:42:19 +02:00 · 2013-09-05 10:42:19 +02:00 · 1be7d9f0ab
commit 1be7d9f0ab
parent 528cca9aa0
8 changed files with 83 additions and 22 deletions
--- a/shared/xml_parser.py
+++ b/shared/xml_parser.py
@@ -61,7 +61,7 @@ HTML_ENTITIES = {
        'ntilde':'ñ', 'ograve':'ò', 'oacute':'ó', 'ocirc':'ô', 'otilde':'õ',
        'ouml':'ö', 'divide':'÷', 'oslash':'ø', 'ugrave':'ù', 'uacute':'ú',
        'ucirc':'û', 'uuml':'ü', 'yacute':'ý', 'thorn':'þ', 'yuml':'ÿ',
-        'euro':'€', 'nbsp':' ', "rsquo":"'", "lsquo":"'", "ldquo":"'",
+        'euro':'€', 'nbsp':' ', "rsquo":"'", "lsquo":"'", "ldquo":"'",
        "rdquo":"'", 'ndash': '—', 'mdash': '—', 'oelig':'oe', 'quot': "'",
        'mu': 'µ'}
 import htmlentitydefs
@@ -1135,7 +1135,10 @@ class XhtmlCleaner(XmlParser):
        # between tags.
        if not self.env.currentContent or \
           self.env.currentContent[-1] in ('\n', ' '):
-            toAdd = content.lstrip()
+            # I give here to lstrip an explicit list of what is to be considered
+            # as blank chars, because I do not want unicode NBSP chars to be in
+            # this list.
+            toAdd = content.lstrip(u' \n\r\t')
        else:
            toAdd = content
        # Re-transform XML special chars to entities.