[gen] XhtmlCleaner now only keeps useful CSS attributes within XHTML style attributes.

2012-05-24 12:54:40 +02:00 · 2012-05-24 12:54:40 +02:00 · d16b03e0d6
commit d16b03e0d6
parent 9477865f44
4 changed files with 42 additions and 8 deletions
--- a/pod/renderer.py
+++ b/pod/renderer.py
@ -217,7 +217,7 @@ class Renderer:
                        'text': pe.NS_TEXT}))
        self.stylesParser = self.createPodParser('styles.xml', context,
                                                 stylesInserts)
-        # Stores the styles mapping
+        # Store the styles mapping
        self.setStylesMapping(stylesMapping)

    def createPodParser(self, odtFile, context, inserts):
@ -398,6 +398,12 @@ class Renderer:
           and, on the other hand, ODT styles found into the template.'''
        try:
            stylesMapping = self.stylesManager.checkStylesMapping(stylesMapping)
+            # The predefined styles below are currently ignored, because the
+            # xhtml2odt parser does not take into account span tags.
+            if 'span[font-weight=bold]' not in stylesMapping:
+                stylesMapping['span[font-weight=bold]'] = 'podBold'
+            if 'span[font-style=italic]' not in stylesMapping:
+                stylesMapping['span[font-style=italic]'] = 'podItalic'
            self.stylesManager.stylesMapping = stylesMapping
        except PodError, po:
            self.contentParser.env.currentBuffer.content.close()
--- a/pod/styles_manager.py
+++ b/pod/styles_manager.py
@ -22,6 +22,7 @@ from UserDict import UserDict
 import appy.pod
 from appy.pod import *
 from appy.pod.odf_parser import OdfEnvironment, OdfParser
+from appy.shared.css import parseStyleAttribute

 # Possible states for the parser
 READING = 0 # Default state
@ -329,12 +330,7 @@ class StylesManager:
            else: return
        # If I am here, I have style info. Check if it corresponds to some style
        # in p_styles.
-        infos = attrs['style'].split(';')
-        styleInfo = {}
-        for info in infos:
-            if not info.strip(): continue
-            name, value = info.split(':')
-            styleInfo[name.strip()] = value.strip()
+        styleInfo = parseStyleAttribute(attrs['style'], asDict=True)
        for matchingAttrs, style in styles:
            if self.styleMatch(styleInfo, matchingAttrs):
                return style
--- a/shared/css.py
+++ b/shared/css.py
@ -0,0 +1,14 @@
+# ------------------------------------------------------------------------------
+def parseStyleAttribute(value, asDict=False):
+    '''Returns a list of CSS (name, value) pairs (or a dict if p_asDict is
+       True), parsed from p_value, which holds the content of a HTML "style"
+       attribute. Parts of p_value that contain no colon are ignored.'''
+    if asDict: res = {}
+    else:      res = []
+    for attr in value.split(';'):
+        if ':' not in attr: continue
+        name, val = attr.split(':', 1) # The CSS value may contain colons
+        if asDict: res[name.strip()] = val.strip()
+        else:      res.append( (name.strip(), val.strip()) )
+    return res
+# ------------------------------------------------------------------------------
--- a/shared/xml_parser.py
+++ b/shared/xml_parser.py
@ -26,6 +26,7 @@ from xml.sax import SAXParseException
 from appy.shared import UnicodeBuffer, xmlPrologue
 from appy.shared.errors import AppyError
 from appy.shared.utils import sequenceTypes
+from appy.shared.css import parseStyleAttribute

 # Constants --------------------------------------------------------------------
 CONVERSION_ERROR = '"%s" value "%s" could not be converted by the XML ' \
@ -906,6 +907,10 @@ class XhtmlCleaner(XmlParser):
    # Attributes to ignore, if keepStyles if False.
    attrsToIgnore = ('align', 'valign', 'cellpadding', 'cellspacing', 'width',
                     'height', 'bgcolor', 'lang', 'border', 'class')
+    # CSS attributes to keep, if keepStyles is False. These attributes can be
+    # used by appy.pod (to align a paragraph, center/resize an image...).
+    cssAttrsToKeep = ('width', 'height', 'float', 'text-align',
+                      'font-style', 'font-weight')
    # Attrs to add, if not present, to ensure good formatting, be it at the web
    # or ODT levels.
    attrsToAdd = {'table': {'cellspacing':'0', 'cellpadding':'6', 'border':'1'},
@ -957,6 +962,15 @@ class XhtmlCleaner(XmlParser):
            raise self.Error(str(e))
        return res

+    def cleanStyleAttribute(self, value):
+        '''Filters the CSS declarations of p_value (content of a "style"
+           attribute), retaining only the ones listed in cssAttrsToKeep.'''
+        allowed = self.cssAttrsToKeep
+        kept = ['%s: %s' % (cssName, cssValue)
+                for cssName, cssValue in parseStyleAttribute(value)
+                if cssName in allowed]
+        return '; '.join(kept)
+
    def startDocument(self):
        # The result will be cleaned XHTML, joined from self.res.
        self.res = []
@ -989,7 +1003,11 @@ class XhtmlCleaner(XmlParser):
        res = '%s<%s' % (prefix, elem)
        # Include the found attributes, excepted those that must be ignored.
        for name, value in attrs.items():
-            if not e.keepStyles and (name in self.attrsToIgnore): continue
+            if not e.keepStyles:
+                if name in self.attrsToIgnore: continue
+                elif name == 'style':
+                    value = self.cleanStyleAttribute(value)
+                    if not value: continue
            res += ' %s="%s"' % (name, value)
        # Include additional attributes if required.
        if elem in self.attrsToAdd: