From 7d844070dfdbace2189aca27d0f8e0450d334774 Mon Sep 17 00:00:00 2001 From: Gaetan Delannay Date: Thu, 12 Jan 2012 21:49:23 +0100 Subject: [PATCH] appy.pod: improved styles mapping system: one may now express things like: 'p[text-align=center]':'StandardCentered' (ie, map an HTML tag having some CSS attributes to a specific ODT style). See verbose docstrings in styles_manager.py. --- gen/__init__.py | 5 +- gen/mixins/ToolMixin.py | 4 +- gen/mixins/__init__.py | 1 + gen/notifier.py | 4 +- gen/utils.py | 2 - gen/wrappers/__init__.py | 5 +- pod/renderer.py | 4 +- pod/styles_manager.py | 178 +++++++++++++++++++++++++++++++++++++-- pod/xhtml2odt.py | 59 +------------ shared/csv_parser.py | 2 +- shared/dav.py | 3 +- shared/sap.py | 2 +- shared/utils.py | 1 + shared/xml_parser.py | 14 +-- 14 files changed, 196 insertions(+), 88 deletions(-) diff --git a/gen/__init__.py b/gen/__init__.py index b5c649e..a431967 100644 --- a/gen/__init__.py +++ b/gen/__init__.py @@ -5,13 +5,12 @@ from appy import Object from appy.gen.layout import Table from appy.gen.layout import defaultFieldLayouts from appy.gen.po import PoMessage -from appy.gen.utils import sequenceTypes, GroupDescr, Keywords, getClassName, \ - SomeObjects +from appy.gen.utils import GroupDescr, Keywords, getClassName, SomeObjects import appy.pod from appy.pod.renderer import Renderer from appy.shared.data import countries from appy.shared.utils import Traceback, getOsTempFolder, formatNumber, \ - FileWrapper + FileWrapper, sequenceTypes # Default Appy permissions ----------------------------------------------------- r, w, d = ('read', 'write', 'delete') diff --git a/gen/mixins/ToolMixin.py b/gen/mixins/ToolMixin.py index ce3e7f4..fc3dc54 100644 --- a/gen/mixins/ToolMixin.py +++ b/gen/mixins/ToolMixin.py @@ -1,11 +1,11 @@ # ------------------------------------------------------------------------------ import os, os.path, sys, re, time, random, types, base64, urllib from appy.shared import mimeTypes -from 
appy.shared.utils import getOsTempFolder +from appy.shared.utils import getOsTempFolder, sequenceTypes from appy.shared.data import languages import appy.gen from appy.gen import Type, Search, Selection -from appy.gen.utils import SomeObjects, sequenceTypes, getClassName +from appy.gen.utils import SomeObjects, getClassName from appy.gen.mixins import BaseMixin from appy.gen.wrappers import AbstractWrapper from appy.gen.descriptors import ClassDescriptor diff --git a/gen/mixins/__init__.py b/gen/mixins/__init__.py index 3d1f9b1..0dfc5bd 100644 --- a/gen/mixins/__init__.py +++ b/gen/mixins/__init__.py @@ -9,6 +9,7 @@ import appy.gen as gen from appy.gen.utils import * from appy.gen.layout import Table, defaultPageLayouts from appy.gen.descriptors import WorkflowDescriptor, ClassDescriptor +from appy.shared.utils import sequenceTypes # ------------------------------------------------------------------------------ class BaseMixin: diff --git a/gen/notifier.py b/gen/notifier.py index cdc273f..749725b 100644 --- a/gen/notifier.py +++ b/gen/notifier.py @@ -33,9 +33,9 @@ def convertRolesToEmails(users, portal): SENDMAIL_ERROR = 'Error while sending mail: %s.' ENCODING_ERROR = 'Encoding error while sending mail: %s.' 
-from appy.gen.utils import sequenceTypes -from appy.gen.descriptors import WorkflowDescriptor import socket +from appy.shared.utils import sequenceTypes +from appy.gen.descriptors import WorkflowDescriptor def sendMail(obj, transition, transitionName, workflow): '''Sends mail about p_transition that has been triggered on p_obj that is diff --git a/gen/utils.py b/gen/utils.py index 783f1c4..b77f4cb 100644 --- a/gen/utils.py +++ b/gen/utils.py @@ -1,7 +1,5 @@ # ------------------------------------------------------------------------------ import re, os, os.path -import appy.pod -sequenceTypes = (list, tuple) # Function for creating a Zope object ------------------------------------------ def createObject(folder, id, className, appName, wf=True): diff --git a/gen/wrappers/__init__.py b/gen/wrappers/__init__.py index 8c604f9..f27adbc 100644 --- a/gen/wrappers/__init__.py +++ b/gen/wrappers/__init__.py @@ -5,8 +5,9 @@ import os, os.path, mimetypes import appy.pod from appy.gen import Type, Search, Ref, String -from appy.gen.utils import sequenceTypes, createObject -from appy.shared.utils import getOsTempFolder, executeCommand, normalizeString +from appy.gen.utils import createObject +from appy.shared.utils import getOsTempFolder, executeCommand, \ + normalizeString, sequenceTypes from appy.shared.xml_parser import XmlMarshaller from appy.shared.csv_parser import CsvMarshaller diff --git a/pod/renderer.py b/pod/renderer.py index 8261b17..807ed13 100644 --- a/pod/renderer.py +++ b/pod/renderer.py @@ -243,8 +243,8 @@ class Renderer: for converting a chunk of XHTML content (p_xhtmlString) into a chunk of ODT content.''' stylesMapping = self.stylesManager.checkStylesMapping(stylesMapping) - # xhtmlString can only be a chunk of XHTML. So we must surround it a - # tag in order to get a XML-compliant file (we need a root tag). + # xhtmlString can only be a chunk of XHTML. So we must surround it with + # a tag in order to get a XML-compliant file (we need a root tag). 
if xhtmlString == None: xhtmlString = '' xhtmlContent = '

<p>%s</p>

' % xhtmlString return Xhtml2OdtConverter(xhtmlContent, encoding, self.stylesManager, diff --git a/pod/styles_manager.py b/pod/styles_manager.py index f60fae6..2e63b1d 100644 --- a/pod/styles_manager.py +++ b/pod/styles_manager.py @@ -191,7 +191,7 @@ class StylesManager: self.paragraphStyles = self.styles.getStyles('paragraph') def checkStylesAdequation(self, htmlStyle, odtStyle): - '''Checks that p_odtStyle my be used for style p_htmlStyle.''' + '''Checks that p_odtStyle may be used for style p_htmlStyle.''' if (htmlStyle in XHTML_PARAGRAPH_TAGS_NO_LISTS) and \ (odtStyle in self.textStyles): raise PodError( @@ -202,9 +202,49 @@ class StylesManager: htmlStyle, odtStyle.displayName)) def checkStylesMapping(self, stylesMapping): - '''Checks that the given p_stylesMapping is correct. Returns the same - dict as p_stylesMapping, but with Style instances as values, instead - of strings (style's display names).''' + '''Checks that the given p_stylesMapping is correct, and returns the + internal representation of it. p_stylesMapping is a dict where: + * every key can be: + (1) the name of a XHTML 'paragraph-like' tag (p, h1, h2...) + (2) the name of a XHTML 'text-like' tag (span, b, i, em...) + (3) the name of a CSS class + (4) string 'h*' + * every value must be: + (a) if the key is (1), (2) or (3), value must be the display name + of an ODT style + (b) if the key is (4), value must be an integer indicating how to + map the outline level of outlined styles (ie, for mapping HTML + tag "h1" to the ODT style with outline-level=2, value must be + integer "1". In that case, h2 will be mapped to the ODT style + with outline-level=3, etc.). Note that this value can also be + negative. + * Some precision now about keys. If key is (1) or (2), + parameters can be given between square brackets. Every such + parameter represents a CSS attribute and its value.
For example, a + key can be: + p[text-align=center,color=blue] + + This feature makes it possible to map XHTML tags having different CSS + attributes to different ODT styles. + + The method returns a dict which is the internal representation of + the styles mapping: + * every key can be: + (I) the name of a XHTML tag, corresponding to (1) or (2) whose + potential parameters have been removed; + (II) the name of a CSS class (=(3)) + (III) string 'h*' (=(4)) + * every value can be: + (i) a Style instance that was found from the specified ODT style + display name in p_stylesMapping, if key is (I) and if only one, + non-parameterized XHTML tag was defined in p_stylesMapping; + (ii) a list of the form [ (params, Style), (params, Style),...] + if key is (I) and if one or more parameterized (or not) XHTML + tags representing the same tag were found in p_stylesMapping. + params, which can be None, is a dict whose pairs are of the + form (cssAttribute, cssValue). + (iii) an integer value (=(b)). + ''' res = {} if not isinstance(stylesMapping, dict) and \ not isinstance(stylesMapping, UserDict): @@ -221,6 +261,16 @@ class StylesManager: if (xhtmlStyleName != 'h*') and \ ((not xhtmlStyleName) or (not odtStyleName)): raise PodError(MAPPING_ELEM_EMPTY) + # Separate CSS attributes if any + cssAttrs = None + if '[' in xhtmlStyleName: + xhtmlStyleName, attrs = xhtmlStyleName.split('[') + xhtmlStyleName = xhtmlStyleName.strip() + attrs = attrs.strip()[:-1].split(',') + cssAttrs = {} + for attr in attrs: + name, value = attr.split('=') + cssAttrs[name.strip()] = value.strip() if xhtmlStyleName in XHTML_UNSTYLABLE_TAGS: raise PodError(UNSTYLABLE_TAG % (xhtmlStyleName, XHTML_UNSTYLABLE_TAGS)) @@ -232,9 +282,123 @@ class StylesManager: else: raise PodError(STYLE_NOT_FOUND % odtStyleName) self.checkStylesAdequation(xhtmlStyleName, odtStyle) - res[xhtmlStyleName] = odtStyle + # Store this style mapping in the result.
+ alreadyInRes = xhtmlStyleName in res + if cssAttrs or alreadyInRes: + # I must create a complex structure (ii) for this mapping. + if not alreadyInRes: + res[xhtmlStyleName] = [(cssAttrs, odtStyle)] + else: + value = res[xhtmlStyleName] + if not isinstance(value, list): + res[xhtmlStyleName] = [(cssAttrs, odtStyle), \ + (None, value)] + else: + value.insert(0, (cssAttrs, odtStyle)) + else: + # I must create a simple structure (i) for this mapping. + res[xhtmlStyleName] = odtStyle else: - res[xhtmlStyleName] = odtStyleName # In this case, it is the - # outline level, not an ODT style name + # In this case (iii), it is the outline level, not an ODT style + # name. + res[xhtmlStyleName] = odtStyleName + return res + + def styleMatch(self, attrs, matchingAttrs): + '''p_attrs is a dict of attributes found on some HTML element. + p_matchingAttrs is a dict of attributes corresponding to some style. + This method returns True if p_attrs contains the winning (name,value) + pairs that match those in p_matchingAttrs. Note that ALL attrs in + p_matchingAttrs must be present in p_attrs.''' + for name, value in (matchingAttrs or {}).iteritems(): + if name not in attrs: return + if value != attrs[name]: return + return True + + def getStyleFromMapping(self, elem, attrs, styles): + '''p_styles is a Style instance or a list of (cssParams, Style) tuples. + Depending on CSS attributes found in p_attrs, this method returns + the relevant Style instance.''' + if isinstance(styles, Style): return styles + hasStyleInfo = attrs and ('style' in attrs) + if not hasStyleInfo: + # If I have, at the last position in p_styles, the style related to + # no attribute at all, I return it. + lastAttrs, lastStyle = styles[-1] + if lastAttrs == None: return lastStyle + else: return + # If I am here, I have style info. Check if it corresponds to some style + # in p_styles.
+ infos = attrs['style'].split(';') + styleInfo = {} + for info in infos: + if not info.strip(): continue + name, value = info.split(':') + styleInfo[name.strip()] = value.strip() + for matchingAttrs, style in styles: + if self.styleMatch(styleInfo, matchingAttrs): + return style + + def findStyle(self, elem, attrs, classValue, localStylesMapping): + '''Finds the ODT style that must be applied to XHTML p_elem that has + attrs p_attrs. In some cases, p_attrs is None; the value of the + "class" attribute is given instead (in p_classValue). + + The global styles mapping is in self.stylesMapping; the local styles + mapping is in p_localStylesMapping. + + Here are the places where we will search, ordered by + priority (highest first): + (1) local styles mapping (CSS style in "class" attr) + (2) " (HTML elem) + (3) global styles mapping (CSS style in "class" attr) + (4) " (HTML elem) + (5) ODT style that has the same name as CSS style in "class" attr + (6) Predefined pod-specific ODT style that has the same name as + CSS style in "class" attr + (7) ODT style that has the same outline level as HTML elem. 
+ ''' + res = None + cssStyleName = None + if attrs and attrs.has_key('class'): + cssStyleName = attrs['class'] + if classValue: + cssStyleName = classValue + # (1) + if localStylesMapping.has_key(cssStyleName): + res = localStylesMapping[cssStyleName] + # (2) + if (not res) and localStylesMapping.has_key(elem): + styles = localStylesMapping[elem] + res = self.getStyleFromMapping(elem, attrs, styles) + # (3) + if (not res) and self.stylesMapping.has_key(cssStyleName): + res = self.stylesMapping[cssStyleName] + # (4) + if (not res) and self.stylesMapping.has_key(elem): + styles = self.stylesMapping[elem] + res = self.getStyleFromMapping(elem, attrs, styles) + # (5) + if (not res) and self.styles.has_key(cssStyleName): + res = self.styles[cssStyleName] + # (6) + if (not res) and self.podSpecificStyles.has_key(cssStyleName): + res = self.podSpecificStyles[cssStyleName] + # (7) + if not res: + # Try to find a style with the correct outline level + if elem in XHTML_HEADINGS: + # Is there a delta that must be taken into account ? 
+ outlineDelta = 0 + if localStylesMapping.has_key('h*'): + outlineDelta += localStylesMapping['h*'] + elif self.stylesMapping.has_key('h*'): + outlineDelta += self.stylesMapping['h*'] + outlineLevel = int(elem[1]) + outlineDelta + # Normalize the outline level + if outlineLevel < 1: outlineLevel = 1 + res = self.styles.getParagraphStyleAtLevel(outlineLevel) + if res: + self.checkStylesAdequation(elem, res) return res # ------------------------------------------------------------------------------ diff --git a/pod/xhtml2odt.py b/pod/xhtml2odt.py index 0e4950b..deb0805 100644 --- a/pod/xhtml2odt.py +++ b/pod/xhtml2odt.py @@ -496,8 +496,6 @@ class Xhtml2OdtConverter: self.xhtmlString = xhtmlString self.encoding = encoding # Todo: manage encoding that is not utf-8 self.stylesManager = stylesManager - self.odtStyles = stylesManager.styles - self.globalStylesMapping = stylesManager.stylesMapping self.localStylesMapping = localStylesMapping self.odtChunk = None self.xhtmlParser = XhtmlParser(XhtmlEnvironment(renderer), self) @@ -507,59 +505,6 @@ class Xhtml2OdtConverter: return self.xhtmlParser.env.res def findStyle(self, elem, attrs=None, classValue=None): - '''Finds the ODT style that must be applied to XHTML p_elem that has - attrs p_attrs. In some cases, p_attrs is not given; the value of the - "class" attribute is given instead (in p_classValue). 
- - Here are the places where we will search, ordered by - priority (highest first): - (1) local styles mapping (CSS style in "class" attr) - (2) " (HTML elem) - (3) global styles mapping (CSS style in "class" attr) - (4) " (HTML elem) - (5) ODT style that has the same name as CSS style in "class" attr - (6) Prefefined pod-specific ODT style that has the same name as - CSS style in "class" attr - (7) ODT style that has the same outline level as HTML elem.''' - res = None - cssStyleName = None - if attrs and attrs.has_key('class'): - cssStyleName = attrs['class'] - if classValue: - cssStyleName = classValue - # (1) - if self.localStylesMapping.has_key(cssStyleName): - res = self.localStylesMapping[cssStyleName] - # (2) - elif self.localStylesMapping.has_key(elem): - res = self.localStylesMapping[elem] - # (3) - elif self.globalStylesMapping.has_key(cssStyleName): - res = self.globalStylesMapping[cssStyleName] - # (4) - elif self.globalStylesMapping.has_key(elem): - res = self.globalStylesMapping[elem] - # (5) - elif self.odtStyles.has_key(cssStyleName): - res = self.odtStyles[cssStyleName] - # (6) - elif self.stylesManager.podSpecificStyles.has_key(cssStyleName): - res = self.stylesManager.podSpecificStyles[cssStyleName] - # (7) - else: - # Try to find a style with the correct outline level - if elem in XHTML_HEADINGS: - # Is there a delta that must be taken into account ? 
- outlineDelta = 0 - if self.localStylesMapping.has_key('h*'): - outlineDelta += self.localStylesMapping['h*'] - elif self.globalStylesMapping.has_key('h*'): - outlineDelta += self.globalStylesMapping['h*'] - outlineLevel = int(elem[1]) + outlineDelta - # Normalize the outline level - if outlineLevel < 1: outlineLevel = 1 - res = self.odtStyles.getParagraphStyleAtLevel(outlineLevel) - if res: - self.stylesManager.checkStylesAdequation(elem, res) - return res + return self.stylesManager.findStyle(elem, attrs, classValue, + self.localStylesMapping) # ------------------------------------------------------------------------------ diff --git a/shared/csv_parser.py b/shared/csv_parser.py index 1d96342..7726207 100644 --- a/shared/csv_parser.py +++ b/shared/csv_parser.py @@ -18,7 +18,7 @@ # ------------------------------------------------------------------------------ from appy import Object -from appy.gen.utils import sequenceTypes +from appy.shared.utils import sequenceTypes # ------------------------------------------------------------------------------ WRONG_LINE = 'Line number %d in file %s does not have the right number of ' \ diff --git a/shared/dav.py b/shared/dav.py index 61732f1..693c5ce 100644 --- a/shared/dav.py +++ b/shared/dav.py @@ -5,8 +5,7 @@ from StringIO import StringIO from mimetypes import guess_type from base64 import encodestring from appy import Object -from appy.shared.utils import copyData -from appy.gen.utils import sequenceTypes +from appy.shared.utils import copyData, sequenceTypes from appy.shared.xml_parser import XmlUnmarshaller, XmlMarshaller # ------------------------------------------------------------------------------ diff --git a/shared/sap.py b/shared/sap.py index 9ad928b..b8c747b 100644 --- a/shared/sap.py +++ b/shared/sap.py @@ -4,7 +4,7 @@ a website by SAP requiring a login/password.''' # ------------------------------------------------------------------------------ -from appy.gen.utils import sequenceTypes +from appy.shared.utils 
import sequenceTypes class SapError(Exception): pass SAP_MODULE_ERROR = 'Module pysap was not found (you can get it at ' \ diff --git a/shared/utils.py b/shared/utils.py index c347768..a8e8a06 100644 --- a/shared/utils.py +++ b/shared/utils.py @@ -19,6 +19,7 @@ # ------------------------------------------------------------------------------ import os, os.path, re, time, sys, traceback, unicodedata, shutil +sequenceTypes = (list, tuple) # ------------------------------------------------------------------------------ class FolderDeleter: diff --git a/shared/xml_parser.py b/shared/xml_parser.py index e58e430..5400522 100644 --- a/shared/xml_parser.py +++ b/shared/xml_parser.py @@ -24,6 +24,7 @@ from xml.sax.handler import ContentHandler, ErrorHandler, feature_external_ges,\ from xml.sax.xmlreader import InputSource from appy.shared import UnicodeBuffer, xmlPrologue from appy.shared.errors import AppyError +from appy.shared.utils import sequenceTypes # Constants -------------------------------------------------------------------- CONVERSION_ERROR = '"%s" value "%s" could not be converted by the XML ' \ @@ -420,7 +421,6 @@ class XmlMarshaller: xmlEntities = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;', "'": '&apos;'} trueFalse = {True: 'True', False: 'False'} - sequenceTypes = (tuple, list) fieldsToMarshall = 'all' fieldsToExclude = [] atFiles = ('image', 'file') # Types of archetypes fields that contain files. @@ -549,12 +549,12 @@ class XmlMarshaller: elif fieldType == 'dict': self.dumpDict(res, value) elif isRef: if value: - if type(value) in self.sequenceTypes: + if type(value) in sequenceTypes: for elem in value: self.dumpField(res, 'url', elem.absolute_url()) else: self.dumpField(res, 'url', value.absolute_url()) - elif type(value) in self.sequenceTypes: + elif type(value) in sequenceTypes: # The previous condition must be checked before this one because # referred objects may be stored in lists or tuples, too.
for elem in value: self.dumpField(res, 'e', elem) @@ -603,7 +603,7 @@ class XmlMarshaller: if self.objectType != 'popo': if fType: res.write(' type="%s"' % fType) # Dump other attributes if needed - if type(fieldValue) in self.sequenceTypes: + if type(fieldValue) in sequenceTypes: res.write(' count="%d"' % len(fieldValue)) if fType == 'file': if hasattr(fieldValue, 'content_type'): @@ -662,7 +662,7 @@ class XmlMarshaller: elif self.fieldsToMarshall == 'all': mustDump = True else: - if (type(self.fieldsToMarshall) in self.sequenceTypes) \ + if (type(self.fieldsToMarshall) in sequenceTypes) \ and (fieldName in self.fieldsToMarshall): mustDump = True if mustDump: @@ -679,7 +679,7 @@ class XmlMarshaller: elif self.fieldsToMarshall == 'all_with_metadata': mustDump = True else: - if (type(self.fieldsToMarshall) in self.sequenceTypes) \ + if (type(self.fieldsToMarshall) in sequenceTypes) \ and (field.getName() in self.fieldsToMarshall): mustDump = True if mustDump: @@ -695,7 +695,7 @@ class XmlMarshaller: # Dump only needed fields if field.name in self.fieldsToExclude: continue if (field.type == 'Ref') and field.isBack: continue - if (type(self.fieldsToMarshall) in self.sequenceTypes) \ + if (type(self.fieldsToMarshall) in sequenceTypes) \ and (field.name not in self.fieldsToMarshall): continue # Determine field type fieldType = 'basic'