# ------------------------------------------------------------------------------
# Appy is a framework for building applications in the Python language.
# Copyright (C) 2007 Gaetan Delannay

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# ------------------------------------------------------------------------------
|
|
import re, os.path
|
|
#python3 compat
|
|
try:
|
|
from UserDict import UserDict
|
|
except ImportError:
|
|
from collections import UserDict
|
|
|
|
import appy.pod
|
|
from appy.pod import *
|
|
from appy.pod.odf_parser import OdfEnvironment, OdfParser
|
|
from appy.shared.css import parseStyleAttribute
|
|
|
|
# States of the styles parser:
# - READING:       default state, no style definition is being parsed;
# - PARSING_STYLE: the parser is currently inside a style definition.
READING, PARSING_STYLE = 0, 1
|
|
|
|
# Error-related constants ------------------------------------------------------
|
|
# Raised when the styles mapping is neither a dict nor a UserDict
MAPPING_NOT_DICT = (
    'The styles mapping must be a dictionary or a UserDict '
    'instance.'
)
# Raised when a key or a value of the styles mapping has a wrong type
MAPPING_ELEM_NOT_STRING = (
    "The styles mapping dictionary's keys and values "
    "must be strings."
)
# Raised when key "h*" is not associated to an integer
MAPPING_OUTLINE_DELTA_NOT_INT = (
    'When specifying "h*" as key in the styles '
    'mapping, you must specify an integer as '
    'value. This integer, which may be positive '
    'or negative, represents a delta that will '
    'be added to the html heading\'s outline '
    'level for finding an ODT style with the '
    'same outline level.'
)
# Raised when a key or a value of the styles mapping is empty
MAPPING_ELEM_EMPTY = (
    'In your styles mapping, you inserted an empty key '
    'and/or value.'
)
# Placeholders: the XHTML tag, then the list of unstylable tags
UNSTYLABLE_TAG = (
    'You can\'t associate a style to element "%s". Unstylable '
    'elements are: %s'
)
# Placeholder: the display name of the ODT style that was searched
STYLE_NOT_FOUND = (
    'OpenDocument style "%s" was not found in your template. '
    'Note that the styles names ("Heading 1", "Standard"...) '
    'that appear when opening your template with OpenOffice, '
    'for example, are a super-set of the styles that are really '
    'recorded into your document. Indeed, only styles that are '
    'in use within your template are actually recorded into '
    'the document. You may consult the list of available '
    'styles programmatically by calling your pod renderer\'s '
    '"getStyles" method.'
)
# Placeholders: the XHTML tag, then the display name of the ODT style
HTML_PARA_ODT_TEXT = (
    'For XHTML element "%s", you must associate a '
    'paragraph-wide OpenDocument style. "%s" is a "text" '
    'style (that applies to only a chunk of text within a '
    'paragraph).'
)
# Placeholders: the XHTML tag, then the display name of the ODT style
HTML_TEXT_ODT_PARA = (
    'For XHTML element "%s", you must associate an '
    'OpenDocument "text" style (that applies to only a chunk '
    'of text within a paragraph). "%s" is a paragraph-wide '
    'style.'
)
|
|
# ------------------------------------------------------------------------------
|
|
class Style:
    '''Represents a style as found in styles.xml in an ODT file'''
    # Splits a font size into its leading integer part (group 1) and everything
    # that follows (group 2). Note that, for a decimal size like "10.5pt",
    # group 1 is "10" and group 2 is ".5pt".
    numberRex = re.compile(r'(\d+)(.*)')

    def __init__(self, name, family):
        '''Creates a style named p_name, belonging to this p_family.'''
        self.name = name
        self.family = family # May be 'paragraph', etc.
        # Unless overridden afterwards, the display name is the style name
        self.displayName = name
        self.styleClass = None # May be 'text', 'list', etc.
        self.fontSize = None
        self.fontSizeUnit = None # May be pt, %, ...
        self.outlineLevel = None # Where the style lies within styles and
                                 # substyles hierarchy

    def setFontSize(self, fontSize):
        '''Parses string p_fontSize (ie, "12pt") and stores its integer part
           in self.fontSize and the remaining part in self.fontSizeUnit.
           p_fontSize must contain at least one digit.'''
        rexRes = self.numberRex.search(fontSize)
        self.fontSize = int(rexRes.group(1))
        self.fontSizeUnit = rexRes.group(2)

    def __repr__(self):
        '''Returns a short, human-readable description of this style.'''
        res = '<Style %s|family %s' % (self.name, self.family)
        if self.displayName is not None:
            res += '|displayName "%s"' % self.displayName
        if self.styleClass is not None:
            res += '|class %s' % self.styleClass
        if self.fontSize is not None:
            res += '|fontSize %d%s' % (self.fontSize, self.fontSizeUnit)
        if self.outlineLevel is not None:
            res += '|level %s' % self.outlineLevel
        res += '>'
        # __repr__ must return a native "str". With Python 3, p_res is already
        # one: encoding it would produce bytes and make repr() raise a
        # TypeError. With Python 2, p_res may be a unicode object (if some
        # attribute is unicode): only then it must be encoded.
        if not isinstance(res, str):
            res = res.encode('utf-8')
        return res
|
|
|
|
# ------------------------------------------------------------------------------
|
|
class Styles(UserDict):
    '''Dict-like collection of style objects, with some lookup helpers.'''

    def getParagraphStyleAtLevel(self, level):
        '''Returns the first paragraph style whose outline level is p_level,
           or None if there is no such style.'''
        candidates = (s for s in self.values() \
                      if (s.family == 'paragraph') and (s.outlineLevel == level))
        return next(candidates, None)

    def getStyle(self, displayName):
        '''Returns the first style whose display name is p_displayName, or
           None if there is no such style.'''
        for candidate in self.values():
            if candidate.displayName == displayName:
                return candidate
        return None

    def getStyles(self, stylesType='all'):
        '''Returns, as a list, the styles of the given p_stylesType. If
           p_stylesType is "all", every style is returned. Else, only the
           styles belonging to family p_stylesType and having a non-empty
           display name are returned.'''
        if stylesType == 'all':
            return list(self.values())
        return [s for s in self.values() \
                if (s.family == stylesType) and s.displayName]
|
|
|
|
# ------------------------------------------------------------------------------
|
|
class StylesEnvironment(OdfEnvironment):
    '''Environment used while parsing styles: it holds the parser state and
       collects the styles that were encountered.'''

    def __init__(self):
        OdfEnvironment.__init__(self)
        # The parser starts in its default state
        self.state = READING
        # The style definition currently parsed, if any
        self.currentStyle = None
        # All the styles parsed so far
        self.styles = Styles()
|
|
|
|
# ------------------------------------------------------------------------------
|
|
class StylesParser(OdfParser):
    '''Parser that builds a Style instance for every style definition it
       encounters, and stores it in the "styles" attribute of its
       environment.'''

    def __init__(self, env, caller):
        OdfParser.__init__(self, env, caller)
        # Qualified name of the "style" tag. It is (re)computed every time an
        # element starts.
        self.styleTag = None

    def endDocument(self):
        '''Hands the parsed styles over to the caller.'''
        env = OdfParser.endDocument(self)
        self.caller.styles = env.styles

    def startElement(self, elem, attrs):
        '''Creates a Style instance when a style definition starts, or
           completes the current one when a sub-element of a style
           definition starts.'''
        env = OdfParser.startElement(self, elem, attrs)
        # Presumably the prefix bound to the "style" namespace in the parsed
        # document (to be confirmed against OdfEnvironment.ns).
        stylePrefix = env.ns(env.NS_STYLE)
        self.styleTag = '%s:style' % stylePrefix
        if elem != self.styleTag:
            # The only sub-element of interest is "text-properties", when
            # encountered within a style definition: it may carry the font
            # size.
            if env.state != PARSING_STYLE: return
            if elem != ('%s:text-properties' % stylePrefix): return
            fontSizeKey = '%s:font-size' % env.ns(env.NS_FO)
            if fontSizeKey in attrs:
                env.currentStyle.setFontSize(attrs[fontSizeKey])
            return
        # A style definition starts
        env.state = PARSING_STYLE
        qualified = lambda attrName: '%s:%s' % (stylePrefix, attrName)
        # Attributes "name" and "family" are mandatory for creating the style
        style = Style(name=attrs[qualified('name')],
                      family=attrs[qualified('family')])
        classKey = qualified('class')
        if classKey in attrs:
            style.styleClass = attrs[classKey]
        displayNameKey = qualified('display-name')
        if displayNameKey in attrs:
            style.displayName = attrs[displayNameKey]
        # Record this style in the environment
        env.styles[style.name] = style
        env.currentStyle = style
        # Store the outline level, if a non-blank one is specified
        levelKey = qualified('default-outline-level')
        if levelKey in attrs:
            level = attrs[levelKey]
            if level.strip():
                style.outlineLevel = int(level)

    def endElement(self, elem):
        '''Goes back to the default state when a style definition ends.'''
        env = OdfParser.endElement(self, elem)
        if elem != self.styleTag: return
        env.state = READING
        env.currentStyle = None
|
|
|
|
# -------------------------------------------------------------------------------
|
|
class StylesManager:
    '''Reads the styles from the content of styles.xml (as found within an ODT
       file), validates styles mappings and finds the ODT style that must be
       applied to a given XHTML element.'''
    podSpecificStyles = {
      'podItemKeepWithNext': Style('podItemKeepWithNext', 'paragraph'),
      # This style is common to bullet and number items. Behind the scenes,
      # there are 2 concrete ODT styles: podBulletItemKeepWithNext and
      # podNumberItemKeepWithNext. pod chooses the right one.
    }

    def __init__(self, stylesString):
        '''Parses p_stylesString, the content of styles.xml.'''
        self.stylesString = stylesString
        self.styles = None
        # Global styles mapping. It is not set here: it is None until someone
        # assigns it.
        self.stylesMapping = None
        self.stylesParser = StylesParser(StylesEnvironment(), self)
        self.stylesParser.parse(self.stylesString)
        # Now self.styles contains the styles: the parser has set this
        # attribute on its caller when reaching the end of the document.
        # List of text styles derived from self.styles
        self.textStyles = self.styles.getStyles('text')
        # List of paragraph styles derived from self.styles
        self.paragraphStyles = self.styles.getStyles('paragraph')

    def checkStylesAdequation(self, htmlStyle, odtStyle):
        '''Checks that p_odtStyle may be used for style p_htmlStyle. Raises a
           PodError if a "text" style is associated to a paragraph-like XHTML
           tag, or if a paragraph style is associated to an inner XHTML
           tag.'''
        if (htmlStyle in XHTML_PARAGRAPH_TAGS_NO_LISTS) and \
           (odtStyle in self.textStyles):
            raise PodError(
                HTML_PARA_ODT_TEXT % (htmlStyle, odtStyle.displayName))
        if (htmlStyle in XHTML_INNER_TAGS) and \
           (odtStyle in self.paragraphStyles):
            raise PodError(HTML_TEXT_ODT_PARA % (
                htmlStyle, odtStyle.displayName))

    def checkStylesMapping(self, stylesMapping):
        '''Checks that the given p_stylesMapping is correct, and returns the
           internal representation of it. p_stylesMapping is a dict where:
           * every key can be:
             (1) the name of a XHTML 'paragraph-like' tag (p, h1, h2...)
             (2) the name of a XHTML 'text-like' tag (span, b, i, em...)
             (3) the name of a CSS class
             (4) string 'h*'
           * every value must be:
             (a) if the key is (1), (2) or (3), value must be the display name
                 of an ODT style
             (b) if the key is (4), value must be an integer indicating how to
                 map the outline level of outlined styles (ie, for mapping
                 XHTML tag "h1" to the ODT style with outline-level=2, value
                 must be integer "1". In that case, h2 will be mapped to the
                 ODT style with outline-level=3, etc.). Note that this value
                 can also be negative.
           * Some precision now about keys. If key is (1) or (2), parameters
             can be given between square brackets. Every such parameter
             represents a CSS attribute and its value. For example, a key can
             be:
                             p[text-align=center,color=blue]

             This feature allows to map XHTML tags having different CSS
             attributes to different ODT styles.

           The method returns a dict which is the internal representation of
           the styles mapping:
           * every key can be:
             (I) the name of a XHTML tag, corresponding to (1) or (2) whose
                 potential parameters have been removed;
             (II) the name of a CSS class (=(3))
             (III) string 'h*' (=(4))
           * every value can be:
             (i) a Style instance that was found from the specified ODT style
                 display name in p_stylesMapping, if key is (I) and if only
                 one, non-parameterized XHTML tag was defined in
                 p_stylesMapping;
             (ii) a list of the form [ (params, Style), (params, Style),...]
                  if key is (I) and if one or more parameterized (or not)
                  XHTML tags representing the same tag were found in
                  p_stylesMapping. params, which can be None, is a dict whose
                  pairs are of the form (cssAttribute, cssValue). In this
                  list, the non-parameterized entry (params is None), if any,
                  comes last;
             (iii) an integer value (=(b)).

           A PodError is raised if p_stylesMapping is not correct. A key whose
           parameters are malformed (ie, a parameter without "=" sign) makes
           the method raise a ValueError.
        '''
        res = {}
        if not isinstance(stylesMapping, dict) and \
           not isinstance(stylesMapping, UserDict):
            raise PodError(MAPPING_NOT_DICT)
        for xhtmlStyleName, odtStyleName in stylesMapping.items():
            if not isinstance(xhtmlStyleName, str):
                raise PodError(MAPPING_ELEM_NOT_STRING)
            if (xhtmlStyleName == 'h*') and \
               not isinstance(odtStyleName, int):
                raise PodError(MAPPING_OUTLINE_DELTA_NOT_INT)
            if (xhtmlStyleName != 'h*') and \
               not isinstance(odtStyleName, str):
                raise PodError(MAPPING_ELEM_NOT_STRING)
            if (xhtmlStyleName != 'h*') and \
               ((not xhtmlStyleName) or (not odtStyleName)):
                raise PodError(MAPPING_ELEM_EMPTY)
            # Separate CSS attributes if any
            cssAttrs = None
            if '[' in xhtmlStyleName:
                xhtmlStyleName, attrs = xhtmlStyleName.split('[')
                xhtmlStyleName = xhtmlStyleName.strip()
                # Remove the closing square bracket and split the parameters
                attrs = attrs.strip()[:-1].split(',')
                cssAttrs = {}
                for attr in attrs:
                    name, value = attr.split('=')
                    cssAttrs[name.strip()] = value.strip()
            if xhtmlStyleName in XHTML_UNSTYLABLE_TAGS:
                raise PodError(UNSTYLABLE_TAG % (xhtmlStyleName,
                                                 XHTML_UNSTYLABLE_TAGS))
            if xhtmlStyleName != 'h*':
                odtStyle = self.styles.getStyle(odtStyleName)
                if not odtStyle:
                    if odtStyleName in self.podSpecificStyles:
                        odtStyle = self.podSpecificStyles[odtStyleName]
                    else:
                        raise PodError(STYLE_NOT_FOUND % odtStyleName)
                self.checkStylesAdequation(xhtmlStyleName, odtStyle)
                # Store this style mapping in the result.
                alreadyInRes = xhtmlStyleName in res
                if cssAttrs or alreadyInRes:
                    # I must create a complex structure (ii) for this mapping.
                    if not alreadyInRes:
                        res[xhtmlStyleName] = [(cssAttrs, odtStyle)]
                    else:
                        value = res[xhtmlStyleName]
                        if not isinstance(value, list):
                            # Convert the simple structure (i) into a complex
                            # one, keeping the non-parameterized entry at the
                            # end.
                            res[xhtmlStyleName] = [(cssAttrs, odtStyle),
                                                   (None, value)]
                        elif cssAttrs:
                            # Update the existing list (and not p_res itself,
                            # which is a dict and has no "insert" method).
                            value.insert(0, (cssAttrs, odtStyle))
                        else:
                            # The non-parameterized entry must be the last
                            # one: m_getStyleFromMapping searches it there.
                            value.append((cssAttrs, odtStyle))
                else:
                    # I must create a simple structure (i) for this mapping.
                    res[xhtmlStyleName] = odtStyle
            else:
                # In this case (iii), it is the outline level, not an ODT
                # style name.
                res[xhtmlStyleName] = odtStyleName
        return res

    def styleMatch(self, attrs, matchingAttrs):
        '''p_attrs is a dict of attributes found on some HTML element.
           p_matchingAttrs is a dict of attributes corresponding to some
           style. This method returns True if p_attrs contains the winning
           (name,value) pairs that match those in p_matchingAttrs, False
           else. Note that ALL attrs in p_matchingAttrs must be present in
           p_attrs. Consequently, if p_matchingAttrs is None or empty (which
           is the case for the style tied to a non-parameterized tag), any
           p_attrs matches.'''
        if not matchingAttrs: return True
        for name, value in matchingAttrs.items():
            if (name not in attrs) or (value != attrs[name]): return False
        return True

    def getStyleFromMapping(self, elem, attrs, styles):
        '''p_styles is a Style instance or a list of (cssParams, Style)
           tuples. Depending on CSS attributes found in p_attrs, this method
           returns the relevant Style instance, or None if no style
           matches.'''
        if isinstance(styles, Style): return styles
        hasStyleInfo = attrs and ('style' in attrs)
        if not hasStyleInfo:
            # If I have, at the last position in p_styles, the style related
            # to no attribute at all, I return it.
            lastAttrs, lastStyle = styles[-1]
            if lastAttrs is None: return lastStyle
            return None
        # If I am here, I have style info. Check if it corresponds to some
        # style in p_styles. The style related to no attribute at all, if
        # any, matches whatever the style info is: being the last one in
        # p_styles, it is used as a fallback.
        styleInfo = parseStyleAttribute(attrs['style'], asDict=True)
        for matchingAttrs, style in styles:
            if self.styleMatch(styleInfo, matchingAttrs):
                return style
        return None

    def findStyle(self, elem, attrs, classValue, localStylesMapping):
        '''Finds the ODT style that must be applied to XHTML p_elem that has
           attrs p_attrs. In some cases, p_attrs is None; the value of the
           "class" attribute is given instead (in p_classValue).

           The global styles mapping is in self.stylesMapping; the local
           styles mapping is in p_localStylesMapping. Any of them being None
           is considered as being empty.

           Here are the places where we will search, ordered by
           priority (highest first):
           (1) local styles mapping (CSS style in "class" attr)
           (2)         "            (HTML elem)
           (3) global styles mapping (CSS style in "class" attr)
           (4)          "            (HTML elem)
           (5) ODT style that has the same name as CSS style in "class" attr
           (6) Predefined pod-specific ODT style that has the same name as
               CSS style in "class" attr
           (7) ODT style that has the same outline level as HTML elem.

           Returns None if no style was found. Raises a PodError if the found
           style may not be applied to p_elem.
        '''
        res = None
        # self.stylesMapping is None as long as nobody has set it
        localMapping = localStylesMapping or {}
        globalMapping = self.stylesMapping or {}
        cssStyleName = None
        if attrs and 'class' in attrs:
            cssStyleName = attrs['class']
        if classValue:
            cssStyleName = classValue
        # (1)
        if cssStyleName in localMapping:
            res = localMapping[cssStyleName]
        # (2)
        if (not res) and elem in localMapping:
            styles = localMapping[elem]
            res = self.getStyleFromMapping(elem, attrs, styles)
        # (3)
        if (not res) and cssStyleName in globalMapping:
            res = globalMapping[cssStyleName]
        # (4)
        if (not res) and elem in globalMapping:
            styles = globalMapping[elem]
            res = self.getStyleFromMapping(elem, attrs, styles)
        # (5)
        if (not res) and cssStyleName in self.styles:
            res = self.styles[cssStyleName]
        # (6)
        if (not res) and cssStyleName in self.podSpecificStyles:
            res = self.podSpecificStyles[cssStyleName]
        # (7)
        if not res:
            # Try to find a style with the correct outline level
            if elem in XHTML_HEADINGS:
                # Is there a delta that must be taken into account ? The
                # local one, if any, takes precedence over the global one.
                outlineDelta = 0
                if 'h*' in localMapping:
                    outlineDelta += localMapping['h*']
                elif 'h*' in globalMapping:
                    outlineDelta += globalMapping['h*']
                # The second char of the tag name is the heading level
                outlineLevel = int(elem[1]) + outlineDelta
                # Normalize the outline level
                if outlineLevel < 1: outlineLevel = 1
                res = self.styles.getParagraphStyleAtLevel(outlineLevel)
        if res:
            self.checkStylesAdequation(elem, res)
        return res
|
|
# ------------------------------------------------------------------------------
|