[pod] xhtml2odt: take into account widths of td and th tags, from 'width' attributes or from the corresponding CSS property within 'style' attributes.

This commit is contained in:
Gaetan Delannay 2015-03-26 10:53:04 +01:00
parent 44eca07b15
commit ab147ce99d
4 changed files with 1891 additions and 1845 deletions

View file

@ -63,7 +63,7 @@ HTML_TEXT_ODT_PARA = 'For XHTML element "%s", you must associate an ' \
'style.' 'style.'
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
class Style: class Style:
'''Represents a paragraph style as found in styles.xml in a ODT file.''' '''Represents a paragraph style as found in styles.xml in a ODT file'''
numberRex = re.compile('(\d+)(.*)') numberRex = re.compile('(\d+)(.*)')
def __init__(self, name, family): def __init__(self, name, family):
self.name = name self.name = name
@ -306,7 +306,7 @@ class StylesManager:
return res return res
def styleMatch(self, attrs, matchingAttrs): def styleMatch(self, attrs, matchingAttrs):
'''p_match is a dict of attributes found on some HTML element. '''p_attrs is a dict of attributes found on some HTML element.
p_matchingAttrs is a dict of attributes corresponding to some style. p_matchingAttrs is a dict of attributes corresponding to some style.
This method returns True if p_attrs contains the winning (name,value) This method returns True if p_attrs contains the winning (name,value)
pairs that match those in p_matchingAttrs. Note that ALL attrs in pairs that match those in p_matchingAttrs. Note that ALL attrs in

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,7 @@ from appy.pod.odf_parser import OdfEnvironment
from appy.pod.styles_manager import Style from appy.pod.styles_manager import Style
from appy.shared.xml_parser import XmlEnvironment, XmlParser, escapeXml from appy.shared.xml_parser import XmlEnvironment, XmlParser, escapeXml
from appy.shared.utils import WhitespaceCruncher from appy.shared.utils import WhitespaceCruncher
from appy.shared.css import CssStyles
# To which ODT tags do HTML tags correspond ? # To which ODT tags do HTML tags correspond ?
HTML_2_ODT = {'h1':'h', 'h2':'h', 'h3':'h', 'h4':'h', 'h5':'h', 'h6':'h', HTML_2_ODT = {'h1':'h', 'h2':'h', 'h3':'h', 'h4':'h', 'h5':'h', 'h6':'h',
@ -68,7 +69,7 @@ class HtmlElement:
return self return self
def getOdfTag(self, env): def getOdfTag(self, env):
'''Gets the raw ODF tag that corresponds to me.''' '''Gets the raw ODF tag that corresponds to me'''
res = '' res = ''
if HTML_2_ODT.has_key(self.elem): if HTML_2_ODT.has_key(self.elem):
res += '%s:%s' % (env.textNs, HTML_2_ODT[self.elem]) res += '%s:%s' % (env.textNs, HTML_2_ODT[self.elem])
@ -105,7 +106,7 @@ class HtmlElement:
# Check elements that can't be found within a paragraph # Check elements that can't be found within a paragraph
if (parentElem.elemType == 'para') and \ if (parentElem.elemType == 'para') and \
(self.elem in NOT_INSIDE_P_OR_P): (self.elem in NOT_INSIDE_P_OR_P):
# Oups, li->p wrongly considered as a conflict. # Oups, li->p wrongly considered as a conflict
if (parentElem.elem == 'li') and (self.elem in ('p', 'div')): if (parentElem.elem == 'li') and (self.elem in ('p', 'div')):
return () return ()
return (parentElem.setConflictual(),) return (parentElem.setConflictual(),)
@ -464,16 +465,12 @@ class XhtmlEnvironment(XmlEnvironment):
# If we are in the first row of a table, update columns count # If we are in the first row of a table, update columns count
if not table.firstRowParsed: if not table.firstRowParsed:
table.nbOfColumns += colspan table.nbOfColumns += colspan
if attrs.has_key('width') and (colspan == 1): styles = CssStyles(elem, attrs)
# Get the width, keep figures only. if hasattr(styles, 'width') and (colspan == 1):
width = ''
for c in attrs['width']:
if c.isdigit(): width += c
width = int(width)
# Ensure self.columnWidths is long enough # Ensure self.columnWidths is long enough
while (len(table.columnWidths)-1) < table.cellIndex: while (len(table.columnWidths)-1) < table.cellIndex:
table.columnWidths.append(None) table.columnWidths.append(None)
table.columnWidths[table.cellIndex] = width table.columnWidths[table.cellIndex] = styles.width.value
return currentElem return currentElem
def onElementEnd(self, elem): def onElementEnd(self, elem):

View file

@ -1,3 +1,6 @@
# ------------------------------------------------------------------------------
import re
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
def parseStyleAttribute(value, asDict=False): def parseStyleAttribute(value, asDict=False):
'''Returns a list of CSS (name, value) pairs (or a dict if p_asDict is '''Returns a list of CSS (name, value) pairs (or a dict if p_asDict is
@ -11,4 +14,50 @@ def parseStyleAttribute(value, asDict=False):
if asDict: res[name.strip()] = value.strip() if asDict: res[name.strip()] = value.strip()
else: res.append( (name.strip(), value.strip()) ) else: res.append( (name.strip(), value.strip()) )
return res return res
# ------------------------------------------------------------------------------
class CssValue:
    '''Represents a CSS value having unit "px" or "%": value and unit are
       extracted in attributes of the same name. If no unit is specified, "px"
       is assumed.

       Attributes:
       * value  the integer part of the quantity (a fractional part, if any,
                is dropped);
       * unit   "px" or "%".'''
    # Group 1 is the integer part, group 2 the optional unit. Surrounding
    # whitespace and a fractional part are tolerated but not captured, so that
    # "12.5%" keeps its "%" unit instead of being read as "12" without unit.
    valueRex = re.compile(r'\s*(\d+)(?:\.\d*)?\s*(%|px)?')
    def __init__(self, value):
        '''Parses p_value, a string like "100", "100px" or "50%".

           Raises ValueError if p_value does not start with a number (ie,
           "auto"): callers that accept arbitrary CSS must be ready to catch
           it.'''
        match = CssValue.valueRex.match(value)
        if match is None:
            raise ValueError('Invalid CSS value "%s": a number optionally ' \
                             'followed by "px" or "%%" is expected.' % value)
        # A match object cannot be unpacked directly: get the captured groups
        number, unit = match.groups()
        self.value = int(number)
        # No unit (or an unsupported one, that was not captured) means pixels
        self.unit = unit or 'px'
    def __str__(self): return '%d%s' % (self.value, self.unit)
    def __repr__(self): return self.__str__()
class CssStyles:
    '''This class represents a set of styles collected from:
       * an HTML "style" attribute;
       * other attributes like "width".

       Every collected style is stored as an attribute on the instance, named
       after the CSS property with dashes removed. Use hasattr to check
       whether a given style was found.'''
    # The list of CSS properties having a unit (px or %)
    withUnit = ('width', 'height')
    def __init__(self, elem, attrs):
        '''Analyses styles as found in p_attrs and sets, for every found style,
           an attribute on self. p_attrs is the dict-like set of attributes
           of the HTML element; p_elem is currently not used.'''
        # First, parse the "style" attr if present. The "in" operator is used
        # instead of the deprecated "has_key" method.
        if 'style' in attrs:
            styles = parseStyleAttribute(attrs['style'], asDict=True)
            for name, value in styles.items():
                # Properties with a unit are stored as CssValue instances,
                # any other one is kept as a string.
                if name in CssStyles.withUnit:
                    value = CssValue(value)
                # Dashes are removed from the attribute name
                setattr(self, name.replace('-', ''), value)
        # Parse attributes "width" and "height" if present. But they will not
        # override corresponding attributes from the "styles" attributes if
        # found.
        for name in ('width', 'height'):
            if not hasattr(self, name) and (name in attrs):
                setattr(self, name, CssValue(attrs[name]))
    def __repr__(self):
        '''Returns a string of the form "<CSS name:value ...>", listing every
           style stored on this instance.'''
        res = '<CSS'
        for name, value in self.__dict__.items():
            res += ' %s:%s' % (name, value)
        return res + '>'
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------