[pod] xhtml2odt: take into account widths of td and th tags, from 'width' attributes or from the corresponding CSS property within 'style' attributes.

This commit is contained in:
Gaetan Delannay 2015-03-26 10:53:04 +01:00
parent 44eca07b15
commit ab147ce99d
4 changed files with 1891 additions and 1845 deletions

View file

@ -63,7 +63,7 @@ HTML_TEXT_ODT_PARA = 'For XHTML element "%s", you must associate an ' \
'style.'
# ------------------------------------------------------------------------------
class Style:
'''Represents a paragraph style as found in styles.xml in a ODT file.'''
'''Represents a paragraph style as found in styles.xml in a ODT file'''
numberRex = re.compile('(\d+)(.*)')
def __init__(self, name, family):
self.name = name
@ -306,7 +306,7 @@ class StylesManager:
return res
def styleMatch(self, attrs, matchingAttrs):
'''p_match is a dict of attributes found on some HTML element.
'''p_attrs is a dict of attributes found on some HTML element.
p_matchingAttrs is a dict of attributes corresponding to some style.
This method returns True if p_attrs contains the winning (name,value)
pairs that match those in p_matchingAttrs. Note that ALL attrs in

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,7 @@ from appy.pod.odf_parser import OdfEnvironment
from appy.pod.styles_manager import Style
from appy.shared.xml_parser import XmlEnvironment, XmlParser, escapeXml
from appy.shared.utils import WhitespaceCruncher
from appy.shared.css import CssStyles
# To which ODT tags do HTML tags correspond ?
HTML_2_ODT = {'h1':'h', 'h2':'h', 'h3':'h', 'h4':'h', 'h5':'h', 'h6':'h',
@ -68,7 +69,7 @@ class HtmlElement:
return self
def getOdfTag(self, env):
'''Gets the raw ODF tag that corresponds to me.'''
'''Gets the raw ODF tag that corresponds to me'''
res = ''
if HTML_2_ODT.has_key(self.elem):
res += '%s:%s' % (env.textNs, HTML_2_ODT[self.elem])
@ -105,7 +106,7 @@ class HtmlElement:
# Check elements that can't be found within a paragraph
if (parentElem.elemType == 'para') and \
(self.elem in NOT_INSIDE_P_OR_P):
# Oups, li->p wrongly considered as a conflict.
# Oups, li->p wrongly considered as a conflict
if (parentElem.elem == 'li') and (self.elem in ('p', 'div')):
return ()
return (parentElem.setConflictual(),)
@ -464,16 +465,12 @@ class XhtmlEnvironment(XmlEnvironment):
# If we are in the first row of a table, update columns count
if not table.firstRowParsed:
table.nbOfColumns += colspan
if attrs.has_key('width') and (colspan == 1):
# Get the width, keep figures only.
width = ''
for c in attrs['width']:
if c.isdigit(): width += c
width = int(width)
styles = CssStyles(elem, attrs)
if hasattr(styles, 'width') and (colspan == 1):
# Ensure self.columnWidths is long enough
while (len(table.columnWidths)-1) < table.cellIndex:
table.columnWidths.append(None)
table.columnWidths[table.cellIndex] = width
table.columnWidths[table.cellIndex] = styles.width.value
return currentElem
def onElementEnd(self, elem):

View file

@ -1,3 +1,6 @@
# ------------------------------------------------------------------------------
import re
# ------------------------------------------------------------------------------
def parseStyleAttribute(value, asDict=False):
'''Returns a list of CSS (name, value) pairs (or a dict if p_asDict is
@ -11,4 +14,50 @@ def parseStyleAttribute(value, asDict=False):
if asDict: res[name.strip()] = value.strip()
else: res.append( (name.strip(), value.strip()) )
return res
# ------------------------------------------------------------------------------
class CssValue:
    '''Represents a CSS value having unit "px" or "%": value and unit are
       extracted in attributes of the same name. If no unit is specified, "px"
       is assumed.'''
    # A leading run of digits, optionally followed by one of the 2 supported
    # units. Anything after that is ignored.
    valueRex = re.compile(r'(\d+)(%|px)?')
    def __init__(self, value):
        '''Parses p_value (a string like "100px", "50%" or "30") and stores
           the integer part in self.value and the unit in self.unit.
           Surrounding whitespace is ignored. Raises ValueError if p_value
           does not start with digits.'''
        match = CssValue.valueRex.match(value.strip())
        if match is None:
            raise ValueError('Invalid CSS value "%s".' % value)
        # A match object cannot be unpacked directly: the captured parts must
        # be retrieved via groups().
        number, unit = match.groups()
        self.value = int(number)
        # The unit group is None when no unit was found: "px" is the default
        self.unit = unit or 'px'
    def __str__(self): return '%d%s' % (self.value, self.unit)
    def __repr__(self): return self.__str__()
class CssStyles:
    '''Holds the styles defined on an HTML element, as collected from:
       * its "style" attribute;
       * dedicated attributes like "width" or "height".
       Every collected style is stored as an attribute on the instance.'''
    # CSS properties whose value carries a unit (px or %)
    withUnit = ('width', 'height')
    def __init__(self, elem, attrs):
        '''Reads the styles defined in p_attrs and stores each of them as an
           attribute on self. Dashes are removed from CSS property names to
           produce the attribute names. p_elem is not used here.'''
        # Styles defined in the "style" attribute are collected first
        if 'style' in attrs:
            parsed = parseStyleAttribute(attrs['style'], asDict=True)
            for prop in parsed:
                raw = parsed[prop]
                # Values with a unit are wrapped into a CssValue instance
                if prop in CssStyles.withUnit:
                    raw = CssValue(raw)
                setattr(self, prop.replace('-', ''), raw)
        # Then come attributes "width" and "height". A value already found
        # in the "style" attribute takes precedence and is left untouched.
        for name in ('width', 'height'):
            if hasattr(self, name) or (name not in attrs):
                continue
            setattr(self, name, CssValue(attrs[name]))
    def __repr__(self):
        '''Returns a string of the form <CSS name:value ...>'''
        parts = [' %s:%s' % (name, value) \
                 for name, value in self.__dict__.items()]
        return '<CSS' + ''.join(parts) + '>'
# ------------------------------------------------------------------------------