[pod] xhtml2odt: generate ODT tables from XHTML tables, taking into account variable column sizes.

This commit is contained in:
Gaetan Delannay 2012-07-06 17:57:25 +02:00
parent bc505b335f
commit 8a5ca81746
3 changed files with 102 additions and 12 deletions

View file

@ -219,6 +219,10 @@ class Renderer:
stylesInserts) stylesInserts)
# Store the styles mapping # Store the styles mapping
self.setStylesMapping(stylesMapping) self.setStylesMapping(stylesMapping)
# While working, POD may identify "dynamic styles" to insert into
# the "automatic styles" section of content.xml, like the column styles
# of tables generated from XHTML tables via xhtml2odt.py.
self.dynamicStyles = []
def createPodParser(self, odtFile, context, inserts): def createPodParser(self, odtFile, context, inserts):
'''Creates the parser with its environment for parsing the given '''Creates the parser with its environment for parsing the given
@ -475,11 +479,22 @@ class Renderer:
for odtFile in ('content.xml', 'styles.xml'): for odtFile in ('content.xml', 'styles.xml'):
shutil.copy(os.path.join(self.tempFolder, odtFile), shutil.copy(os.path.join(self.tempFolder, odtFile),
os.path.join(self.unzipFolder, odtFile)) os.path.join(self.unzipFolder, odtFile))
# Insert dynamic styles
contentXml = os.path.join(self.unzipFolder, 'content.xml')
f = file(contentXml)
dynamicStyles = ''.join(self.dynamicStyles)
content = f.read().replace('<!DYNAMIC_STYLES!>', dynamicStyles)
f.close()
f = file(contentXml, 'w')
f.write(content)
f.close()
# Call the user-defined "finalize" function when present.
if self.finalizeFunction: if self.finalizeFunction:
try: try:
self.finalizeFunction(self.unzipFolder) self.finalizeFunction(self.unzipFolder)
except Exception, e: except Exception, e:
print WARNING_FINALIZE_ERROR % str(e) print WARNING_FINALIZE_ERROR % str(e)
# Re-zip the result.
resultOdtName = os.path.join(self.tempFolder, 'result.odt') resultOdtName = os.path.join(self.tempFolder, 'result.odt')
try: try:
resultOdt = zipfile.ZipFile(resultOdtName,'w', zipfile.ZIP_DEFLATED) resultOdt = zipfile.ZipFile(resultOdtName,'w', zipfile.ZIP_DEFLATED)

View file

@ -141,3 +141,4 @@
<@style@:background-image/> <@style@:background-image/>
</@style@:table-cell-properties> </@style@:table-cell-properties>
</@style@:style> </@style@:style>
<!DYNAMIC_STYLES!>

View file

@ -9,7 +9,7 @@
# underline. # underline.
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
import xml.sax import xml.sax, time, random
from appy.shared.xml_parser import XmlEnvironment, XmlParser from appy.shared.xml_parser import XmlEnvironment, XmlParser
from appy.pod.odf_parser import OdfEnvironment from appy.pod.odf_parser import OdfEnvironment
from appy.pod import * from appy.pod import *
@ -205,7 +205,10 @@ class HtmlTable:
of the HTML table, we will dump the result of this sub-buffer into of the HTML table, we will dump the result of this sub-buffer into
the parent buffer, which may be the global buffer or another table the parent buffer, which may be the global buffer or another table
buffer.''' buffer.'''
def __init__(self): def __init__(self, env):
elems = str(time.time()).split('.')
self.name= 'AppyTable%s%s%d' % (elems[0],elems[1],random.randint(1,100))
self.styleNs = env.ns[OdfEnvironment.NS_STYLE]
self.res = u'' # The sub-buffer. self.res = u'' # The sub-buffer.
self.tempRes = u'' # The temporary sub-buffer, into which we will self.tempRes = u'' # The temporary sub-buffer, into which we will
# dump all table sub-elements, until we encounter the end of the first # dump all table sub-elements, until we encounter the end of the first
@ -214,6 +217,42 @@ class HtmlTable:
# into self.res. # into self.res.
self.firstRowParsed = False # Was the first table row completely parsed? self.firstRowParsed = False # Was the first table row completely parsed?
self.nbOfColumns = 0 self.nbOfColumns = 0
# The following list stores, for every column, the size of the biggest
# content of all its cells.
self.columnContentSizes = []
def computeColumnStyles(self, renderer):
    '''Once the table has been completely parsed, self.columnContentSizes
       should be correctly filled. Based on this, we deduce the relative
       width of every column and append the corresponding "table-column"
       style declarations to p_renderer.dynamicStyles (one declaration per
       column, named "<self.name>.<i>", i starting at 1).

       If the collected sizes are unusable (wrong number of entries, a None
       entry, or a total content size of zero because every cell is empty),
       every column gets the same width. If the table has no column at all,
       no style is generated.'''
    total = 65000.0 # A number representing the total width of the table
    nbOfColumns = self.nbOfColumns
    # No column was counted for this table: there is no column declaration
    # to style, and dividing "total" by zero must be avoided.
    if nbOfColumns <= 0: return
    sizes = self.columnContentSizes
    # Compute the sum of all column content sizes, but only if
    # self.columnContentSizes is consistent with the number of columns.
    contentTotal = 0.0
    if (len(sizes) == nbOfColumns) and (None not in sizes):
        contentTotal = float(sum(sizes))
    if contentTotal > 0:
        # Every column gets a width proportional to its biggest content.
        widths = [int((size / contentTotal) * total) for size in sizes]
    else:
        # There was a problem while parsing the table, or all cells are
        # empty (a proportional split would divide by zero). Set every
        # column with the same width.
        widths = [int(total / nbOfColumns)] * nbOfColumns
    # Compute the style declaration corresponding to every column.
    s = self.styleNs
    for i, width in enumerate(widths):
        # Column numbering starts at 1; the width is relative to "total".
        decl = '<%s:style %s:name="%s.%d" %s:family="table-column">' \
               '<%s:table-column-properties %s:rel-column-width="%d*"' \
               '/></%s:style>' % (s, s, self.name, i + 1, s, s, s, width, s)
        renderer.dynamicStyles.append(decl.encode('utf-8'))
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
class XhtmlEnvironment(XmlEnvironment): class XhtmlEnvironment(XmlEnvironment):
@ -233,8 +272,17 @@ class XhtmlEnvironment(XmlEnvironment):
self.textNs = self.ns[OdfEnvironment.NS_TEXT] self.textNs = self.ns[OdfEnvironment.NS_TEXT]
self.linkNs = self.ns[OdfEnvironment.NS_XLINK] self.linkNs = self.ns[OdfEnvironment.NS_XLINK]
self.tableNs = self.ns[OdfEnvironment.NS_TABLE] self.tableNs = self.ns[OdfEnvironment.NS_TABLE]
self.ignore = False # Will be True when parsing parts of the XHTML that # The following attr will be True when parsing parts of the XHTML that
# must be ignored. # must be ignored.
self.ignore = False
# Are we currently within a table cell? Instead of a boolean, the field
# stores an integer. The integer is > 1 if the cell spans more than one
# column.
self.inCell = 0
# The index, within the current row, of the current cell
self.cellIndex = -1
# The size of the content of the currently parsed table cell
self.cellContentSize = 0
def getCurrentElement(self, isList=False): def getCurrentElement(self, isList=False):
'''Gets the element that is on the top of self.currentElements or '''Gets the element that is on the top of self.currentElements or
@ -259,13 +307,16 @@ class XhtmlEnvironment(XmlEnvironment):
def dumpCurrentContent(self): def dumpCurrentContent(self):
'''Dumps content that was temporarily stored in self.currentContent '''Dumps content that was temporarily stored in self.currentContent
into the result.''' into the result.'''
contentSize = 0
if self.currentContent.strip(): if self.currentContent.strip():
# Manage missing elements # Manage missing elements
currentElem = self.getCurrentElement() currentElem = self.getCurrentElement()
if self.anElementIsMissing(currentElem, None): if self.anElementIsMissing(currentElem, None):
currentElem.addInnerParagraph(self) currentElem.addInnerParagraph(self)
# Dump and reinitialize the current content # Dump and reinitialize the current content
for c in self.currentContent.strip('\n'): content = self.currentContent.strip('\n')
contentSize = len(content)
for c in content:
# We remove leading and trailing carriage returns, but not # We remove leading and trailing carriage returns, but not
# whitespace because whitespace may be part of the text to dump. # whitespace because whitespace may be part of the text to dump.
if XML_SPECIAL_CHARS.has_key(c): if XML_SPECIAL_CHARS.has_key(c):
@ -273,6 +324,8 @@ class XhtmlEnvironment(XmlEnvironment):
else: else:
self.dumpString(c) self.dumpString(c)
self.currentContent = u'' self.currentContent = u''
# If we are within a table cell, update the total size of cell content.
if self.inCell: self.cellContentSize += contentSize
def getOdtAttributes(self, htmlElem, htmlAttrs={}): def getOdtAttributes(self, htmlElem, htmlAttrs={}):
'''Gets the ODT attributes to dump for p_currentElem. p_htmlAttrs are '''Gets the ODT attributes to dump for p_currentElem. p_htmlAttrs are
@ -364,15 +417,17 @@ class XhtmlEnvironment(XmlEnvironment):
self.currentLists.append(currentElem) self.currentLists.append(currentElem)
elif elem == 'table': elif elem == 'table':
# Update stack of current tables # Update stack of current tables
self.currentTables.append(HtmlTable()) self.currentTables.append(HtmlTable(self))
elif elem in TABLE_CELL_TAGS: elif elem in TABLE_CELL_TAGS:
# Determine colspan
colspan = 1
if attrs.has_key('colspan'): colspan = int(attrs['colspan'])
self.inCell = colspan
self.cellIndex += colspan
# If we are in the first row of a table, update columns count # If we are in the first row of a table, update columns count
currentTable = self.currentTables[-1] currentTable = self.currentTables[-1]
if not currentTable.firstRowParsed: if not currentTable.firstRowParsed:
nbOfCols = 1 currentTable.nbOfColumns += colspan
if attrs.has_key('colspan'):
nbOfCols = int(attrs['colspan'])
currentTable.nbOfColumns += nbOfCols
return currentElem return currentElem
def onElementEnd(self, elem): def onElementEnd(self, elem):
@ -383,18 +438,35 @@ class XhtmlEnvironment(XmlEnvironment):
self.currentLists.pop() self.currentLists.pop()
elif elem == 'table': elif elem == 'table':
lastTable = self.currentTables.pop() lastTable = self.currentTables.pop()
# Computes the column styles required by the table
lastTable.computeColumnStyles(self.parser.caller.renderer)
# Dumps the content of the last parsed table into the parent buffer # Dumps the content of the last parsed table into the parent buffer
self.dumpString(lastTable.res) self.dumpString(lastTable.res)
elif elem == 'tr': elif elem == 'tr':
self.cellIndex = -1
lastTable = self.currentTables[-1] lastTable = self.currentTables[-1]
if not lastTable.firstRowParsed: if not lastTable.firstRowParsed:
lastTable.firstRowParsed = True lastTable.firstRowParsed = True
# First row is parsed. I know the number of columns in the # First row is parsed. I know the number of columns in the
# table: I can dump the columns declarations. # table: I can dump the columns declarations.
lastTable.res += ('<%s:table-column/>' % self.tableNs) * \ for i in range(1, lastTable.nbOfColumns + 1):
lastTable.nbOfColumns lastTable.res+= '<%s:table-column %s:style-name=' \
'"%s.%d"/>' % (self.tableNs, self.tableNs,
lastTable.name, i)
lastTable.res += lastTable.tempRes lastTable.res += lastTable.tempRes
lastTable.tempRes = u'' lastTable.tempRes = u''
elif elem in TABLE_CELL_TAGS:
# Update attr "columnContentSizes" of the currently parsed table,
# except if the cell spans several columns.
if self.inCell == 1:
lastTable = self.currentTables[-1]
sizes = lastTable.columnContentSizes
# Insert None values if the list is too small
while (len(sizes)-1) < self.cellIndex: sizes.append(None)
sizes[self.cellIndex] = max(sizes[self.cellIndex],
self.cellContentSize)
self.inCell = 0
self.cellContentSize = 0
if currentElem.tagsToClose: if currentElem.tagsToClose:
self.closeConflictualElements(currentElem.tagsToClose) self.closeConflictualElements(currentElem.tagsToClose)
if currentElem.tagsToReopen: if currentElem.tagsToReopen:
@ -446,7 +518,9 @@ class XhtmlParser(XmlParser):
e.dumpString('<%s>' % odfTag) e.dumpString('<%s>' % odfTag)
elif elem == 'table': elif elem == 'table':
# Here we must call "dumpString" only once # Here we must call "dumpString" only once
e.dumpString('<%s %s:style-name="podTable">' % (odfTag, e.tableNs)) table = e.currentTables[-1]
e.dumpString('<%s %s:name="%s" %s:style-name="podTable">' % \
(odfTag, e.tableNs, table.name, e.tableNs))
elif elem in TABLE_CELL_TAGS: elif elem in TABLE_CELL_TAGS:
e.dumpString('<%s %s:style-name="%s"' % \ e.dumpString('<%s %s:style-name="%s"' % \
(odfTag, e.tableNs, DEFAULT_ODT_STYLES[elem])) (odfTag, e.tableNs, DEFAULT_ODT_STYLES[elem]))