'''This script allows to walk (and potentially patch) files (content.xml,
   styles.xml...) contained within a given ODF file or within all ODF files
   found in some folder.'''

# ------------------------------------------------------------------------------
import sys, os.path, time
import subprocess, tempfile
from appy.shared.zip import unzip, zip
from appy.shared.utils import getOsTempFolder, FolderDeleter, executeCommand

# ------------------------------------------------------------------------------
usage = '''Usage: python odfwalk.py [file|folder] yourScript.

 If *file* is given, it is the path to an ODF file (odt or ods). This single
 file will be walked.
 If *folder* is given, we will walk all ODF files found in this folder and
 sub-folders.

 *yourScript* is the path to a Python script that will be run on every walked
 file. It will be called with a single arg containing the absolute path to the
 folder containing the unzipped file content (content.xml, styles.xml...).'''

# ------------------------------------------------------------------------------
class OdfWalk:
    '''Unzips one ODF file (or every ODF file found in a folder tree), runs a
       user-supplied Python script on the unzipped content and re-zips the
       result in place of the original file.'''

    # Extensions of the files that will be walked
    toUnzip = ('.ods', '.odt')

    def __init__(self, fileOrFolder, script):
        # Path to a single ODF file, or to a folder containing ODF files
        self.fileOrFolder = fileOrFolder
        # Path to the Python script that will be run on every walked file
        self.script = script
        # Folder in which one working sub-folder per walked file is created
        self.tempFolder = getOsTempFolder()

    def walkFile(self, fileName):
        '''Unzips p_fileName in a temp folder, calls self.script on it, and
           then re-zips the result into p_fileName. If the script exits with
           a non-zero status, p_fileName is left untouched. The temp folder
           is removed in all cases.'''
        print('Walking %s...' % fileName)
        # mkdtemp guarantees a fresh, unique folder (a name based on the
        # current time may collide).
        tempFolder = tempfile.mkdtemp(prefix='f', dir=self.tempFolder)
        try:
            # Unzip the file in it
            unzip(fileName, tempFolder)
            # Call self.script. Args are passed as a list, without any shell,
            # so paths containing spaces or shell metacharacters are safe.
            args = [sys.executable or 'python', self.script, tempFolder]
            sys.stdout.write(' Running %s... ' % ' '.join(args))
            sys.stdout.flush() # Keep our output before the one of the script
            status = subprocess.call(args)
            if status != 0:
                # Do not overwrite the original file with content that may
                # have been patched only partially.
                print('failed (exit code %s). %s was not modified.' % \
                      (status, fileName))
                return
            # Re-zip the result
            zip(fileName, tempFolder, odf=True)
            print('done.')
        finally:
            FolderDeleter.delete(tempFolder)

    def run(self):
        '''Walks self.fileOrFolder: the file itself if it is a file, or else
           every file, within this folder and its sub-folders, whose extension
           is among OdfWalk.toUnzip.'''
        target = self.fileOrFolder
        if os.path.isfile(target):
            self.walkFile(target)
        elif os.path.isdir(target):
            # Walk all files found in this folder
            for folder, subFolders, fileNames in os.walk(target):
                for name in fileNames:
                    if os.path.splitext(name)[1] in self.toUnzip:
                        self.walkFile(os.path.join(folder, name))
        else:
            print('%s does not exist.' % target)

# ------------------------------------------------------------------------------
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print(usage)
        sys.exit()
    # Warn the user. The prompt is written without trailing newline and
    # flushed, so it is visible before we block on stdin.
    sys.stdout.write('All the files in %s will be modified. ' \
                     'Are you sure? [y/N] ' % sys.argv[1])
    sys.stdout.flush()
    response = sys.stdin.readline().strip().lower()
    if response == 'y':
        OdfWalk(sys.argv[1], sys.argv[2]).run()
    else:
        print('Canceled.')
# ------------------------------------------------------------------------------
%s' @@ -71,9 +71,11 @@ class Converter: 'openoffice.org 1': 'openof~1', 'openoffice.org 2': 'openof~1', } - def __init__(self, docPath, resultType, port=DEFAULT_PORT): + def __init__(self, docPath, resultType, port=DEFAULT_PORT, + templatePath=None): self.port = port - self.docUrl, self.docPath = self.getInputUrls(docPath) + # The path to the document to convert + self.docUrl, self.docPath = self.getFilePath(docPath) self.inputType = os.path.splitext(docPath)[1][1:].lower() self.resultType = resultType self.resultFilter = self.getResultFilter() @@ -81,16 +83,21 @@ class Converter: self.loContext = None self.oo = None # The LibreOffice application object self.doc = None # The LibreOffice loaded document + # The path to a LibreOffice template (ie, a ".ott" file) from which + # styles can be imported + self.templateUrl = self.templatePath = None + if templatePath: + self.templateUrl, self.templatePath = self.getFilePath(templatePath) - def getInputUrls(self, docPath): - '''Returns the absolute path of the input file. In fact, it returns a - tuple with some URL version of the path for OO as the first element + def getFilePath(self, filePath): + '''Returns the absolute path of p_filePath. In fact, it returns a + tuple with some URL version of the path for LO as the first element and the absolute path as the second element.''' import unohelper - if not os.path.exists(docPath) and not os.path.isfile(docPath): - raise ConverterError(DOC_NOT_FOUND % docPath) - docAbsPath = os.path.abspath(docPath) - # Return one path for OO, one path for me. 
+ if not os.path.exists(filePath) and not os.path.isfile(filePath): + raise ConverterError(DOC_NOT_FOUND % filePath) + docAbsPath = os.path.abspath(filePath) + # Return one path for OO, one path for me return unohelper.systemPathToFileUrl(docAbsPath), docAbsPath def getResultFilter(self): @@ -132,6 +139,18 @@ class Converter: e = sys.exc_info()[1] raise ConverterError(CANNOT_WRITE_RESULT % (res, e)) + def props(self, properties): + '''Create a UNO-compliant tuple of properties, from tuple p_properties + containing sub-tuples (s_propertyName, value).''' + from com.sun.star.beans import PropertyValue + res = [] + for name, value in properties: + prop = PropertyValue() + prop.Name = name + prop.Value = value + res.append(prop) + return tuple(res) + def connect(self): '''Connects to LibreOffice''' if os.name == 'nt': @@ -161,10 +180,11 @@ class Converter: raise ConverterError(CONNECT_ERROR % (self.port, e)) def updateOdtDocument(self): - '''If the input file is an ODT document, we will perform 2 tasks: - 1) Update all annexes; - 2) Update sections (if sections refer to external content, we try to - include the content within the result file) + '''If the input file is an ODT document, we will perform those tasks: + 1) update all annexes; + 2) update sections (if sections refer to external content, we try to + include the content within the result file); + 3) load styles from an external template if given. ''' from com.sun.star.lang import IndexOutOfBoundsException # I need to use IndexOutOfBoundsException because sometimes, when @@ -197,29 +217,26 @@ class Converter: # of the section. Else, it won't appear. 
except IndexOutOfBoundsException: pass - + # Import styles from an external file when required + if self.templateUrl: + params = self.props((('OverwriteStyles', True), + ('LoadPageStyles', False))) + self.doc.StyleFamilies.loadStylesFromURL(self.templateUrl, params) + def loadDocument(self): from com.sun.star.lang import IllegalArgumentException, \ IndexOutOfBoundsException - from com.sun.star.beans import PropertyValue try: # Loads the document to convert in a new hidden frame - prop = PropertyValue(); prop.Name = 'Hidden'; prop.Value = True + props = [('Hidden', True)] if self.inputType == 'csv': # Give some additional params if we need to open a CSV file - prop2 = PropertyValue() - prop2.Name = 'FilterFlags' - prop2.Value = '59,34,76,1' - #prop2.Name = 'FilterData' - #prop2.Value = 'Any' - props = (prop, prop2) - else: - props = (prop,) + props.append(('FilterFlags', '59,34,76,1')) + #props.append(('FilterData', 'Any')) self.doc = self.oo.loadComponentFromURL(self.docUrl, "_blank", 0, - props) - if self.inputType == 'odt': - # Perform additional tasks for odt documents - self.updateOdtDocument() + self.props(props)) + # Perform additional tasks for odt documents + if self.inputType == 'odt': self.updateOdtDocument() try: self.doc.refresh() except AttributeError: @@ -232,22 +249,13 @@ class Converter: '''Calls LO to perform a document conversion. 
Note that the conversion is not really done if the source and target documents have the same type.''' - properties = [] - from com.sun.star.beans import PropertyValue - prop = PropertyValue() - prop.Name = 'FilterName' - prop.Value = self.resultFilter - properties.append(prop) - if self.resultType == 'csv': - # For CSV export, add options (separator, etc) - optionsProp = PropertyValue() - optionsProp.Name = 'FilterOptions' - optionsProp.Value = '59,34,76,1' - properties.append(optionsProp) - self.doc.storeToURL(self.resultUrl, tuple(properties)) + props = [('FilterName', self.resultFilter)] + if self.resultType == 'csv': # Add options for CSV export (separator...) + props.append(('FilterOptions', '59,34,76,1')) + self.doc.storeToURL(self.resultUrl, self.props(props)) def run(self): - '''Connects to LO, does the job and disconnects.''' + '''Connects to LO, does the job and disconnects''' self.connect() self.loadDocument() self.convertDocument() @@ -274,13 +282,17 @@ class ConverterScript: help="The port on which LibreOffice runs " \ "Default is %d." 
% DEFAULT_PORT, default=DEFAULT_PORT, metavar="PORT", type='int') + optParser.add_option("-t", "--template", dest="template", + default=None, metavar="TEMPLATE", type='string', + help="The path to a LibreOffice template from " \ + "which you may import styles.") (options, args) = optParser.parse_args() if len(args) != 2: sys.stderr.write(WRONG_NB_OF_ARGS) sys.stderr.write('\n') optParser.print_help() sys.exit(ERROR_CODE) - converter = Converter(args[0], args[1], options.port) + converter = Converter(args[0], args[1], options.port, options.template) try: converter.run() except ConverterError: diff --git a/pod/pod_parser.py b/pod/pod_parser.py index 4e061c3..dafd5b6 100644 --- a/pod/pod_parser.py +++ b/pod/pod_parser.py @@ -83,9 +83,11 @@ class PodEnvironment(OdfEnvironment): # Current state self.state = self.READING_CONTENT # Elements we must ignore (they will not be included in the result) - self.ignorableElements = None # Will be set after namespace propagation + self.ignorableElems = None # Will be set after namespace propagation # Elements that may be impacted by POD statements - self.impactableElements = None # Idem + self.impactableElems = None # Idem + # Elements representing start and end tags surrounding expressions + self.exprStartElems = self.exprEndElems = None # Idem # Stack of currently visited tables self.tableStack = [] self.tableIndex = -1 @@ -193,30 +195,36 @@ class PodEnvironment(OdfEnvironment): # Create a table of names of used tags and attributes (precomputed, # including namespace, for performance). 
table = ns[self.NS_TABLE] - self.tags = { - 'tracked-changes': '%s:tracked-changes' % ns[self.NS_TEXT], - 'change': '%s:change' % ns[self.NS_TEXT], - 'annotation': '%s:annotation' % ns[self.NS_OFFICE], - 'change-start': '%s:change-start' % ns[self.NS_TEXT], - 'change-end': '%s:change-end' % ns[self.NS_TEXT], - 'conditional-text': '%s:conditional-text' % ns[self.NS_TEXT], + text = ns[self.NS_TEXT] + office = ns[self.NS_OFFICE] + tags = { + 'tracked-changes': '%s:tracked-changes' % text, + 'change': '%s:change' % text, + 'annotation': '%s:annotation' % office, + 'change-start': '%s:change-start' % text, + 'change-end': '%s:change-end' % text, + 'conditional-text': '%s:conditional-text' % text, + 'text-input': '%s:text-input' % text, 'table': '%s:table' % table, 'table-name': '%s:name' % table, 'table-cell': '%s:table-cell' % table, 'table-column': '%s:table-column' % table, 'formula': '%s:formula' % table, - 'value-type': '%s:value-type' % ns[self.NS_OFFICE], - 'value': '%s:value' % ns[self.NS_OFFICE], - 'string-value': '%s:string-value' % ns[self.NS_OFFICE], - 'span': '%s:span' % ns[self.NS_TEXT], + 'value-type': '%s:value-type' % office, + 'value': '%s:value' % office, + 'string-value': '%s:string-value' % office, + 'span': '%s:span' % text, 'number-columns-spanned': '%s:number-columns-spanned' % table, 'number-columns-repeated': '%s:number-columns-repeated' % table, } - self.ignorableElements = (self.tags['tracked-changes'], - self.tags['change']) - self.impactableElements = ( - Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem, - Cell.OD.elem, Section.OD.elem) + self.tags = tags + self.ignorableElems = (tags['tracked-changes'], tags['change']) + self.exprStartElems = (tags['change-start'], tags['conditional-text'], \ + tags['text-input']) + self.exprEndElems = (tags['change-end'], tags['conditional-text'], \ + tags['text-input']) + self.impactableElems = (Text.OD.elem, Title.OD.elem, Table.OD.elem, + Row.OD.elem, Cell.OD.elem, Section.OD.elem) self.inserts 
= self.transformInserts() # ------------------------------------------------------------------------------ @@ -234,15 +242,15 @@ class PodParser(OdfParser): officeNs = ns[e.NS_OFFICE] textNs = ns[e.NS_TEXT] tableNs = ns[e.NS_TABLE] - if elem in e.ignorableElements: + if elem in e.ignorableElems: e.state = e.IGNORING elif elem == e.tags['annotation']: # Be it in an ODT or ODS template, an annotation is considered to # contain a POD statement. e.state = e.READING_STATEMENT - elif elem in (e.tags['change-start'], e.tags['conditional-text']): - # In an ODT template, any text in track-changes or any conditional - # field is considered to contain a POD expression. + elif elem in e.exprStartElems: + # Any track-changed text or being in a conditional or input field is + # considered to be a POD expression. e.state = e.READING_EXPRESSION e.exprHasStyle = False elif (elem == e.tags['table-cell']) and \ @@ -272,7 +280,7 @@ class PodParser(OdfParser): if e.state == e.IGNORING: pass elif e.state == e.READING_CONTENT: - if elem in e.impactableElements: + if elem in e.impactableElems: if e.mode == e.ADD_IN_SUBBUFFER: e.addSubBuffer() e.currentBuffer.addElement(e.currentElem.name) @@ -290,7 +298,7 @@ class PodParser(OdfParser): ns = e.onEndElement() officeNs = ns[e.NS_OFFICE] textNs = ns[e.NS_TEXT] - if elem in e.ignorableElements: + if elem in e.ignorableElems: e.state = e.READING_CONTENT elif elem == e.tags['annotation']: # Manage statement @@ -317,7 +325,7 @@ class PodParser(OdfParser): e.currentOdsHook = None # Dump the ending tag e.currentBuffer.dumpEndElement(elem) - if elem in e.impactableElements: + if elem in e.impactableElems: if isinstance(e.currentBuffer, MemoryBuffer): isMainElement = e.currentBuffer.isMainElement(elem) # Unreference the element among buffer.elements @@ -346,8 +354,7 @@ class PodParser(OdfParser): e.currentStatement.append(statementLine) e.currentContent = '' elif e.state == e.READING_EXPRESSION: - if (elem == e.tags['change-end']) or \ - (elem == 
e.tags['conditional-text']): + if elem in e.exprEndElems: expression = e.currentContent.strip() e.currentContent = '' # Manage expression diff --git a/pod/renderer.py b/pod/renderer.py index 21ad744..32a1df9 100644 --- a/pod/renderer.py +++ b/pod/renderer.py @@ -18,13 +18,12 @@ # ------------------------------------------------------------------------------ import zipfile, shutil, xml.sax, os, os.path, re, mimetypes, time - from UserDict import UserDict - -import appy.pod, time, cgi +import appy.pod from appy.pod import PodError from appy.shared import mimeTypes, mimeTypesExts from appy.shared.xml_parser import XmlElement +from appy.shared.zip import unzip, zip from appy.shared.utils import FolderDeleter, executeCommand, FileWrapper from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert from appy.pod.converter import FILE_TYPES @@ -101,7 +100,7 @@ class Renderer: def __init__(self, template, context, result, pythonWithUnoPath=None, ooPort=2002, stylesMapping={}, forceOoCall=False, finalizeFunction=None, overwriteExisting=False, - raiseOnError=False, imageResolver=None): + raiseOnError=False, imageResolver=None, stylesTemplate=None): '''This Python Open Document Renderer (PodRenderer) loads a document template (p_template) which is an ODT or ODS file with some elements written in Python. Based on this template and some Python objects @@ -145,9 +144,11 @@ class Renderer: XHTML content. Indeed, POD may not be able (ie, may not have the permission to) perform a HTTP GET on those images. Currently, the resolver can only be a Zope application object. + + - p_stylesTemplate can be the path to a LibreOffice file (ie, a .ott + file) whose styles will be imported within the result. 
''' self.template = template - self.templateZip = zipfile.ZipFile(template) self.result = result self.contentXml = None # Content (string) of content.xml self.stylesXml = None # Content (string) of styles.xml @@ -162,6 +163,7 @@ class Renderer: self.overwriteExisting = overwriteExisting self.raiseOnError = raiseOnError self.imageResolver = imageResolver + self.stylesTemplate = stylesTemplate # Remember potential files or images that will be included through # "do ... from document" statements: we will need to declare them in # META-INF/manifest.xml. Keys are file names as they appear within the @@ -173,49 +175,16 @@ class Renderer: # Unzip template self.unzipFolder = os.path.join(self.tempFolder, 'unzip') os.mkdir(self.unzipFolder) - for zippedFile in self.templateZip.namelist(): - # Before writing the zippedFile into self.unzipFolder, create the - # intermediary subfolder(s) if needed. - fileName = None - if zippedFile.endswith('/') or zippedFile.endswith(os.sep): - # This is an empty folder. Create it nevertheless. If zippedFile - # starts with a '/', os.path.join will consider it an absolute - # path and will throw away self.unzipFolder. - os.makedirs(os.path.join(self.unzipFolder, - zippedFile.lstrip('/'))) - else: - fileName = os.path.basename(zippedFile) - folderName = os.path.dirname(zippedFile) - fullFolderName = self.unzipFolder - if folderName: - fullFolderName = os.path.join(fullFolderName, folderName) - if not os.path.exists(fullFolderName): - os.makedirs(fullFolderName) - # Unzip the file in self.unzipFolder - if fileName: - fullFileName = os.path.join(fullFolderName, fileName) - f = open(fullFileName, 'wb') - fileContent = self.templateZip.read(zippedFile) - if (fileName == 'content.xml') and not folderName: - # content.xml files may reside in subfolders. - # We modify only the one in the root folder. - self.contentXml = fileContent - elif (fileName == 'styles.xml') and not folderName: - # Same remark as above. 
- self.stylesManager = StylesManager(fileContent) - self.stylesXml = fileContent - elif (fileName == 'mimetype') and \ - (fileContent == mimeTypes['ods']): - # From LibreOffice 3.5, it is not possible anymore to dump - # errors into the resulting ods as annotations. Indeed, - # annotations can't reside anymore within paragraphs. ODS - # files generated with pod and containing error messages in - # annotations cause LibreOffice 3.5 and 4.0 to crash. - # LibreOffice >= 4.1 simply does not show the annotation. - self.raiseOnError = True - f.write(fileContent) - f.close() - self.templateZip.close() + info = unzip(template, self.unzipFolder, odf=True) + self.contentXml = info['content.xml'] + self.stylesXml = info['styles.xml'] + self.stylesManager = StylesManager(self.stylesXml) + # From LibreOffice 3.5, it is not possible anymore to dump errors into + # the resulting ods as annotations. Indeed, annotations can't reside + # anymore within paragraphs. ODS files generated with pod and containing + # error messages in annotations cause LibreOffice 3.5 and 4.0 to crash. + # LibreOffice >= 4.1 simply does not show the annotation. 
+ if info['mimetype'] == mimeTypes['ods']: self.raiseOnError = True # Create the content.xml parser pe = PodEnvironment contentInserts = ( @@ -440,7 +409,7 @@ class Renderer: # Public interface def run(self): - '''Renders the result.''' + '''Renders the result''' try: # Remember which parser is running self.currentParser = self.contentParser @@ -490,7 +459,8 @@ class Renderer: try: from appy.pod.converter import Converter, ConverterError try: - Converter(resultName, resultType, self.ooPort).run() + Converter(resultName, resultType, self.ooPort, + self.stylesTemplate).run() except ConverterError, ce: raise PodError(CONVERT_ERROR % str(ce)) except ImportError: @@ -513,6 +483,7 @@ class Renderer: cmd = '%s %s %s %s -p%d' % \ (self.pyPath, convScript, qResultName, resultType, self.ooPort) + if self.stylesTemplate: cmd += ' -t%s' % self.stylesTemplate loOutput = executeCommand(cmd) except PodError, pe: # When trying to call LO in server mode for producing ODT or ODS @@ -559,7 +530,7 @@ class Renderer: f = file(contentXml, 'w') f.write(content) f.close() - # Call the user-defined "finalize" function when present. + # Call the user-defined "finalize" function when present if self.finalizeFunction: try: self.finalizeFunction(self.unzipFolder) @@ -569,38 +540,7 @@ class Renderer: # the POD template (odt, ods...) resultExt = self.getTemplateType() resultName = os.path.join(self.tempFolder, 'result.%s' % resultExt) - try: - resultZip = zipfile.ZipFile(resultName, 'w', zipfile.ZIP_DEFLATED) - except RuntimeError: - resultZip = zipfile.ZipFile(resultName,'w') - # Insert first the file "mimetype" (uncompressed), in order to be - # compliant with the OpenDocument Format specification, section 17.4, - # that expresses this restriction. Else, libraries like "magic", under - # Linux/Unix, are unable to detect the correct mimetype for a pod result - # (it simply recognizes it as a "application/zip" and not a - # "application/vnd.oasis.opendocument.text)". 
- mimetypeFile = os.path.join(self.unzipFolder, 'mimetype') - # This file may not exist (presumably, ods files from Google Drive) - if not os.path.exists(mimetypeFile): - f = open(mimetypeFile, 'w') - f.write(mimeTypes[resultExt]) - f.close() - resultZip.write(mimetypeFile, 'mimetype', zipfile.ZIP_STORED) - for dir, dirnames, filenames in os.walk(self.unzipFolder): - for f in filenames: - folderName = dir[len(self.unzipFolder)+1:] - # Ignore file "mimetype" that was already inserted. - if (folderName == '') and (f == 'mimetype'): continue - resultZip.write(os.path.join(dir, f), - os.path.join(folderName, f)) - if not dirnames and not filenames: - # This is an empty leaf folder. We must create an entry in the - # zip for him. - folderName = dir[len(self.unzipFolder):] - zInfo = zipfile.ZipInfo("%s/" % folderName,time.localtime()[:6]) - zInfo.external_attr = 48 - resultZip.writestr(zInfo, '') - resultZip.close() + zip(resultName, self.unzipFolder, odf=True) resultType = os.path.splitext(self.result)[1].strip('.') if (resultType in self.templateTypes) and not self.forceOoCall: # Simply move the ODT result to the result diff --git a/pod/styles.in.styles.xml b/pod/styles.in.styles.xml index c9e799f..98bc641 100644 --- a/pod/styles.in.styles.xml +++ b/pod/styles.in.styles.xml @@ -4,18 +4,15 @@ @style@:font-name-asian="PodStarSymbol" @style@:font-size-asian="9pt" @style@:font-name-complex="PodStarSymbol" @style@:font-size-complex="9pt"/> -<@style@:style style:name="AppyStandard" style:family="paragraph" style:class="text" style:master-page-name=""> +<@style@:style style:name="AppyStandard" style:family="paragraph" style:class="text" style:master-page-name="" @style@:parent-style-name="Standard"> <@style@:paragraph-properties fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-top="0.101cm" fo:margin-bottom="0.169cm" fo:text-indent="0cm" style:auto-text-indent="false" style:page-number="auto"/> - <@style@:text-properties style:font-name="DejaVu Sans" fo:font-size="10pt"/> 
<@style@:style @style@:name="Appy_Table_Content" @style@:display-name="Appy Table Contents" @style@:family="paragraph" - @style@:parent-style-name="AppyStandard" @style@:class="extra"> + @style@:parent-style-name="AppyStandard" @style@:class="extra"> <@style@:paragraph-properties @fo@:margin-top="0cm" @fo@:margin-bottom="0cm" @text@:number-lines="false" @text@:line-number="0"/> - <@style@:text-properties @fo@:font-size="8pt"/> <@style@:style @style@:name="Appy_Table_Heading" @style@:display-name="Appy Table Heading" @style@:family="paragraph" @style@:parent-style-name="Appy_Table_Contents" @style@:class="extra"> - <@style@:paragraph-properties @fo@:text-align="center" @style@:justify-single-word="false" @text@:number-lines="false" - @text@:line-number="0"/> + <@style@:paragraph-properties @fo@:text-align="center" @style@:justify-single-word="false" @text@:number-lines="false" @text@:line-number="0"/> <@style@:text-properties @fo@:font-weight="bold" @style@:font-weight-asian="bold" @style@:font-weight-complex="bold"/> diff --git a/pod/xhtml2odt.py b/pod/xhtml2odt.py index f6dff41..86b8e4d 100644 --- a/pod/xhtml2odt.py +++ b/pod/xhtml2odt.py @@ -17,13 +17,13 @@ from appy.pod import * # To which ODT tags do HTML tags correspond ? 
'''Functions for (un)zipping files'''

# ------------------------------------------------------------------------------
import os, os.path, zipfile, time

# ------------------------------------------------------------------------------
def unzip(f, folder, odf=False):
    '''Unzips file p_f into p_folder. p_f can be anything accepted by the
       zipfile.ZipFile constructor. p_folder must exist.

       If p_odf is True, p_f is considered to be an odt or ods file and this
       function returns a dict whose keys "content.xml", "styles.xml" and
       "mimetype" hold the content of the corresponding files found at the
       root of the zip (or None if the file is not there). If p_odf is False,
       None is returned.

       Every entry is extracted within p_folder: leading slashes and "."
       or ".." path parts found in entry names are ignored.'''
    res = None
    if odf:
        # A key is always present, even if the file is missing in the zip
        # (ie, some ods files have no "mimetype"), so callers can safely get
        # any of these keys.
        res = {'content.xml': None, 'styles.xml': None, 'mimetype': None}
    zipFile = zipfile.ZipFile(f)
    try:
        for zippedFile in zipFile.namelist():
            isFolder = zippedFile.endswith('/') or zippedFile.endswith(os.sep)
            # Keep only the meaningful parts of the entry name. If we kept a
            # leading "/", os.path.join would consider the name as an absolute
            # path and would throw away p_folder; keeping ".." would allow an
            # entry to be written outside p_folder.
            parts = [part for part in zippedFile.replace(os.sep, '/').split('/')
                     if part not in ('', '.', '..')]
            if not parts: continue
            if isFolder:
                # This is an empty folder. Create it nevertheless, unless a
                # previous entry already led to its creation.
                target = os.path.join(folder, *parts)
                if not os.path.isdir(target): os.makedirs(target)
                continue
            # Before writing the file, create the intermediary subfolder(s)
            # if needed.
            parentFolder = os.path.join(folder, *parts[:-1])
            if not os.path.isdir(parentFolder): os.makedirs(parentFolder)
            fileContent = zipFile.read(zippedFile)
            # content.xml and others may reside in subfolders. Get only the
            # ones in the root folder: their entry name is exactly the key.
            if odf and (zippedFile in res):
                res[zippedFile] = fileContent
            # Unzip the file in p_folder
            unzipped = open(os.path.join(parentFolder, parts[-1]), 'wb')
            try:
                unzipped.write(fileContent)
            finally:
                unzipped.close()
    finally:
        zipFile.close()
    return res

# ------------------------------------------------------------------------------
def zip(f, folder, odf=False):
    '''Zips the content of p_folder into the zip file whose (preferably)
       absolute filename is p_f. If p_f exists, it is replaced.

       If p_odf is True, p_folder is considered to contain the standard
       content of an ODF file (content.xml,...). In this case, some rules must
       be respected while building the zip (see below). If file "mimetype" is
       missing in p_folder, it is created, based on the extension of p_f; a
       KeyError is raised if this extension is not among appy.shared.mimeTypes.
    '''
    # Remove p_f if it exists
    if os.path.exists(f): os.remove(f)
    try:
        zipFile = zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED)
    except RuntimeError:
        # Compression is not available: store files uncompressed
        zipFile = zipfile.ZipFile(f, 'w')
    try:
        # If p_odf is True, insert first the file "mimetype" (uncompressed),
        # in order to be compliant with the OpenDocument Format specification,
        # section 17.4, that expresses this restriction. Else, libraries like
        # "magic", under Linux/Unix, are unable to detect the correct mimetype
        # for a pod result (it simply recognizes it as a "application/zip" and
        # not a "application/vnd.oasis.opendocument.text)".
        if odf:
            mimetypeFile = os.path.join(folder, 'mimetype')
            # This file may not exist (presumably, ods files from Google Drive)
            if not os.path.exists(mimetypeFile):
                # Imported here: it is the only place where it is needed
                from appy.shared import mimeTypes
                # Get the mimetype before creating the file: this way, no
                # empty file is left behind if the extension is unknown.
                mimeType = mimeTypes[os.path.splitext(f)[-1][1:]]
                mimeFile = open(mimetypeFile, 'w')
                try:
                    mimeFile.write(mimeType)
                finally:
                    mimeFile.close()
            zipFile.write(mimetypeFile, 'mimetype', zipfile.ZIP_STORED)
        for current, dirnames, filenames in os.walk(folder):
            # Path to the current folder, relative to p_folder. Using relpath
            # (instead of slicing strings) works whether p_folder ends with a
            # path separator or not.
            relFolder = os.path.relpath(current, folder)
            if relFolder == os.curdir: relFolder = ''
            for name in filenames:
                # For p_odf files, ignore file "mimetype" that was already
                # inserted.
                if odf and not relFolder and (name == 'mimetype'): continue
                zipFile.write(os.path.join(current, name),
                              os.path.join(relFolder, name))
            if relFolder and not dirnames and not filenames:
                # This is an empty leaf folder. We must create an entry in the
                # zip for it. Entry names use "/" and have no leading slash.
                entryName = '%s/' % relFolder.replace(os.sep, '/')
                zInfo = zipfile.ZipInfo(entryName, time.localtime()[:6])
                zInfo.external_attr = 48
                zipFile.writestr(zInfo, '')
    finally:
        zipFile.close()
# ------------------------------------------------------------------------------