More input and output formats for OO conversion in converter.py and bugfix in skyn edit.

2009-12-17 21:14:52 +01:00 · 2009-12-17 21:14:52 +01:00 · e89eda4838
commit e89eda4838
parent fff2b6a329
7 changed files with 190 additions and 113 deletions
--- a/pod/converter.py
+++ b/pod/converter.py
@ -20,15 +20,26 @@
 import sys, os, os.path, time, signal
 from optparse import OptionParser

-ODT_FILE_TYPES = {'doc': 'MS Word 97', # Could be 'MS Word 2003 XML'
-                  'pdf': 'writer_pdf_Export',
-                  'rtf': 'Rich Text Format',
-                  'txt': 'Text',
-                  'html': 'HTML (StarWriter)',
-                  'htm': 'HTML (StarWriter)',
-                  'odt': 'ODT'}
-# Conversion to ODT does not make any conversion; it simply updates indexes and
-# linked documents.
+htmlFilters = {'odt': 'HTML (StarWriter)',
+               'ods': 'HTML (StarCalc)',
+               'odp': 'impress_html_Export'}
+
+FILE_TYPES = {'odt': 'writer8',
+              'ods': 'calc8',
+              'odp': 'impress8',
+              'htm': htmlFilters, 'html': htmlFilters,
+              'rtf': 'Rich Text Format',
+              'txt': 'Text',
+              'csv': 'Text - txt - csv (StarCalc)',
+              'pdf': {'odt': 'writer_pdf_Export',  'ods': 'calc_pdf_Export',
+                      'odp': 'impress_pdf_Export', 'odg': 'draw_pdf_Export'},
+              'swf': 'impress_flash_Export',
+              'doc': 'MS Word 97',
+              'xls': 'MS Excel 97',
+              'ppt': 'MS PowerPoint 97',
+}
+# Conversion from odt to odt does not make any conversion, but updates indexes
+# and linked documents.

 # ------------------------------------------------------------------------------
 class ConverterError(Exception): pass
@ -46,7 +57,7 @@ DEFAULT_PORT = 2002

 # ------------------------------------------------------------------------------
 class Converter:
-    '''Converts an ODT document into pdf, doc, txt or rtf.'''
+    '''Converts a document readable by OpenOffice into pdf, doc, txt or rtf.'''
    exeVariants = ('soffice.exe', 'soffice')
    pathReplacements = {'program files': 'progra~1',
                        'openoffice.org 1': 'openof~1',
@ -54,41 +65,64 @@ class Converter:
                        }
    def __init__(self, docPath, resultType, port=DEFAULT_PORT):
        self.port = port
-        self.docUrl, self.docPath = self.getDocUrls(docPath)
-        self.resultFilter = self.getResultFilter(resultType)
-        self.resultUrl = self.getResultUrl(resultType)
+        self.docUrl, self.docPath = self.getInputUrls(docPath)
+        self.inputType = os.path.splitext(docPath)[1][1:].lower()
+        self.resultType = resultType
+        self.resultFilter = self.getResultFilter()
+        self.resultUrl = self.getResultUrl()
        self.ooContext = None
-        self.oo = None # OpenOffice application object
-        self.doc = None # OpenOffice loaded document
-    def getDocUrls(self, docPath):
+        self.oo = None # The OpenOffice application object
+        self.doc = None # The OpenOffice loaded document
+
+    def getInputUrls(self, docPath):
+        '''Returns the absolute path of the input file. In fact, it returns a
+           tuple with some URL version of the path for OO as the first element
+           and the absolute path as the second element.''' 
        import uno
        if not os.path.exists(docPath) and not os.path.isfile(docPath):
            raise ConverterError(DOC_NOT_FOUND % docPath)
        docAbsPath = os.path.abspath(docPath)
        # Return one path for OO, one path for me.
        return uno.systemPathToFileUrl(docAbsPath), docAbsPath
-    def getResultFilter(self, resultType):
-        if ODT_FILE_TYPES.has_key(resultType):
-            res = ODT_FILE_TYPES[resultType]
+
+    def getResultFilter(self):
+        '''Based on the result type, identifies which OO filter to use for the
+           document conversion.'''
+        if FILE_TYPES.has_key(self.resultType):
+            res = FILE_TYPES[self.resultType]
+            if isinstance(res, dict):
+                res = res[self.inputType]
        else:
-            raise ConverterError(BAD_RESULT_TYPE % (resultType,
-                                                    ODT_FILE_TYPES.keys()))
+            raise ConverterError(BAD_RESULT_TYPE % (self.resultType,
+                                                    FILE_TYPES.keys()))
        return res
-    def getResultUrl(self, resultType):
+
+    def getResultUrl(self):
+        '''Returns the path of the result file in the format needed by OO. If
+           the result type and the input type are the same (ie the user wants to
+           refresh indexes or some other action and not perform a real
+           conversion), the result file is named
+                           <inputFileName>.res.<resultType>.
+
+           Else, the result file is named like the input file but with a
+           different extension:
+                           <inputFileName>.<resultType>
+        '''
        import uno
        baseName = os.path.splitext(self.docPath)[0]
-        if resultType != 'odt':
-            res = '%s.%s' % (baseName, resultType)
+        if self.resultType != self.inputType:
+            res = '%s.%s' % (baseName, self.resultType)
        else:
-            res = '%s.res.%s' % (baseName, resultType)
+            res = '%s.res.%s' % (baseName, self.resultType)
        try:
            f = open(res, 'w')
            f.write('Hello')
            f.close()
            os.remove(res)
            return uno.systemPathToFileUrl(res)
-        except OSError, oe:
-            raise ConverterError(CANNOT_WRITE_RESULT % (res, oe))
+        except (OSError, IOError), ioe:
+            raise ConverterError(CANNOT_WRITE_RESULT % (res, ioe))
+
    def connect(self):
        '''Connects to OpenOffice'''
        if os.name == 'nt':
@ -115,73 +149,90 @@ class Converter:
                'com.sun.star.frame.Desktop', self.ooContext)
        except NoConnectException, nce:
            raise ConverterError(CONNECT_ERROR % (self.port, nce))
-    def disconnect(self):
-        self.doc.close(True)
-        # Do a nasty thing before exiting the python process. In case the
-        # last call is a oneway call (e.g. see idl-spec of insertString),
-        # it must be forced out of the remote-bridge caches before python
-        # exits the process. Otherwise, the oneway call may or may not reach
-        # the target object.
-        # I do this here by calling a cheap synchronous call (getPropertyValue).
-        self.ooContext.ServiceManager
-    def loadDocument(self):
-        from com.sun.star.lang import IllegalArgumentException, \
-                                      IndexOutOfBoundsException
+
+    def updateOdtDocument(self):
+        '''If the input file is an ODT document, we will perform 2 tasks:
+           1) Update all indexes;
+           2) Update sections (if sections refer to external content, we try to
+              include the content within the result file)
+        '''
+        from com.sun.star.lang import IndexOutOfBoundsException
        # I need to use IndexOutOfBoundsException because sometimes, when
        # using sections.getCount, UNO returns a number that is bigger than
        # the real number of sections (this is because it also counts the
        # sections that are present within the sub-documents to integrate)
+        # Update all indexes
+        indexes = self.doc.getDocumentIndexes()
+        indexesCount = indexes.getCount()
+        if indexesCount != 0:
+            for i in range(indexesCount):
+                try:
+                    indexes.getByIndex(i).update()
+                except IndexOutOfBoundsException:
+                    pass
+        # Update sections
+        self.doc.updateLinks()
+        sections = self.doc.getTextSections()
+        sectionsCount = sections.getCount()
+        if sectionsCount != 0:
+            for i in range(sectionsCount-1, -1, -1):
+                # I must walk the sections from the last one to the first
+                # one. Else, when "disposing" sections, I remove sections
+                # and the remaining sections get other indexes.
+                try:
+                    section = sections.getByIndex(i)
+                    if section.FileLink and section.FileLink.FileURL:
+                        section.dispose() # This method removes the
+                        # <section></section> tags without removing the content
+                        # of the section. Else, it won't appear.
+                except IndexOutOfBoundsException:
+                    pass
+        
+    def loadDocument(self):
+        from com.sun.star.lang import IllegalArgumentException, \
+                                      IndexOutOfBoundsException
        from com.sun.star.beans import PropertyValue
        try:
-            # Load the document to convert in a new hidden frame
+            # Loads the document to convert in a new hidden frame
            prop = PropertyValue()
            prop.Name = 'Hidden'
            prop.Value = True
            self.doc = self.oo.loadComponentFromURL(self.docUrl, "_blank", 0,
                                                    (prop,))
-            # Update all indexes
-            indexes = self.doc.getDocumentIndexes()
-            indexesCount = indexes.getCount()
-            if indexesCount != 0:
-                for i in range(indexesCount):
-                    try:
-                        indexes.getByIndex(i).update()
-                    except IndexOutOfBoundsException:
-                        pass
-            # Update sections
-            self.doc.updateLinks()
-            sections = self.doc.getTextSections()
-            sectionsCount = sections.getCount()
-            if sectionsCount != 0:
-                for i in range(sectionsCount-1, -1, -1):
-                    # I must walk into the section from last one to the first
-                    # one. Else, when "disposing" sections, I remove sections
-                    # and the remaining sections other indexes.
-                    try:
-                        section = sections.getByIndex(i)
-                        if section.FileLink and section.FileLink.FileURL:
-                            section.dispose() # This method removes the
-                            # <section></section> tags without removing the content
-                            # of the section. Else, it won't appear.
-                    except IndexOutOfBoundsException:
-                        pass
+            if self.inputType == 'odt':
+                # Perform additional tasks for odt documents
+                self.updateOdtDocument()
+            try:
+                self.doc.refresh()
+            except AttributeError:
+                pass
        except IllegalArgumentException, iae:
            raise ConverterError(URL_NOT_FOUND % (self.docPath, iae))
+
    def convertDocument(self):
-        if self.resultFilter != 'ODT':
-            # I must really perform a conversion
-            from com.sun.star.beans import PropertyValue
-            prop = PropertyValue()
-            prop.Name = 'FilterName'
-            prop.Value = self.resultFilter
-            self.doc.storeToURL(self.resultUrl, (prop,))
-        else:
-            self.doc.storeToURL(self.resultUrl, ())
+        '''Calls OO to perform a document conversion. Note that the conversion
+           is not really done if the source and target documents have the same
+           type.'''
+        properties = []
+        from com.sun.star.beans import PropertyValue
+        prop = PropertyValue()
+        prop.Name = 'FilterName'
+        prop.Value = self.resultFilter
+        properties.append(prop)
+        if self.resultType == 'csv':
+            # For CSV export, add options (separator, etc)
+            optionsProp = PropertyValue()
+            optionsProp.Name = 'FilterOptions'
+            optionsProp.Value = '59,34,76,1'
+            properties.append(optionsProp)
+        self.doc.storeToURL(self.resultUrl, tuple(properties))
+
    def run(self):
+        '''Connects to OO, does the job and disconnects.'''
        self.connect()
        self.loadDocument()
        self.convertDocument()
-        self.disconnect()
+        self.doc.close(True)

 # ConverterScript-related messages ---------------------------------------------
 WRONG_NB_OF_ARGS = 'Wrong number of arguments.'
@ -191,12 +242,13 @@ ERROR_CODE = 1
 class ConverterScript:
    usage = 'usage: python converter.py fileToConvert outputType [options]\n' \
            '   where fileToConvert is the absolute or relative pathname of\n' \
-            '         the ODT file you want to convert;\n'\
+            '         the file you want to convert (or whose content like\n' \
+            '         indexes need to be refreshed);\n'\
            '   and   outputType is the output format, that must be one of\n' \
            '         %s.\n' \
-            ' "python" should be a UNO-enabled Python interpreter (ie the one\n' \
-            ' which is included in the OpenOffice.org distribution).' % \
-            str(ODT_FILE_TYPES.keys())
+            ' "python" should be a UNO-enabled Python interpreter (ie the ' \
+            '  one which is included in the OpenOffice.org distribution).' % \
+            str(FILE_TYPES.keys())
    def run(self):
        optParser = OptionParser(usage=ConverterScript.usage)
        optParser.add_option("-p", "--port", dest="port",
--- a/pod/renderer.py
+++ b/pod/renderer.py
@ -25,7 +25,7 @@ import appy.pod
 from appy.pod import PodError
 from appy.shared.xml_parser import XmlElement
 from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert
-from appy.pod.converter import ODT_FILE_TYPES
+from appy.pod.converter import FILE_TYPES
 from appy.pod.buffers import FileBuffer
 from appy.pod.xhtml2odt import Xhtml2OdtConverter
 from appy.pod.doc_importers import OdtImporter, ImageImporter, PdfImporter
@ -423,9 +423,9 @@ class Renderer:
                os.rename(resultOdtName, self.result)
            else:
                if resultType.startswith('.'): resultType = resultType[1:]
-                if not resultType in ODT_FILE_TYPES.keys():
+                if not resultType in FILE_TYPES.keys():
                    raise PodError(BAD_RESULT_TYPE % (
-                        self.result, ODT_FILE_TYPES.keys()))
+                        self.result, FILE_TYPES.keys()))
                # Call OpenOffice to perform the conversion or document update
                self.callOpenOffice(resultOdtName, resultType)
                # I have the result. Move it to the correct name