[pod] Added the possibility, for function 'document', to import external documents of various formats (doc, xls, ppt, etc.) into an ODT result. This new 'ConvertImporter' uses LibreOffice to first convert the document into PDF, and then uses the PdfImporter to split the PDF into images (one image per page) and include them in the pod result.

2013-04-19 15:30:18 +02:00 · 2013-04-19 15:30:18 +02:00 · eaf7156b47
commit eaf7156b47
parent 0a8e06f1d9
4 changed files with 55 additions and 23 deletions
--- a/pod/converter.py
+++ b/pod/converter.py
@ -32,7 +32,13 @@ FILE_TYPES = {'odt': 'writer8',
              'txt': 'Text',
              'csv': 'Text - txt - csv (StarCalc)',
              'pdf': {'odt': 'writer_pdf_Export',  'ods': 'calc_pdf_Export',
-                      'odp': 'impress_pdf_Export', 'odg': 'draw_pdf_Export'},
+                      'odp': 'impress_pdf_Export', 'htm': 'writer_pdf_Export',
                      'html': 'writer_pdf_Export', 'rtf': 'writer_pdf_Export',
                      'txt': 'writer_pdf_Export', 'csv': 'calc_pdf_Export',
                      'swf': 'draw_pdf_Export', 'doc': 'writer_pdf_Export',
                      'xls': 'calc_pdf_Export', 'ppt': 'impress_pdf_Export',
                      'docx': 'writer_pdf_Export', 'xlsx': 'calc_pdf_Export'
                      },
              'swf': 'impress_flash_Export',
              'doc': 'MS Word 97',
              'xls': 'MS Excel 97',
--- a/pod/doc_importers.py
+++ b/pod/doc_importers.py
@ -36,6 +36,7 @@ PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \
                   'system and the "gs" program is in the path.'
 CONVERT_ERROR = 'Program "convert", from imagemagick, must be installed and ' \
                'in the path for converting a SVG file into a PNG file.'
 TO_PDF_ERROR = 'ConvertImporter error while converting a doc to PDF: %s.'
 # ------------------------------------------------------------------------------
 class DocImporter:
@ -116,7 +117,8 @@ class DocImporter:
        '''In the case parameter "at" was used, we may want to move the file at
           p_at within the ODT result in p_importPath (for images) or do
           nothing (for docs). In the latter case, the file to import stays
-           at _at, and is not copied into p_importPath.'''
+           at _at, and is not copied into p_importPath. So the previously
           computed p_importPath is not used at all.'''
        return at
 class OdtImporter(DocImporter):
@ -135,18 +137,18 @@ class PdfImporter(DocImporter):
    '''This class allows to import the content of a PDF file into a pod
       template. It calls gs to split the PDF into images and calls the
       ImageImporter for importing it into the result.'''
    imagePrefix = 'PdfPart'
    def getImportFolder(self): return '%s/docImports' % self.tempFolder
    def run(self):
        imagePrefix = os.path.splitext(os.path.basename(self.importPath))[0]
        # Split the PDF into images with Ghostscript
        imagesFolder = os.path.dirname(self.importPath)
        cmd = 'gs -dNOPAUSE -dBATCH -sDEVICE=jpeg -r125x125 ' \
              '-sOutputFile=%s/%s%%d.jpg %s' % \
-              (imagesFolder, self.imagePrefix, self.importPath)
+              (imagesFolder, imagePrefix, self.importPath)
        os.system(cmd)
        # Check that at least one image was generated
        succeeded = False
-        firstImage = '%s1.jpg' % self.imagePrefix
+        firstImage = '%s1.jpg' % imagePrefix
        for fileName in os.listdir(imagesFolder):
            if fileName == firstImage:
                succeeded = True
@ -157,10 +159,10 @@ class PdfImporter(DocImporter):
        i = 0
        while not noMoreImages:
            i += 1
-            nextImage = '%s/%s%d.jpg' % (imagesFolder, self.imagePrefix, i)
+            nextImage = '%s/%s%d.jpg' % (imagesFolder, imagePrefix, i)
            if os.path.exists(nextImage):
                # Use internally an Image importer for doing this job.
-                imgImporter =ImageImporter(None, nextImage, 'jpg',self.renderer)
+                imgImporter= ImageImporter(None, nextImage, 'jpg',self.renderer)
                imgImporter.setImageInfo('paragraph', True, None, None, None)
                self.res += imgImporter.run()
                os.remove(nextImage)
@ -168,6 +170,20 @@ class PdfImporter(DocImporter):
                noMoreImages = True
        return self.res
 class ConvertImporter(DocImporter):
    '''This class allows to import the content of any file that LibreOffice (LO)
       can convert into PDF: doc, rtf, xls. It first calls LO to convert the
       document into PDF, then calls a PdfImporter.'''
    def getImportFolder(self): return '%s/docImports' % self.tempFolder
    def run(self):
        # Convert the document into PDF with LibreOffice
        output = self.renderer.callLibreOffice(self.importPath, 'pdf')
        if output: raise PodError(TO_PDF_ERROR % output)
        pdfFile = '%s.pdf' % os.path.splitext(self.importPath)[0]
        # Launch a PdfImporter to import this PDF into the POD result.
        pdfImporter = PdfImporter(None, pdfFile, 'pdf', self.renderer)
        return pdfImporter.run()
 # Compute size of images -------------------------------------------------------
 jpgTypes = ('jpg', 'jpeg')
 pxToCm = 44.173513561
--- a/pod/renderer.py
+++ b/pod/renderer.py
@ -25,13 +25,13 @@ import appy.pod, time, cgi
 from appy.pod import PodError
 from appy.shared import mimeTypes, mimeTypesExts
 from appy.shared.xml_parser import XmlElement
-from appy.shared.utils import FolderDeleter, executeCommand
+from appy.shared.utils import FolderDeleter, executeCommand, FileWrapper
 from appy.shared.utils import FileWrapper
 from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert
 from appy.pod.converter import FILE_TYPES
 from appy.pod.buffers import FileBuffer
 from appy.pod.xhtml2odt import Xhtml2OdtConverter
-from appy.pod.doc_importers import OdtImporter, ImageImporter, PdfImporter
+from appy.pod.doc_importers import \
     OdtImporter, ImageImporter, PdfImporter, ConvertImporter
 from appy.pod.styles_manager import StylesManager
 # ------------------------------------------------------------------------------
@ -274,6 +274,7 @@ class Renderer:
    imageFormats = ('png', 'jpeg', 'jpg', 'gif', 'svg')
    ooFormats = ('odt',)
    convertibleFormats = FILE_TYPES.keys()
    def importDocument(self, content=None, at=None, format=None,
                       anchor='as-char', wrapInPara=True, size=None,
                       sizeUnit='cm', style=None):
@ -300,12 +301,12 @@ class Renderer:
        # Is there someting to import?
        if not content and not at:
            raise PodError(DOC_NOT_SPECIFIED)
        # Convert Zope files into Appy wrappers.
        if content.__class__.__name__ == 'File':
            content = FileWrapper(content)
        # Guess document format
        if isinstance(content, FileWrapper):
            format = content.mimeType
        elif hasattr(content, 'filename') and content.filename:
            format = os.path.splitext(content.filename)[1][1:]
            content = content.data
        if not format:
            # It should be deduced from p_at
            if not at:
@ -325,6 +326,8 @@ class Renderer:
            isImage = True
        elif format == 'pdf':
            importer = PdfImporter
        elif format in self.convertibleFormats:
            importer = ConvertImporter
        else:
            raise PodError(DOC_WRONG_FORMAT % format)
        imp = importer(content, at, format, self)
--- a/shared/init.py
+++ b/shared/init.py
@ -5,6 +5,8 @@ import os.path
 # ------------------------------------------------------------------------------
 appyPath = os.path.realpath(os.path.dirname(appy.__file__))
 od = 'application/vnd.oasis.opendocument'
 ms = 'application/vnd.openxmlformats-officedocument'
 mimeTypes = {'odt': '%s.text' % od,
             'ods': '%s.spreadsheet' % od,
             'doc': 'application/msword',
@ -12,16 +14,21 @@ mimeTypes = {'odt': '%s.text' % od,
             'pdf': 'application/pdf'
             }
 mimeTypesExts = {
-             '%s.text' % od:        'odt',
+    '%s.text' % od:        'odt',
-             '%s.spreadsheet' % od: 'ods',
+    '%s.spreadsheet' % od: 'ods',
-             'application/msword':  'doc',
+    'application/msword':  'doc',
-             'text/rtf':            'rtf',
+    'text/rtf':            'rtf',
-             'application/pdf':     'pdf',
+    'application/pdf':     'pdf',
-             'image/png':           'png',
+    'image/png':           'png',
-             'image/jpeg':          'jpg',
+    'image/jpeg':          'jpg',
-             'image/pjpeg':         'jpg',
+    'image/pjpeg':         'jpg',
-             'image/gif':           'gif'
+    'image/gif':           'gif',
-             }
+    'application/vnd.ms-excel': 'xls',
    'application/vnd.ms-powerpoint': 'ppt',
    '%s.wordprocessingml.document' % ms: 'docx',
    '%s.spreadsheetml.sheet' % ms: 'xlsx',
    '%s.presentationml.presentation' % ms: 'pptx',
 }
 # ------------------------------------------------------------------------------
 class UnmarshalledFile: