diff --git a/pod/converter.py b/pod/converter.py index d44e64f..c7e040d 100644 --- a/pod/converter.py +++ b/pod/converter.py @@ -32,7 +32,13 @@ FILE_TYPES = {'odt': 'writer8', 'txt': 'Text', 'csv': 'Text - txt - csv (StarCalc)', 'pdf': {'odt': 'writer_pdf_Export', 'ods': 'calc_pdf_Export', - 'odp': 'impress_pdf_Export', 'odg': 'draw_pdf_Export'}, + 'odp': 'impress_pdf_Export', 'htm': 'writer_pdf_Export', + 'html': 'writer_pdf_Export', 'rtf': 'writer_pdf_Export', + 'txt': 'writer_pdf_Export', 'csv': 'calc_pdf_Export', + 'swf': 'draw_pdf_Export', 'doc': 'writer_pdf_Export', + 'xls': 'calc_pdf_Export', 'ppt': 'impress_pdf_Export', + 'docx': 'writer_pdf_Export', 'xlsx': 'calc_pdf_Export' + }, 'swf': 'impress_flash_Export', 'doc': 'MS Word 97', 'xls': 'MS Excel 97', diff --git a/pod/doc_importers.py b/pod/doc_importers.py index 480b982..18ee4db 100644 --- a/pod/doc_importers.py +++ b/pod/doc_importers.py @@ -36,6 +36,7 @@ PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \ 'system and the "gs" program is in the path.' CONVERT_ERROR = 'Program "convert", from imagemagick, must be installed and ' \ 'in the path for converting a SVG file into a PNG file.' +TO_PDF_ERROR = 'ConvertImporter error while converting a doc to PDF: %s.' # ------------------------------------------------------------------------------ class DocImporter: @@ -116,7 +117,8 @@ class DocImporter: '''In the case parameter "at" was used, we may want to move the file at p_at within the ODT result in p_importPath (for images) or do nothing (for docs). In the latter case, the file to import stays - at _at, and is not copied into p_importPath.''' + at p_at, and is not copied into p_importPath. So the previously + computed p_importPath is not used at all.''' return at class OdtImporter(DocImporter): @@ -135,18 +137,18 @@ class PdfImporter(DocImporter): '''This class allows to import the content of a PDF file into a pod template. 
It calls gs to split the PDF into images and calls the ImageImporter for importing it into the result.''' - imagePrefix = 'PdfPart' def getImportFolder(self): return '%s/docImports' % self.tempFolder def run(self): + imagePrefix = os.path.splitext(os.path.basename(self.importPath))[0] # Split the PDF into images with Ghostscript imagesFolder = os.path.dirname(self.importPath) cmd = 'gs -dNOPAUSE -dBATCH -sDEVICE=jpeg -r125x125 ' \ '-sOutputFile=%s/%s%%d.jpg %s' % \ - (imagesFolder, self.imagePrefix, self.importPath) + (imagesFolder, imagePrefix, self.importPath) os.system(cmd) # Check that at least one image was generated succeeded = False - firstImage = '%s1.jpg' % self.imagePrefix + firstImage = '%s1.jpg' % imagePrefix for fileName in os.listdir(imagesFolder): if fileName == firstImage: succeeded = True @@ -157,10 +159,10 @@ class PdfImporter(DocImporter): i = 0 while not noMoreImages: i += 1 - nextImage = '%s/%s%d.jpg' % (imagesFolder, self.imagePrefix, i) + nextImage = '%s/%s%d.jpg' % (imagesFolder, imagePrefix, i) if os.path.exists(nextImage): # Use internally an Image importer for doing this job. - imgImporter =ImageImporter(None, nextImage, 'jpg',self.renderer) + imgImporter= ImageImporter(None, nextImage, 'jpg',self.renderer) imgImporter.setImageInfo('paragraph', True, None, None, None) self.res += imgImporter.run() os.remove(nextImage) @@ -168,6 +170,20 @@ class PdfImporter(DocImporter): noMoreImages = True return self.res +class ConvertImporter(DocImporter): + '''This class allows to import the content of any file that LibreOffice (LO) + can convert into PDF: doc, rtf, xls. 
It first calls LO to convert the + document into PDF, then calls a PdfImporter.''' + def getImportFolder(self): return '%s/docImports' % self.tempFolder + def run(self): + # Convert the document into PDF with LibreOffice + output = self.renderer.callLibreOffice(self.importPath, 'pdf') + if output: raise PodError(TO_PDF_ERROR % output) + pdfFile = '%s.pdf' % os.path.splitext(self.importPath)[0] + # Launch a PdfImporter to import this PDF into the POD result. + pdfImporter = PdfImporter(None, pdfFile, 'pdf', self.renderer) + return pdfImporter.run() + # Compute size of images ------------------------------------------------------- jpgTypes = ('jpg', 'jpeg') pxToCm = 44.173513561 diff --git a/pod/renderer.py b/pod/renderer.py index 3aaf4a4..25399be 100644 --- a/pod/renderer.py +++ b/pod/renderer.py @@ -25,13 +25,13 @@ import appy.pod, time, cgi from appy.pod import PodError from appy.shared import mimeTypes, mimeTypesExts from appy.shared.xml_parser import XmlElement -from appy.shared.utils import FolderDeleter, executeCommand -from appy.shared.utils import FileWrapper +from appy.shared.utils import FolderDeleter, executeCommand, FileWrapper from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert from appy.pod.converter import FILE_TYPES from appy.pod.buffers import FileBuffer from appy.pod.xhtml2odt import Xhtml2OdtConverter -from appy.pod.doc_importers import OdtImporter, ImageImporter, PdfImporter +from appy.pod.doc_importers import \ + OdtImporter, ImageImporter, PdfImporter, ConvertImporter from appy.pod.styles_manager import StylesManager # ------------------------------------------------------------------------------ @@ -274,6 +274,7 @@ class Renderer: imageFormats = ('png', 'jpeg', 'jpg', 'gif', 'svg') ooFormats = ('odt',) + convertibleFormats = FILE_TYPES.keys() def importDocument(self, content=None, at=None, format=None, anchor='as-char', wrapInPara=True, size=None, sizeUnit='cm', style=None): @@ -300,12 +301,12 @@ class Renderer: # Is there 
someting to import? if not content and not at: raise PodError(DOC_NOT_SPECIFIED) + # Convert Zope files into Appy wrappers. + if content.__class__.__name__ == 'File': + content = FileWrapper(content) # Guess document format if isinstance(content, FileWrapper): format = content.mimeType - elif hasattr(content, 'filename') and content.filename: - format = os.path.splitext(content.filename)[1][1:] - content = content.data if not format: # It should be deduced from p_at if not at: @@ -325,6 +326,8 @@ class Renderer: isImage = True elif format == 'pdf': importer = PdfImporter + elif format in self.convertibleFormats: + importer = ConvertImporter else: raise PodError(DOC_WRONG_FORMAT % format) imp = importer(content, at, format, self) diff --git a/shared/__init__.py b/shared/__init__.py index bf93f3e..7efdc8e 100644 --- a/shared/__init__.py +++ b/shared/__init__.py @@ -5,6 +5,8 @@ import os.path # ------------------------------------------------------------------------------ appyPath = os.path.realpath(os.path.dirname(appy.__file__)) od = 'application/vnd.oasis.opendocument' +ms = 'application/vnd.openxmlformats-officedocument' + mimeTypes = {'odt': '%s.text' % od, 'ods': '%s.spreadsheet' % od, 'doc': 'application/msword', @@ -12,16 +14,21 @@ mimeTypes = {'odt': '%s.text' % od, 'pdf': 'application/pdf' } mimeTypesExts = { - '%s.text' % od: 'odt', - '%s.spreadsheet' % od: 'ods', - 'application/msword': 'doc', - 'text/rtf': 'rtf', - 'application/pdf': 'pdf', - 'image/png': 'png', - 'image/jpeg': 'jpg', - 'image/pjpeg': 'jpg', - 'image/gif': 'gif' - } + '%s.text' % od: 'odt', + '%s.spreadsheet' % od: 'ods', + 'application/msword': 'doc', + 'text/rtf': 'rtf', + 'application/pdf': 'pdf', + 'image/png': 'png', + 'image/jpeg': 'jpg', + 'image/pjpeg': 'jpg', + 'image/gif': 'gif', + 'application/vnd.ms-excel': 'xls', + 'application/vnd.ms-powerpoint': 'ppt', + '%s.wordprocessingml.document' % ms: 'docx', + '%s.spreadsheetml.sheet' % ms: 'xlsx', + '%s.presentationml.presentation' 
% ms: 'pptx', +} # ------------------------------------------------------------------------------ class UnmarshalledFile: