[pod] Added the possibility, for function 'document', to import, into an ODT result, external documents of various formats: doc, xls, ppt, etc. This new 'ConvertImporter' uses LibreOffice to first convert the doc into PDF, and then uses the PdfImporter to split the PDF into images (one image per page) and include them in the pod result.
This commit is contained in:
parent
0a8e06f1d9
commit
eaf7156b47
|
@ -32,7 +32,13 @@ FILE_TYPES = {'odt': 'writer8',
|
||||||
'txt': 'Text',
|
'txt': 'Text',
|
||||||
'csv': 'Text - txt - csv (StarCalc)',
|
'csv': 'Text - txt - csv (StarCalc)',
|
||||||
'pdf': {'odt': 'writer_pdf_Export', 'ods': 'calc_pdf_Export',
|
'pdf': {'odt': 'writer_pdf_Export', 'ods': 'calc_pdf_Export',
|
||||||
'odp': 'impress_pdf_Export', 'odg': 'draw_pdf_Export'},
|
'odp': 'impress_pdf_Export', 'htm': 'writer_pdf_Export',
|
||||||
|
'html': 'writer_pdf_Export', 'rtf': 'writer_pdf_Export',
|
||||||
|
'txt': 'writer_pdf_Export', 'csv': 'calc_pdf_Export',
|
||||||
|
'swf': 'draw_pdf_Export', 'doc': 'writer_pdf_Export',
|
||||||
|
'xls': 'calc_pdf_Export', 'ppt': 'impress_pdf_Export',
|
||||||
|
'docx': 'writer_pdf_Export', 'xlsx': 'calc_pdf_Export'
|
||||||
|
},
|
||||||
'swf': 'impress_flash_Export',
|
'swf': 'impress_flash_Export',
|
||||||
'doc': 'MS Word 97',
|
'doc': 'MS Word 97',
|
||||||
'xls': 'MS Excel 97',
|
'xls': 'MS Excel 97',
|
||||||
|
|
|
@ -36,6 +36,7 @@ PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \
|
||||||
'system and the "gs" program is in the path.'
|
'system and the "gs" program is in the path.'
|
||||||
CONVERT_ERROR = 'Program "convert", from imagemagick, must be installed and ' \
|
CONVERT_ERROR = 'Program "convert", from imagemagick, must be installed and ' \
|
||||||
'in the path for converting a SVG file into a PNG file.'
|
'in the path for converting a SVG file into a PNG file.'
|
||||||
|
TO_PDF_ERROR = 'ConvertImporter error while converting a doc to PDF: %s.'
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
class DocImporter:
|
class DocImporter:
|
||||||
|
@ -116,7 +117,8 @@ class DocImporter:
|
||||||
'''In the case parameter "at" was used, we may want to move the file at
|
'''In the case parameter "at" was used, we may want to move the file at
|
||||||
p_at within the ODT result in p_importPath (for images) or do
|
p_at within the ODT result in p_importPath (for images) or do
|
||||||
nothing (for docs). In the latter case, the file to import stays
|
nothing (for docs). In the latter case, the file to import stays
|
||||||
at _at, and is not copied into p_importPath.'''
|
at p_at, and is not copied into p_importPath. So the previously
|
||||||
|
computed p_importPath is not used at all.'''
|
||||||
return at
|
return at
|
||||||
|
|
||||||
class OdtImporter(DocImporter):
|
class OdtImporter(DocImporter):
|
||||||
|
@ -135,18 +137,18 @@ class PdfImporter(DocImporter):
|
||||||
'''This class allows to import the content of a PDF file into a pod
|
'''This class allows to import the content of a PDF file into a pod
|
||||||
template. It calls gs to split the PDF into images and calls the
|
template. It calls gs to split the PDF into images and calls the
|
||||||
ImageImporter for importing it into the result.'''
|
ImageImporter for importing it into the result.'''
|
||||||
imagePrefix = 'PdfPart'
|
|
||||||
def getImportFolder(self): return '%s/docImports' % self.tempFolder
|
def getImportFolder(self): return '%s/docImports' % self.tempFolder
|
||||||
def run(self):
|
def run(self):
|
||||||
|
imagePrefix = os.path.splitext(os.path.basename(self.importPath))[0]
|
||||||
# Split the PDF into images with Ghostscript
|
# Split the PDF into images with Ghostscript
|
||||||
imagesFolder = os.path.dirname(self.importPath)
|
imagesFolder = os.path.dirname(self.importPath)
|
||||||
cmd = 'gs -dNOPAUSE -dBATCH -sDEVICE=jpeg -r125x125 ' \
|
cmd = 'gs -dNOPAUSE -dBATCH -sDEVICE=jpeg -r125x125 ' \
|
||||||
'-sOutputFile=%s/%s%%d.jpg %s' % \
|
'-sOutputFile=%s/%s%%d.jpg %s' % \
|
||||||
(imagesFolder, self.imagePrefix, self.importPath)
|
(imagesFolder, imagePrefix, self.importPath)
|
||||||
os.system(cmd)
|
os.system(cmd)
|
||||||
# Check that at least one image was generated
|
# Check that at least one image was generated
|
||||||
succeeded = False
|
succeeded = False
|
||||||
firstImage = '%s1.jpg' % self.imagePrefix
|
firstImage = '%s1.jpg' % imagePrefix
|
||||||
for fileName in os.listdir(imagesFolder):
|
for fileName in os.listdir(imagesFolder):
|
||||||
if fileName == firstImage:
|
if fileName == firstImage:
|
||||||
succeeded = True
|
succeeded = True
|
||||||
|
@ -157,10 +159,10 @@ class PdfImporter(DocImporter):
|
||||||
i = 0
|
i = 0
|
||||||
while not noMoreImages:
|
while not noMoreImages:
|
||||||
i += 1
|
i += 1
|
||||||
nextImage = '%s/%s%d.jpg' % (imagesFolder, self.imagePrefix, i)
|
nextImage = '%s/%s%d.jpg' % (imagesFolder, imagePrefix, i)
|
||||||
if os.path.exists(nextImage):
|
if os.path.exists(nextImage):
|
||||||
# Use internally an Image importer for doing this job.
|
# Use internally an Image importer for doing this job.
|
||||||
imgImporter =ImageImporter(None, nextImage, 'jpg',self.renderer)
|
imgImporter= ImageImporter(None, nextImage, 'jpg',self.renderer)
|
||||||
imgImporter.setImageInfo('paragraph', True, None, None, None)
|
imgImporter.setImageInfo('paragraph', True, None, None, None)
|
||||||
self.res += imgImporter.run()
|
self.res += imgImporter.run()
|
||||||
os.remove(nextImage)
|
os.remove(nextImage)
|
||||||
|
@ -168,6 +170,20 @@ class PdfImporter(DocImporter):
|
||||||
noMoreImages = True
|
noMoreImages = True
|
||||||
return self.res
|
return self.res
|
||||||
|
|
||||||
|
class ConvertImporter(DocImporter):
|
||||||
|
'''This class allows to import the content of any file that LibreOffice (LO)
|
||||||
|
can convert into PDF: doc, rtf, xls. It first calls LO to convert the
|
||||||
|
document into PDF, then calls a PdfImporter.'''
|
||||||
|
def getImportFolder(self): return '%s/docImports' % self.tempFolder
|
||||||
|
def run(self):
|
||||||
|
# Convert the document into PDF with LibreOffice
|
||||||
|
output = self.renderer.callLibreOffice(self.importPath, 'pdf')
|
||||||
|
if output: raise PodError(TO_PDF_ERROR % output)
|
||||||
|
pdfFile = '%s.pdf' % os.path.splitext(self.importPath)[0]
|
||||||
|
# Launch a PdfImporter to import this PDF into the POD result.
|
||||||
|
pdfImporter = PdfImporter(None, pdfFile, 'pdf', self.renderer)
|
||||||
|
return pdfImporter.run()
|
||||||
|
|
||||||
# Compute size of images -------------------------------------------------------
|
# Compute size of images -------------------------------------------------------
|
||||||
jpgTypes = ('jpg', 'jpeg')
|
jpgTypes = ('jpg', 'jpeg')
|
||||||
pxToCm = 44.173513561
|
pxToCm = 44.173513561
|
||||||
|
|
|
@ -25,13 +25,13 @@ import appy.pod, time, cgi
|
||||||
from appy.pod import PodError
|
from appy.pod import PodError
|
||||||
from appy.shared import mimeTypes, mimeTypesExts
|
from appy.shared import mimeTypes, mimeTypesExts
|
||||||
from appy.shared.xml_parser import XmlElement
|
from appy.shared.xml_parser import XmlElement
|
||||||
from appy.shared.utils import FolderDeleter, executeCommand
|
from appy.shared.utils import FolderDeleter, executeCommand, FileWrapper
|
||||||
from appy.shared.utils import FileWrapper
|
|
||||||
from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert
|
from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert
|
||||||
from appy.pod.converter import FILE_TYPES
|
from appy.pod.converter import FILE_TYPES
|
||||||
from appy.pod.buffers import FileBuffer
|
from appy.pod.buffers import FileBuffer
|
||||||
from appy.pod.xhtml2odt import Xhtml2OdtConverter
|
from appy.pod.xhtml2odt import Xhtml2OdtConverter
|
||||||
from appy.pod.doc_importers import OdtImporter, ImageImporter, PdfImporter
|
from appy.pod.doc_importers import \
|
||||||
|
OdtImporter, ImageImporter, PdfImporter, ConvertImporter
|
||||||
from appy.pod.styles_manager import StylesManager
|
from appy.pod.styles_manager import StylesManager
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
@ -274,6 +274,7 @@ class Renderer:
|
||||||
|
|
||||||
imageFormats = ('png', 'jpeg', 'jpg', 'gif', 'svg')
|
imageFormats = ('png', 'jpeg', 'jpg', 'gif', 'svg')
|
||||||
ooFormats = ('odt',)
|
ooFormats = ('odt',)
|
||||||
|
convertibleFormats = FILE_TYPES.keys()
|
||||||
def importDocument(self, content=None, at=None, format=None,
|
def importDocument(self, content=None, at=None, format=None,
|
||||||
anchor='as-char', wrapInPara=True, size=None,
|
anchor='as-char', wrapInPara=True, size=None,
|
||||||
sizeUnit='cm', style=None):
|
sizeUnit='cm', style=None):
|
||||||
|
@ -300,12 +301,12 @@ class Renderer:
|
||||||
# Is there something to import?
|
# Is there something to import?
|
||||||
if not content and not at:
|
if not content and not at:
|
||||||
raise PodError(DOC_NOT_SPECIFIED)
|
raise PodError(DOC_NOT_SPECIFIED)
|
||||||
|
# Convert Zope files into Appy wrappers.
|
||||||
|
if content.__class__.__name__ == 'File':
|
||||||
|
content = FileWrapper(content)
|
||||||
# Guess document format
|
# Guess document format
|
||||||
if isinstance(content, FileWrapper):
|
if isinstance(content, FileWrapper):
|
||||||
format = content.mimeType
|
format = content.mimeType
|
||||||
elif hasattr(content, 'filename') and content.filename:
|
|
||||||
format = os.path.splitext(content.filename)[1][1:]
|
|
||||||
content = content.data
|
|
||||||
if not format:
|
if not format:
|
||||||
# It should be deduced from p_at
|
# It should be deduced from p_at
|
||||||
if not at:
|
if not at:
|
||||||
|
@ -325,6 +326,8 @@ class Renderer:
|
||||||
isImage = True
|
isImage = True
|
||||||
elif format == 'pdf':
|
elif format == 'pdf':
|
||||||
importer = PdfImporter
|
importer = PdfImporter
|
||||||
|
elif format in self.convertibleFormats:
|
||||||
|
importer = ConvertImporter
|
||||||
else:
|
else:
|
||||||
raise PodError(DOC_WRONG_FORMAT % format)
|
raise PodError(DOC_WRONG_FORMAT % format)
|
||||||
imp = importer(content, at, format, self)
|
imp = importer(content, at, format, self)
|
||||||
|
|
|
@ -5,6 +5,8 @@ import os.path
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
appyPath = os.path.realpath(os.path.dirname(appy.__file__))
|
appyPath = os.path.realpath(os.path.dirname(appy.__file__))
|
||||||
od = 'application/vnd.oasis.opendocument'
|
od = 'application/vnd.oasis.opendocument'
|
||||||
|
ms = 'application/vnd.openxmlformats-officedocument'
|
||||||
|
|
||||||
mimeTypes = {'odt': '%s.text' % od,
|
mimeTypes = {'odt': '%s.text' % od,
|
||||||
'ods': '%s.spreadsheet' % od,
|
'ods': '%s.spreadsheet' % od,
|
||||||
'doc': 'application/msword',
|
'doc': 'application/msword',
|
||||||
|
@ -20,8 +22,13 @@ mimeTypesExts = {
|
||||||
'image/png': 'png',
|
'image/png': 'png',
|
||||||
'image/jpeg': 'jpg',
|
'image/jpeg': 'jpg',
|
||||||
'image/pjpeg': 'jpg',
|
'image/pjpeg': 'jpg',
|
||||||
'image/gif': 'gif'
|
'image/gif': 'gif',
|
||||||
}
|
'application/vnd.ms-excel': 'xls',
|
||||||
|
'application/vnd.ms-powerpoint': 'ppt',
|
||||||
|
'%s.wordprocessingml.document' % ms: 'docx',
|
||||||
|
'%s.spreadsheetml.sheet' % ms: 'xlsx',
|
||||||
|
'%s.presentationml.presentation' % ms: 'pptx',
|
||||||
|
}
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
class UnmarshalledFile:
|
class UnmarshalledFile:
|
||||||
|
|
Loading…
Reference in a new issue