From 02fce031433df736f0130a387c2a4d7ccd2e0758 Mon Sep 17 00:00:00 2001 From: Gaetan Delannay Date: Sat, 2 Jul 2011 11:46:49 +0200 Subject: [PATCH] appy.pod: optimized image importing: when an image is imported several times in a pod template through a call to 'do ... from document(at=path)', pod inserts only one copy of the file into the ODT result. --- pod/doc_importers.py | 128 ++++++++++++++++++++----------------------- pod/renderer.py | 23 ++++---- 2 files changed, 73 insertions(+), 78 deletions(-) diff --git a/pod/doc_importers.py b/pod/doc_importers.py index 155bdb1..50807ec 100644 --- a/pod/doc_importers.py +++ b/pod/doc_importers.py @@ -17,7 +17,7 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA. # ------------------------------------------------------------------------------ -import os, os.path, time, shutil, struct +import os, os.path, time, shutil, struct, random from appy.pod import PodError from appy.pod.odf_parser import OdfEnvironment @@ -31,10 +31,13 @@ PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \ class DocImporter: '''Base class used for importing external content into a pod template (an image, another pod template, another odt document...''' - def __init__(self, content, at, format, tempFolder, ns): + def __init__(self, content, at, format, tempFolder, ns, fileNames): self.content = content - self.at = at # If content is None, p_at tells us where to find it - # (file system path, url, etc) + # If content is None, p_at tells us where to find it (file system path, + # url, etc) + self.at = at + # Ensure this path exists. + if at and not os.path.isfile(at): raise PodError(FILE_NOT_FOUND % at) self.format = format self.res = u'' self.ns = ns @@ -45,64 +48,51 @@ class DocImporter: self.svgNs = ns[OdfEnvironment.NS_SVG] self.tempFolder = tempFolder self.importFolder = self.getImportFolder() - # If the importer generates one or several images, we will retain their - # names here, because we will need to declare them in - # META-INF/manifest.xml - self.fileNames = [] - if self.at: - # Check that the file exists - if not os.path.isfile(self.at): - raise PodError(FILE_NOT_FOUND % self.at) - self.importPath = self.moveFile(self.at) + # Create the import folder if it does not exist. + if not os.path.exists(self.importFolder): os.mkdir(self.importFolder) + self.importPath = self.getImportPath(at, format) + # A link to the global fileNames dict (explained in renderer.py) + self.fileNames = fileNames + if at: + # Move the file within the ODT, if it is an image and if this image + # has not already been imported. + self.importPath = self.moveFile(at, self.importPath) else: # We need to dump the file content (in self.content) in a temp file # first. self.content may be binary or a file handler. - if not os.path.exists(self.importFolder): - os.mkdir(self.importFolder) if isinstance(self.content, file): - self.fileName = os.path.basename(self.content.name) fileContent = self.content.read() else: - self.fileName = 'f%f.%s' % (time.time(), self.format) fileContent = self.content - self.importPath = self.getImportPath(self.fileName) - theFile = file(self.importPath, 'w') - theFile.write(fileContent) - theFile.close() - self.importPath = os.path.abspath(self.importPath) + f = file(self.importPath, 'w') + f.write(fileContent) + f.close() + def getImportFolder(self): '''This method must be overridden and gives the path where to dump the content of the document or image. In the case of a document it is a temp folder; in the case of an image it is a folder within the ODT result.''' - pass - def getImportPath(self, fileName): - '''Import path is the path to the external file or image that is now - stored on disk. We check here that this name does not correspond - to an existing file; if yes, we change the path until we get a path - that does not correspond to an existing file.''' - res = '%s/%s' % (self.importFolder, fileName) - resIsGood = False - while not resIsGood: - if not os.path.exists(res): - resIsGood = True - else: - # We must find another file name, this one already exists. - name, ext = os.path.splitext(res) - name += 'g' - res = name + ext - return res - def moveFile(self, at): + + def getImportPath(self, at, format): + '''Gets the path name of the file to dump on disk (within the ODT for + images, in a temp folder for docs).''' + if not format: + format = os.path.splitext(at)[1][1:] + fileName = 'f.%d.%f.%s' % (random.randint(0,10), time.time(), format) + return os.path.abspath('%s/%s' % (self.importFolder, fileName)) + + def moveFile(self, at, importPath): '''In the case parameter "at" was used, we may want to move the file at - p_at within the ODT result (for images) or do nothing (for - documents).''' + p_at within the ODT result in p_importPath (for images) or do + nothing (for docs). In the latter case, the file to import stays + at _at, and is not copied into p_importPath.''' return at class OdtImporter(DocImporter): '''This class allows to import the content of another ODT document into a pod template.''' - def getImportFolder(self): - return '%s/docImports' % self.tempFolder + def getImportFolder(self): return '%s/docImports' % self.tempFolder def run(self): self.res += '<%s:section %s:name="PodImportSection%f">' \ '<%s:section-source %s:href="%s" ' \ @@ -116,8 +106,7 @@ class PdfImporter(DocImporter): template. It calls gs to split the PDF into images and calls the ImageImporter for importing it into the result.''' imagePrefix = 'PdfPart' - def getImportFolder(self): - return '%s/docImports' % self.tempFolder + def getImportFolder(self): return '%s/docImports' % self.tempFolder def run(self): # Split the PDF into images with Ghostscript imagesFolder = os.path.dirname(self.importPath) @@ -132,8 +121,7 @@ class PdfImporter(DocImporter): if fileName == firstImage: succeeded = True break - if not succeeded: - raise PodError(PDF_TO_IMG_ERROR) + if not succeeded: raise PodError(PDF_TO_IMG_ERROR) # Insert images into the result. noMoreImages = False i = 0 @@ -143,10 +131,9 @@ class PdfImporter(DocImporter): if os.path.exists(nextImage): # Use internally an Image importer for doing this job. imgImporter = ImageImporter(None, nextImage, 'jpg', - self.tempFolder, self.ns) + self.tempFolder, self.ns, self.fileNames) imgImporter.setAnchor('paragraph') self.res += imgImporter.run() - self.fileNames += imgImporter.fileNames os.remove(nextImage) else: noMoreImages = True @@ -194,21 +181,25 @@ class ImageImporter(DocImporter): externally.''' anchorTypes = ('page', 'paragraph', 'char', 'as-char') WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.' - def getImportFolder(self): - return '%s/unzip/Pictures' % self.tempFolder - def moveFile(self, at): - '''Image to insert is at p_at. We must move it into the ODT result.''' - fileName = os.path.basename(at) - folderName = self.getImportFolder() - if not os.path.exists(folderName): - os.mkdir(folderName) - res = self.getImportPath(fileName) - shutil.copy(at, res) - return res + def getImportFolder(self): return '%s/unzip/Pictures' % self.tempFolder + + def moveFile(self, at, importPath): + '''Copies file at p_at into the ODT file at p_importPath.''' + # Has this image already been imported ? + for imagePath, imageAt in self.fileNames.iteritems(): + if imageAt == at: + # Yes! + i = importPath.rfind('/Pictures/') + 1 + return importPath[:i] + imagePath + # If I am here, the image has not already been imported: copy it. + shutil.copy(at, importPath) + return importPath + def setAnchor(self, anchor): if anchor not in self.anchorTypes: raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes)) self.anchor = anchor + def run(self): # Some shorcuts for the used xml namespaces d = self.drawNs @@ -219,18 +210,19 @@ class ImageImporter(DocImporter): # Compute path to image i = self.importPath.rfind('/Pictures/') imagePath = self.importPath[i+1:] - self.fileNames.append(imagePath) + self.fileNames[imagePath] = self.at # Compute image size width, height = getSize(self.importPath, self.format) if width != None: size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height) else: size = '' - self.res += '<%s:p><%s:frame %s:name="%s" %s:z-index="0" ' \ - '%s:anchor-type="%s"%s><%s:image %s:type="simple" ' \ - '%s:show="embed" %s:href="%s" %s:actuate="onLoad"/>' \ - '' % \ - (t, d, d, imageName, d, t, self.anchor, size, d, x, x, x, - imagePath, x, d, t) + image = '<%s:frame %s:name="%s" %s:z-index="0" %s:anchor-type="%s"%s>' \ + '<%s:image %s:type="simple" %s:show="embed" %s:href="%s" ' \ + '%s:actuate="onLoad"/>' % (d, d, imageName, d, t, \ + self.anchor, size, d, x, x, x, imagePath, x, d) + if hasattr(self, 'wrapInPara') and self.wrapInPara: + image = '<%s:p>%s' % (t, image, t) + self.res += image return self.res # ------------------------------------------------------------------------------ diff --git a/pod/renderer.py b/pod/renderer.py index bbc36e9..d6224e6 100644 --- a/pod/renderer.py +++ b/pod/renderer.py @@ -142,10 +142,14 @@ class Renderer: self.forceOoCall = forceOoCall self.finalizeFunction = finalizeFunction self.overwriteExisting = overwriteExisting - # Retain potential files or images that will be included through + # Remember potential files or images that will be included through # "do ... from document" statements: we will need to declare them in - # META-INF/manifest.xml. - self.fileNames = [] + # META-INF/manifest.xml. Keys are file names as they appear within the + # ODT file (to dump in manifest.xml); values are original paths of + # included images (used for avoiding to create multiple copies of a file + # which is imported several times). + # imported file). + self.fileNames = {} self.prepareFolders() # Unzip template self.unzipFolder = os.path.join(self.tempFolder, 'unzip') @@ -255,12 +259,12 @@ class Renderer: imageFormats = ('png', 'jpeg', 'jpg', 'gif') ooFormats = ('odt',) def importDocument(self, content=None, at=None, format=None, - anchor='as-char'): + anchor='as-char', wrapInPara=True): '''If p_at is not None, it represents a path or url allowing to find the document. If p_at is None, the content of the document is supposed to be in binary format in p_content. The document - p_format may be: odt or any format in imageFormats. p_anchor is only - relevant for images.''' + p_format may be: odt or any format in imageFormats. p_anchor and + p_wrapInPara are only relevant for images.''' ns = self.currentParser.env.namespaces importer = None # Is there someting to import? @@ -287,12 +291,11 @@ class Renderer: importer = PdfImporter else: raise PodError(DOC_WRONG_FORMAT % format) - imp = importer(content, at, format, self.tempFolder, ns) + imp = importer(content, at, format, self.tempFolder, ns, self.fileNames) if isImage: imp.setAnchor(anchor) + imp.wrapInPara = wrapInPara res = imp.run() - if imp.fileNames: - self.fileNames += imp.fileNames return res def prepareFolders(self): @@ -323,7 +326,7 @@ class Renderer: if self.fileNames: j = os.path.join toInsert = '' - for fileName in self.fileNames: + for fileName in self.fileNames.iterkeys(): mimeType = mimetypes.guess_type(fileName)[0] toInsert += ' \n' % (mimeType, fileName)