appy.pod: optimized image importing: when an image is imported several times in a pod template through a call to 'do ... from document(at=path)', pod inserts only one copy of the file into the ODT result.

This commit is contained in:
Gaetan Delannay 2011-07-02 11:46:49 +02:00
parent e38b78d10c
commit 02fce03143
2 changed files with 73 additions and 78 deletions

View file

@ -17,7 +17,7 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
import os, os.path, time, shutil, struct import os, os.path, time, shutil, struct, random
from appy.pod import PodError from appy.pod import PodError
from appy.pod.odf_parser import OdfEnvironment from appy.pod.odf_parser import OdfEnvironment
@ -31,10 +31,13 @@ PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \
class DocImporter: class DocImporter:
'''Base class used for importing external content into a pod template (an '''Base class used for importing external content into a pod template (an
image, another pod template, another odt document...)''' image, another pod template, another odt document...)'''
def __init__(self, content, at, format, tempFolder, ns): def __init__(self, content, at, format, tempFolder, ns, fileNames):
self.content = content self.content = content
self.at = at # If content is None, p_at tells us where to find it # If content is None, p_at tells us where to find it (file system path,
# (file system path, url, etc) # url, etc)
self.at = at
# Ensure this path exists.
if at and not os.path.isfile(at): raise PodError(FILE_NOT_FOUND % at)
self.format = format self.format = format
self.res = u'' self.res = u''
self.ns = ns self.ns = ns
@ -45,64 +48,51 @@ class DocImporter:
self.svgNs = ns[OdfEnvironment.NS_SVG] self.svgNs = ns[OdfEnvironment.NS_SVG]
self.tempFolder = tempFolder self.tempFolder = tempFolder
self.importFolder = self.getImportFolder() self.importFolder = self.getImportFolder()
# If the importer generates one or several images, we will retain their # Create the import folder if it does not exist.
# names here, because we will need to declare them in if not os.path.exists(self.importFolder): os.mkdir(self.importFolder)
# META-INF/manifest.xml self.importPath = self.getImportPath(at, format)
self.fileNames = [] # A link to the global fileNames dict (explained in renderer.py)
if self.at: self.fileNames = fileNames
# Check that the file exists if at:
if not os.path.isfile(self.at): # Move the file within the ODT, if it is an image and if this image
raise PodError(FILE_NOT_FOUND % self.at) # has not already been imported.
self.importPath = self.moveFile(self.at) self.importPath = self.moveFile(at, self.importPath)
else: else:
# We need to dump the file content (in self.content) in a temp file # We need to dump the file content (in self.content) in a temp file
# first. self.content may be binary or a file handler. # first. self.content may be binary or a file handler.
if not os.path.exists(self.importFolder):
os.mkdir(self.importFolder)
if isinstance(self.content, file): if isinstance(self.content, file):
self.fileName = os.path.basename(self.content.name)
fileContent = self.content.read() fileContent = self.content.read()
else: else:
self.fileName = 'f%f.%s' % (time.time(), self.format)
fileContent = self.content fileContent = self.content
self.importPath = self.getImportPath(self.fileName) f = file(self.importPath, 'w')
theFile = file(self.importPath, 'w') f.write(fileContent)
theFile.write(fileContent) f.close()
theFile.close()
self.importPath = os.path.abspath(self.importPath)
def getImportFolder(self): def getImportFolder(self):
'''This method must be overridden and gives the path where to dump the '''This method must be overridden and gives the path where to dump the
content of the document or image. In the case of a document it is a content of the document or image. In the case of a document it is a
temp folder; in the case of an image it is a folder within the ODT temp folder; in the case of an image it is a folder within the ODT
result.''' result.'''
pass
def getImportPath(self, fileName): def getImportPath(self, at, format):
'''Import path is the path to the external file or image that is now '''Gets the path name of the file to dump on disk (within the ODT for
stored on disk. We check here that this name does not correspond images, in a temp folder for docs).'''
to an existing file; if yes, we change the path until we get a path if not format:
that does not correspond to an existing file.''' format = os.path.splitext(at)[1][1:]
res = '%s/%s' % (self.importFolder, fileName) fileName = 'f.%d.%f.%s' % (random.randint(0,10), time.time(), format)
resIsGood = False return os.path.abspath('%s/%s' % (self.importFolder, fileName))
while not resIsGood:
if not os.path.exists(res): def moveFile(self, at, importPath):
resIsGood = True
else:
# We must find another file name, this one already exists.
name, ext = os.path.splitext(res)
name += 'g'
res = name + ext
return res
def moveFile(self, at):
'''In the case parameter "at" was used, we may want to move the file at '''In the case parameter "at" was used, we may want to move the file at
p_at within the ODT result (for images) or do nothing (for p_at within the ODT result in p_importPath (for images) or do
documents).''' nothing (for docs). In the latter case, the file to import stays
at p_at, and is not copied into p_importPath.'''
return at return at
class OdtImporter(DocImporter): class OdtImporter(DocImporter):
'''This class allows to import the content of another ODT document into a '''This class allows to import the content of another ODT document into a
pod template.''' pod template.'''
def getImportFolder(self): def getImportFolder(self): return '%s/docImports' % self.tempFolder
return '%s/docImports' % self.tempFolder
def run(self): def run(self):
self.res += '<%s:section %s:name="PodImportSection%f">' \ self.res += '<%s:section %s:name="PodImportSection%f">' \
'<%s:section-source %s:href="%s" ' \ '<%s:section-source %s:href="%s" ' \
@ -116,8 +106,7 @@ class PdfImporter(DocImporter):
template. It calls gs to split the PDF into images and calls the template. It calls gs to split the PDF into images and calls the
ImageImporter for importing it into the result.''' ImageImporter for importing it into the result.'''
imagePrefix = 'PdfPart' imagePrefix = 'PdfPart'
def getImportFolder(self): def getImportFolder(self): return '%s/docImports' % self.tempFolder
return '%s/docImports' % self.tempFolder
def run(self): def run(self):
# Split the PDF into images with Ghostscript # Split the PDF into images with Ghostscript
imagesFolder = os.path.dirname(self.importPath) imagesFolder = os.path.dirname(self.importPath)
@ -132,8 +121,7 @@ class PdfImporter(DocImporter):
if fileName == firstImage: if fileName == firstImage:
succeeded = True succeeded = True
break break
if not succeeded: if not succeeded: raise PodError(PDF_TO_IMG_ERROR)
raise PodError(PDF_TO_IMG_ERROR)
# Insert images into the result. # Insert images into the result.
noMoreImages = False noMoreImages = False
i = 0 i = 0
@ -143,10 +131,9 @@ class PdfImporter(DocImporter):
if os.path.exists(nextImage): if os.path.exists(nextImage):
# Use internally an Image importer for doing this job. # Use internally an Image importer for doing this job.
imgImporter = ImageImporter(None, nextImage, 'jpg', imgImporter = ImageImporter(None, nextImage, 'jpg',
self.tempFolder, self.ns) self.tempFolder, self.ns, self.fileNames)
imgImporter.setAnchor('paragraph') imgImporter.setAnchor('paragraph')
self.res += imgImporter.run() self.res += imgImporter.run()
self.fileNames += imgImporter.fileNames
os.remove(nextImage) os.remove(nextImage)
else: else:
noMoreImages = True noMoreImages = True
@ -194,21 +181,25 @@ class ImageImporter(DocImporter):
externally.''' externally.'''
anchorTypes = ('page', 'paragraph', 'char', 'as-char') anchorTypes = ('page', 'paragraph', 'char', 'as-char')
WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.' WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.'
def getImportFolder(self): def getImportFolder(self): return '%s/unzip/Pictures' % self.tempFolder
return '%s/unzip/Pictures' % self.tempFolder
def moveFile(self, at): def moveFile(self, at, importPath):
'''Image to insert is at p_at. We must move it into the ODT result.''' '''Copies file at p_at into the ODT file at p_importPath.'''
fileName = os.path.basename(at) # Has this image already been imported ?
folderName = self.getImportFolder() for imagePath, imageAt in self.fileNames.iteritems():
if not os.path.exists(folderName): if imageAt == at:
os.mkdir(folderName) # Yes!
res = self.getImportPath(fileName) i = importPath.rfind('/Pictures/') + 1
shutil.copy(at, res) return importPath[:i] + imagePath
return res # If I am here, the image has not already been imported: copy it.
shutil.copy(at, importPath)
return importPath
def setAnchor(self, anchor): def setAnchor(self, anchor):
if anchor not in self.anchorTypes: if anchor not in self.anchorTypes:
raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes)) raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes))
self.anchor = anchor self.anchor = anchor
def run(self): def run(self):
# Some shortcuts for the used xml namespaces # Some shortcuts for the used xml namespaces
d = self.drawNs d = self.drawNs
@ -219,18 +210,19 @@ class ImageImporter(DocImporter):
# Compute path to image # Compute path to image
i = self.importPath.rfind('/Pictures/') i = self.importPath.rfind('/Pictures/')
imagePath = self.importPath[i+1:] imagePath = self.importPath[i+1:]
self.fileNames.append(imagePath) self.fileNames[imagePath] = self.at
# Compute image size # Compute image size
width, height = getSize(self.importPath, self.format) width, height = getSize(self.importPath, self.format)
if width != None: if width != None:
size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height) size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height)
else: else:
size = '' size = ''
self.res += '<%s:p><%s:frame %s:name="%s" %s:z-index="0" ' \ image = '<%s:frame %s:name="%s" %s:z-index="0" %s:anchor-type="%s"%s>' \
'%s:anchor-type="%s"%s><%s:image %s:type="simple" ' \ '<%s:image %s:type="simple" %s:show="embed" %s:href="%s" ' \
'%s:show="embed" %s:href="%s" %s:actuate="onLoad"/>' \ '%s:actuate="onLoad"/></%s:frame>' % (d, d, imageName, d, t, \
'</%s:frame></%s:p>' % \ self.anchor, size, d, x, x, x, imagePath, x, d)
(t, d, d, imageName, d, t, self.anchor, size, d, x, x, x, if hasattr(self, 'wrapInPara') and self.wrapInPara:
imagePath, x, d, t) image = '<%s:p>%s</%s:p>' % (t, image, t)
self.res += image
return self.res return self.res
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------

View file

@ -142,10 +142,14 @@ class Renderer:
self.forceOoCall = forceOoCall self.forceOoCall = forceOoCall
self.finalizeFunction = finalizeFunction self.finalizeFunction = finalizeFunction
self.overwriteExisting = overwriteExisting self.overwriteExisting = overwriteExisting
# Retain potential files or images that will be included through # Remember potential files or images that will be included through
# "do ... from document" statements: we will need to declare them in # "do ... from document" statements: we will need to declare them in
# META-INF/manifest.xml. # META-INF/manifest.xml. Keys are file names as they appear within the
self.fileNames = [] # ODT file (to dump in manifest.xml); values are original paths of
# included images (used to avoid creating multiple copies of a file
# that is imported several times: an already-imported image is reused
# instead of being copied again).
self.fileNames = {}
self.prepareFolders() self.prepareFolders()
# Unzip template # Unzip template
self.unzipFolder = os.path.join(self.tempFolder, 'unzip') self.unzipFolder = os.path.join(self.tempFolder, 'unzip')
@ -255,12 +259,12 @@ class Renderer:
imageFormats = ('png', 'jpeg', 'jpg', 'gif') imageFormats = ('png', 'jpeg', 'jpg', 'gif')
ooFormats = ('odt',) ooFormats = ('odt',)
def importDocument(self, content=None, at=None, format=None, def importDocument(self, content=None, at=None, format=None,
anchor='as-char'): anchor='as-char', wrapInPara=True):
'''If p_at is not None, it represents a path or url allowing to find '''If p_at is not None, it represents a path or url allowing to find
the document. If p_at is None, the content of the document is the document. If p_at is None, the content of the document is
supposed to be in binary format in p_content. The document supposed to be in binary format in p_content. The document
p_format may be: odt or any format in imageFormats. p_anchor is only p_format may be: odt or any format in imageFormats. p_anchor and
relevant for images.''' p_wrapInPara are only relevant for images.'''
ns = self.currentParser.env.namespaces ns = self.currentParser.env.namespaces
importer = None importer = None
# Is there something to import? # Is there something to import?
@ -287,12 +291,11 @@ class Renderer:
importer = PdfImporter importer = PdfImporter
else: else:
raise PodError(DOC_WRONG_FORMAT % format) raise PodError(DOC_WRONG_FORMAT % format)
imp = importer(content, at, format, self.tempFolder, ns) imp = importer(content, at, format, self.tempFolder, ns, self.fileNames)
if isImage: if isImage:
imp.setAnchor(anchor) imp.setAnchor(anchor)
imp.wrapInPara = wrapInPara
res = imp.run() res = imp.run()
if imp.fileNames:
self.fileNames += imp.fileNames
return res return res
def prepareFolders(self): def prepareFolders(self):
@ -323,7 +326,7 @@ class Renderer:
if self.fileNames: if self.fileNames:
j = os.path.join j = os.path.join
toInsert = '' toInsert = ''
for fileName in self.fileNames: for fileName in self.fileNames.iterkeys():
mimeType = mimetypes.guess_type(fileName)[0] mimeType = mimetypes.guess_type(fileName)[0]
toInsert += ' <manifest:file-entry manifest:media-type="%s" ' \ toInsert += ' <manifest:file-entry manifest:media-type="%s" ' \
'manifest:full-path="%s"/>\n' % (mimeType, fileName) 'manifest:full-path="%s"/>\n' % (mimeType, fileName)