appy.pod: optimized image importing: when an image is imported several times in a pod template through a call to 'do ... from document(at=path)', pod inserts only one copy of the file into the ODT result.
This commit is contained in:
parent
e38b78d10c
commit
02fce03143
|
@ -17,7 +17,7 @@
|
||||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
import os, os.path, time, shutil, struct
|
import os, os.path, time, shutil, struct, random
|
||||||
from appy.pod import PodError
|
from appy.pod import PodError
|
||||||
from appy.pod.odf_parser import OdfEnvironment
|
from appy.pod.odf_parser import OdfEnvironment
|
||||||
|
|
||||||
|
@ -31,10 +31,13 @@ PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \
|
||||||
class DocImporter:
|
class DocImporter:
|
||||||
'''Base class used for importing external content into a pod template (an
|
'''Base class used for importing external content into a pod template (an
|
||||||
image, another pod template, another odt document...)'''
|
image, another pod template, another odt document...)'''
|
||||||
def __init__(self, content, at, format, tempFolder, ns):
|
def __init__(self, content, at, format, tempFolder, ns, fileNames):
|
||||||
self.content = content
|
self.content = content
|
||||||
self.at = at # If content is None, p_at tells us where to find it
|
# If content is None, p_at tells us where to find it (file system path,
|
||||||
# (file system path, url, etc)
|
# url, etc)
|
||||||
|
self.at = at
|
||||||
|
# Ensure this path exists.
|
||||||
|
if at and not os.path.isfile(at): raise PodError(FILE_NOT_FOUND % at)
|
||||||
self.format = format
|
self.format = format
|
||||||
self.res = u''
|
self.res = u''
|
||||||
self.ns = ns
|
self.ns = ns
|
||||||
|
@ -45,64 +48,51 @@ class DocImporter:
|
||||||
self.svgNs = ns[OdfEnvironment.NS_SVG]
|
self.svgNs = ns[OdfEnvironment.NS_SVG]
|
||||||
self.tempFolder = tempFolder
|
self.tempFolder = tempFolder
|
||||||
self.importFolder = self.getImportFolder()
|
self.importFolder = self.getImportFolder()
|
||||||
# If the importer generates one or several images, we will retain their
|
# Create the import folder if it does not exist.
|
||||||
# names here, because we will need to declare them in
|
if not os.path.exists(self.importFolder): os.mkdir(self.importFolder)
|
||||||
# META-INF/manifest.xml
|
self.importPath = self.getImportPath(at, format)
|
||||||
self.fileNames = []
|
# A link to the global fileNames dict (explained in renderer.py)
|
||||||
if self.at:
|
self.fileNames = fileNames
|
||||||
# Check that the file exists
|
if at:
|
||||||
if not os.path.isfile(self.at):
|
# Move the file within the ODT, if it is an image and if this image
|
||||||
raise PodError(FILE_NOT_FOUND % self.at)
|
# has not already been imported.
|
||||||
self.importPath = self.moveFile(self.at)
|
self.importPath = self.moveFile(at, self.importPath)
|
||||||
else:
|
else:
|
||||||
# We need to dump the file content (in self.content) in a temp file
|
# We need to dump the file content (in self.content) in a temp file
|
||||||
# first. self.content may be binary or a file handler.
|
# first. self.content may be binary or a file handler.
|
||||||
if not os.path.exists(self.importFolder):
|
|
||||||
os.mkdir(self.importFolder)
|
|
||||||
if isinstance(self.content, file):
|
if isinstance(self.content, file):
|
||||||
self.fileName = os.path.basename(self.content.name)
|
|
||||||
fileContent = self.content.read()
|
fileContent = self.content.read()
|
||||||
else:
|
else:
|
||||||
self.fileName = 'f%f.%s' % (time.time(), self.format)
|
|
||||||
fileContent = self.content
|
fileContent = self.content
|
||||||
self.importPath = self.getImportPath(self.fileName)
|
f = file(self.importPath, 'w')
|
||||||
theFile = file(self.importPath, 'w')
|
f.write(fileContent)
|
||||||
theFile.write(fileContent)
|
f.close()
|
||||||
theFile.close()
|
|
||||||
self.importPath = os.path.abspath(self.importPath)
|
|
||||||
def getImportFolder(self):
|
def getImportFolder(self):
|
||||||
'''This method must be overridden and gives the path where to dump the
|
'''This method must be overridden and gives the path where to dump the
|
||||||
content of the document or image. In the case of a document it is a
|
content of the document or image. In the case of a document it is a
|
||||||
temp folder; in the case of an image it is a folder within the ODT
|
temp folder; in the case of an image it is a folder within the ODT
|
||||||
result.'''
|
result.'''
|
||||||
pass
|
|
||||||
def getImportPath(self, fileName):
|
def getImportPath(self, at, format):
|
||||||
'''Import path is the path to the external file or image that is now
|
'''Gets the path name of the file to dump on disk (within the ODT for
|
||||||
stored on disk. We check here that this name does not correspond
|
images, in a temp folder for docs).'''
|
||||||
to an existing file; if yes, we change the path until we get a path
|
if not format:
|
||||||
that does not correspond to an existing file.'''
|
format = os.path.splitext(at)[1][1:]
|
||||||
res = '%s/%s' % (self.importFolder, fileName)
|
fileName = 'f.%d.%f.%s' % (random.randint(0,10), time.time(), format)
|
||||||
resIsGood = False
|
return os.path.abspath('%s/%s' % (self.importFolder, fileName))
|
||||||
while not resIsGood:
|
|
||||||
if not os.path.exists(res):
|
def moveFile(self, at, importPath):
|
||||||
resIsGood = True
|
|
||||||
else:
|
|
||||||
# We must find another file name, this one already exists.
|
|
||||||
name, ext = os.path.splitext(res)
|
|
||||||
name += 'g'
|
|
||||||
res = name + ext
|
|
||||||
return res
|
|
||||||
def moveFile(self, at):
|
|
||||||
'''In the case parameter "at" was used, we may want to move the file at
|
'''In the case parameter "at" was used, we may want to move the file at
|
||||||
p_at within the ODT result (for images) or do nothing (for
|
p_at within the ODT result in p_importPath (for images) or do
|
||||||
documents).'''
|
nothing (for docs). In the latter case, the file to import stays
|
||||||
|
at p_at, and is not copied into p_importPath.'''
|
||||||
return at
|
return at
|
||||||
|
|
||||||
class OdtImporter(DocImporter):
|
class OdtImporter(DocImporter):
|
||||||
'''This class allows to import the content of another ODT document into a
|
'''This class allows to import the content of another ODT document into a
|
||||||
pod template.'''
|
pod template.'''
|
||||||
def getImportFolder(self):
|
def getImportFolder(self): return '%s/docImports' % self.tempFolder
|
||||||
return '%s/docImports' % self.tempFolder
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.res += '<%s:section %s:name="PodImportSection%f">' \
|
self.res += '<%s:section %s:name="PodImportSection%f">' \
|
||||||
'<%s:section-source %s:href="%s" ' \
|
'<%s:section-source %s:href="%s" ' \
|
||||||
|
@ -116,8 +106,7 @@ class PdfImporter(DocImporter):
|
||||||
template. It calls gs to split the PDF into images and calls the
|
template. It calls gs to split the PDF into images and calls the
|
||||||
ImageImporter for importing it into the result.'''
|
ImageImporter for importing it into the result.'''
|
||||||
imagePrefix = 'PdfPart'
|
imagePrefix = 'PdfPart'
|
||||||
def getImportFolder(self):
|
def getImportFolder(self): return '%s/docImports' % self.tempFolder
|
||||||
return '%s/docImports' % self.tempFolder
|
|
||||||
def run(self):
|
def run(self):
|
||||||
# Split the PDF into images with Ghostscript
|
# Split the PDF into images with Ghostscript
|
||||||
imagesFolder = os.path.dirname(self.importPath)
|
imagesFolder = os.path.dirname(self.importPath)
|
||||||
|
@ -132,8 +121,7 @@ class PdfImporter(DocImporter):
|
||||||
if fileName == firstImage:
|
if fileName == firstImage:
|
||||||
succeeded = True
|
succeeded = True
|
||||||
break
|
break
|
||||||
if not succeeded:
|
if not succeeded: raise PodError(PDF_TO_IMG_ERROR)
|
||||||
raise PodError(PDF_TO_IMG_ERROR)
|
|
||||||
# Insert images into the result.
|
# Insert images into the result.
|
||||||
noMoreImages = False
|
noMoreImages = False
|
||||||
i = 0
|
i = 0
|
||||||
|
@ -143,10 +131,9 @@ class PdfImporter(DocImporter):
|
||||||
if os.path.exists(nextImage):
|
if os.path.exists(nextImage):
|
||||||
# Use internally an Image importer for doing this job.
|
# Use internally an Image importer for doing this job.
|
||||||
imgImporter = ImageImporter(None, nextImage, 'jpg',
|
imgImporter = ImageImporter(None, nextImage, 'jpg',
|
||||||
self.tempFolder, self.ns)
|
self.tempFolder, self.ns, self.fileNames)
|
||||||
imgImporter.setAnchor('paragraph')
|
imgImporter.setAnchor('paragraph')
|
||||||
self.res += imgImporter.run()
|
self.res += imgImporter.run()
|
||||||
self.fileNames += imgImporter.fileNames
|
|
||||||
os.remove(nextImage)
|
os.remove(nextImage)
|
||||||
else:
|
else:
|
||||||
noMoreImages = True
|
noMoreImages = True
|
||||||
|
@ -194,21 +181,25 @@ class ImageImporter(DocImporter):
|
||||||
externally.'''
|
externally.'''
|
||||||
anchorTypes = ('page', 'paragraph', 'char', 'as-char')
|
anchorTypes = ('page', 'paragraph', 'char', 'as-char')
|
||||||
WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.'
|
WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.'
|
||||||
def getImportFolder(self):
|
def getImportFolder(self): return '%s/unzip/Pictures' % self.tempFolder
|
||||||
return '%s/unzip/Pictures' % self.tempFolder
|
|
||||||
def moveFile(self, at):
|
def moveFile(self, at, importPath):
|
||||||
'''Image to insert is at p_at. We must move it into the ODT result.'''
|
'''Copies file at p_at into the ODT file at p_importPath.'''
|
||||||
fileName = os.path.basename(at)
|
# Has this image already been imported ?
|
||||||
folderName = self.getImportFolder()
|
for imagePath, imageAt in self.fileNames.iteritems():
|
||||||
if not os.path.exists(folderName):
|
if imageAt == at:
|
||||||
os.mkdir(folderName)
|
# Yes!
|
||||||
res = self.getImportPath(fileName)
|
i = importPath.rfind('/Pictures/') + 1
|
||||||
shutil.copy(at, res)
|
return importPath[:i] + imagePath
|
||||||
return res
|
# If I am here, the image has not already been imported: copy it.
|
||||||
|
shutil.copy(at, importPath)
|
||||||
|
return importPath
|
||||||
|
|
||||||
def setAnchor(self, anchor):
|
def setAnchor(self, anchor):
|
||||||
if anchor not in self.anchorTypes:
|
if anchor not in self.anchorTypes:
|
||||||
raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes))
|
raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes))
|
||||||
self.anchor = anchor
|
self.anchor = anchor
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
# Some shortcuts for the used xml namespaces
|
# Some shortcuts for the used xml namespaces
|
||||||
d = self.drawNs
|
d = self.drawNs
|
||||||
|
@ -219,18 +210,19 @@ class ImageImporter(DocImporter):
|
||||||
# Compute path to image
|
# Compute path to image
|
||||||
i = self.importPath.rfind('/Pictures/')
|
i = self.importPath.rfind('/Pictures/')
|
||||||
imagePath = self.importPath[i+1:]
|
imagePath = self.importPath[i+1:]
|
||||||
self.fileNames.append(imagePath)
|
self.fileNames[imagePath] = self.at
|
||||||
# Compute image size
|
# Compute image size
|
||||||
width, height = getSize(self.importPath, self.format)
|
width, height = getSize(self.importPath, self.format)
|
||||||
if width != None:
|
if width != None:
|
||||||
size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height)
|
size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height)
|
||||||
else:
|
else:
|
||||||
size = ''
|
size = ''
|
||||||
self.res += '<%s:p><%s:frame %s:name="%s" %s:z-index="0" ' \
|
image = '<%s:frame %s:name="%s" %s:z-index="0" %s:anchor-type="%s"%s>' \
|
||||||
'%s:anchor-type="%s"%s><%s:image %s:type="simple" ' \
|
'<%s:image %s:type="simple" %s:show="embed" %s:href="%s" ' \
|
||||||
'%s:show="embed" %s:href="%s" %s:actuate="onLoad"/>' \
|
'%s:actuate="onLoad"/></%s:frame>' % (d, d, imageName, d, t, \
|
||||||
'</%s:frame></%s:p>' % \
|
self.anchor, size, d, x, x, x, imagePath, x, d)
|
||||||
(t, d, d, imageName, d, t, self.anchor, size, d, x, x, x,
|
if hasattr(self, 'wrapInPara') and self.wrapInPara:
|
||||||
imagePath, x, d, t)
|
image = '<%s:p>%s</%s:p>' % (t, image, t)
|
||||||
|
self.res += image
|
||||||
return self.res
|
return self.res
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
|
@ -142,10 +142,14 @@ class Renderer:
|
||||||
self.forceOoCall = forceOoCall
|
self.forceOoCall = forceOoCall
|
||||||
self.finalizeFunction = finalizeFunction
|
self.finalizeFunction = finalizeFunction
|
||||||
self.overwriteExisting = overwriteExisting
|
self.overwriteExisting = overwriteExisting
|
||||||
# Retain potential files or images that will be included through
|
# Remember potential files or images that will be included through
|
||||||
# "do ... from document" statements: we will need to declare them in
|
# "do ... from document" statements: we will need to declare them in
|
||||||
# META-INF/manifest.xml.
|
# META-INF/manifest.xml. Keys are file names as they appear within the
|
||||||
self.fileNames = []
|
# ODT file (to dump in manifest.xml); values are original paths of
|
||||||
|
# included images (used to avoid creating multiple copies of a file
|
||||||
|
# that is imported several times: each distinct source path maps to a
|
||||||
|
# single imported file).
|
||||||
|
self.fileNames = {}
|
||||||
self.prepareFolders()
|
self.prepareFolders()
|
||||||
# Unzip template
|
# Unzip template
|
||||||
self.unzipFolder = os.path.join(self.tempFolder, 'unzip')
|
self.unzipFolder = os.path.join(self.tempFolder, 'unzip')
|
||||||
|
@ -255,12 +259,12 @@ class Renderer:
|
||||||
imageFormats = ('png', 'jpeg', 'jpg', 'gif')
|
imageFormats = ('png', 'jpeg', 'jpg', 'gif')
|
||||||
ooFormats = ('odt',)
|
ooFormats = ('odt',)
|
||||||
def importDocument(self, content=None, at=None, format=None,
|
def importDocument(self, content=None, at=None, format=None,
|
||||||
anchor='as-char'):
|
anchor='as-char', wrapInPara=True):
|
||||||
'''If p_at is not None, it represents a path or url allowing to find
|
'''If p_at is not None, it represents a path or url allowing to find
|
||||||
the document. If p_at is None, the content of the document is
|
the document. If p_at is None, the content of the document is
|
||||||
supposed to be in binary format in p_content. The document
|
supposed to be in binary format in p_content. The document
|
||||||
p_format may be: odt or any format in imageFormats. p_anchor is only
|
p_format may be: odt or any format in imageFormats. p_anchor and
|
||||||
relevant for images.'''
|
p_wrapInPara are only relevant for images.'''
|
||||||
ns = self.currentParser.env.namespaces
|
ns = self.currentParser.env.namespaces
|
||||||
importer = None
|
importer = None
|
||||||
# Is there something to import?
|
# Is there something to import?
|
||||||
|
@ -287,12 +291,11 @@ class Renderer:
|
||||||
importer = PdfImporter
|
importer = PdfImporter
|
||||||
else:
|
else:
|
||||||
raise PodError(DOC_WRONG_FORMAT % format)
|
raise PodError(DOC_WRONG_FORMAT % format)
|
||||||
imp = importer(content, at, format, self.tempFolder, ns)
|
imp = importer(content, at, format, self.tempFolder, ns, self.fileNames)
|
||||||
if isImage:
|
if isImage:
|
||||||
imp.setAnchor(anchor)
|
imp.setAnchor(anchor)
|
||||||
|
imp.wrapInPara = wrapInPara
|
||||||
res = imp.run()
|
res = imp.run()
|
||||||
if imp.fileNames:
|
|
||||||
self.fileNames += imp.fileNames
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def prepareFolders(self):
|
def prepareFolders(self):
|
||||||
|
@ -323,7 +326,7 @@ class Renderer:
|
||||||
if self.fileNames:
|
if self.fileNames:
|
||||||
j = os.path.join
|
j = os.path.join
|
||||||
toInsert = ''
|
toInsert = ''
|
||||||
for fileName in self.fileNames:
|
for fileName in self.fileNames.iterkeys():
|
||||||
mimeType = mimetypes.guess_type(fileName)[0]
|
mimeType = mimetypes.guess_type(fileName)[0]
|
||||||
toInsert += ' <manifest:file-entry manifest:media-type="%s" ' \
|
toInsert += ' <manifest:file-entry manifest:media-type="%s" ' \
|
||||||
'manifest:full-path="%s"/>\n' % (mimeType, fileName)
|
'manifest:full-path="%s"/>\n' % (mimeType, fileName)
|
||||||
|
|
Loading…
Reference in a new issue