2009-06-29 07:06:01 -05:00
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Appy is a framework for building applications in the Python language.
|
|
|
|
# Copyright (C) 2007 Gaetan Delannay
|
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
2012-01-04 11:03:46 -06:00
|
|
|
import os, os.path, time, shutil, struct, random, urlparse
|
2009-06-29 07:06:01 -05:00
|
|
|
from appy.pod import PodError
|
|
|
|
from appy.pod.odf_parser import OdfEnvironment
|
2012-01-04 11:03:46 -06:00
|
|
|
from appy.shared import mimeTypesExts
|
2011-12-15 15:56:53 -06:00
|
|
|
from appy.shared.utils import FileWrapper
|
2012-01-04 11:03:46 -06:00
|
|
|
from appy.shared.dav import Resource
|
2009-06-29 07:06:01 -05:00
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Error message used when a local path given to an importer is not a file. The
# single placeholder receives the offending path.
FILE_NOT_FOUND = "'%s' does not exist or is not a file."
# Error message used when splitting a PDF into images produced no image.
PDF_TO_IMG_ERROR = (
    'A PDF file could not be converted into images. Please '
    'ensure that Ghostscript (gs) is installed on your '
    'system and the "gs" program is in the path.')
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
class DocImporter:
    '''Base class used for importing external content into a pod template (an
       image, another pod template, another odt document...).

       Sub-classes must override m_getImportFolder and may override
       m_moveFile.'''

    def __init__(self, content, at, format, renderer):
        '''Prepares the import of some external content.

           p_content is the content to import: raw data, an object with a
           "read" method (like an open file) or a FileWrapper instance. If
           p_content is None, p_at tells where to find it (a file system path
           or a URL starting with "http").

           p_format is the file extension of the content (without dot). It may
           be empty if p_at is given: it is then deduced from p_at.

           p_renderer is the object driving the rendering: its current parser
           namespaces, its temp folder and its "fileNames" dict are read.

           Raises a PodError if p_at is a local path that is not an existing
           file.'''
        self.content = content
        # If content is None, p_at tells us where to find it (file system path,
        # url, etc)
        self.at = at
        # Ensure this path exists, if it is a local path.
        if at and not at.startswith('http') and not os.path.isfile(at):
            raise PodError(FILE_NOT_FOUND % at)
        self.format = format
        self.res = u''
        self.renderer = renderer
        self.ns = renderer.currentParser.env.namespaces
        # Unpack some useful namespaces
        self.textNs = self.ns[OdfEnvironment.NS_TEXT]
        self.linkNs = self.ns[OdfEnvironment.NS_XLINK]
        self.drawNs = self.ns[OdfEnvironment.NS_DRAW]
        self.svgNs = self.ns[OdfEnvironment.NS_SVG]
        self.tempFolder = renderer.tempFolder
        self.importFolder = self.getImportFolder()
        # Create the import folder if it does not exist.
        if not os.path.exists(self.importFolder): os.mkdir(self.importFolder)
        self.importPath = self.getImportPath(at, format)
        # A link to the global fileNames dict (explained in renderer.py)
        self.fileNames = renderer.fileNames
        if at:
            # Let the sub-class decide what to do with the file at p_at:
            # m_moveFile returns the path that must finally be used.
            self.importPath = self.moveFile(at, self.importPath)
        else:
            # We need to dump the file content (in self.content) in a temp file
            # first.
            self._dumpContent()
        # ImageImporter adds additional, image-specific attrs, through
        # ImageImporter.setImageInfo.

    def _dumpContent(self):
        '''Writes self.content into the file at self.importPath. self.content
           may be raw data, a FileWrapper or any object having a "read"
           method.'''
        content = self.content
        if isinstance(content, FileWrapper):
            data = content.content
        elif hasattr(content, 'read'):
            # Any file-like object is accepted, not only real files.
            data = content.read()
        else:
            data = content
        f = open(self.importPath, 'wb')
        try:
            f.write(data)
        finally:
            # Close the file even if writing fails.
            f.close()

    def getImportFolder(self):
        '''This method must be overridden and gives the path where to dump the
           content of the document or image. In the case of a document it is a
           temp folder; in the case of an image it is a folder within the ODT
           result.'''

    def getImportPath(self, at, format):
        '''Gets the path name of the file to dump on disk (within the ODT for
           images, in a temp folder for docs). If p_format is empty, the file
           extension is taken from p_at when p_at is a local path; else, the
           returned path ends with a dot and has no extension yet.'''
        if not format:
            if not at or at.startswith('http'):
                # No local path to deduce the format from. For a URL, we will
                # know it only after the HTTP GET.
                format = ''
            else:
                format = os.path.splitext(at)[1][1:]
        fileName = 'f.%d.%f.%s' % (random.randint(0,10), time.time(), format)
        return os.path.abspath('%s/%s' % (self.importFolder, fileName))

    def moveFile(self, at, importPath):
        '''In the case parameter "at" was used, we may want to move the file at
           p_at within the ODT result in p_importPath (for images) or do
           nothing (for docs). In the latter case, the file to import stays
           at p_at, and is not copied into p_importPath.'''
        return at
|
|
|
|
|
|
|
|
class OdtImporter(DocImporter):
    '''Imports the content of another ODT document into a pod template, by
       generating a section that links to this document.'''

    def getImportFolder(self):
        '''ODT documents to import are dumped in a sub-folder of the temp
           folder.'''
        return '%s/docImports' % self.tempFolder

    def run(self):
        '''Appends to self.res, and returns, a text section whose source is
           the document at self.importPath.'''
        text = self.textNs
        link = self.linkNs
        section = '<%s:section %s:name="PodImportSection%f">' \
                  '<%s:section-source %s:href="%s" ' \
                  '%s:filter-name="writer8"/></%s:section>'
        # The current time makes the section name (hopefully) unique.
        values = (text, text, time.time(), text, link, self.importPath, text,
                  text)
        self.res += section % values
        return self.res
|
|
|
|
|
|
|
|
class PdfImporter(DocImporter):
    '''This class allows to import the content of a PDF file into a pod
       template. It calls gs to split the PDF into images and calls the
       ImageImporter for importing it into the result.'''
    # Prefix of the names of the images generated by Ghostscript
    imagePrefix = 'PdfPart'

    def getImportFolder(self): return '%s/docImports' % self.tempFolder

    def run(self):
        '''Converts every page of the PDF at self.importPath into a JPG image,
           imports every image via an ImageImporter and returns the resulting
           chunk of ODT content. Raises a PodError if no image could be
           produced.'''
        # Imported here because "subprocess" is not imported at file level.
        import subprocess
        # Split the PDF into images with Ghostscript. The command is given as
        # a list of args (no shell involved), so paths containing spaces or
        # special chars are passed as is to gs.
        imagesFolder = os.path.dirname(self.importPath)
        cmd = ['gs', '-dNOPAUSE', '-dBATCH', '-sDEVICE=jpeg', '-r125x125',
               '-sOutputFile=%s/%s%%d.jpg' % (imagesFolder, self.imagePrefix),
               self.importPath]
        try:
            subprocess.call(cmd)
        except OSError:
            # The "gs" program could not be launched.
            raise PodError(PDF_TO_IMG_ERROR)
        # Check that at least one image was generated
        firstImage = '%s/%s1.jpg' % (imagesFolder, self.imagePrefix)
        if not os.path.exists(firstImage): raise PodError(PDF_TO_IMG_ERROR)
        # Insert images into the result, in page order, until there is no
        # more image.
        i = 1
        nextImage = firstImage
        while os.path.exists(nextImage):
            # Use internally an Image importer for doing this job.
            imgImporter = ImageImporter(None, nextImage, 'jpg', self.renderer)
            # Anchor the image to the paragraph; no paragraph wrapping, no
            # forced size, no CSS style.
            imgImporter.setImageInfo('paragraph', False, None, None, '')
            self.res += imgImporter.run()
            # The image has been copied by the image importer: the one
            # generated by gs is not needed anymore.
            os.remove(nextImage)
            i += 1
            nextImage = '%s/%s%d.jpg' % (imagesFolder, self.imagePrefix, i)
        return self.res
|
|
|
|
|
|
|
|
# Compute size of images -------------------------------------------------------
jpgTypes = ('jpg', 'jpeg')
# Divisor used to convert a number of pixels into centimeters
pxToCm = 44.173513561
def getSize(filePath, fileType):
    '''Gets the size of an image by reading first bytes.

       p_filePath is the path to the image on disk; p_fileType is its
       extension (without dot): "jpg", "jpeg", "png" or "gif", in any case.

       Returns a tuple (width, height), in centimeters (as floats). If
       p_fileType is not among the supported ones or if the size could not be
       read (truncated or invalid file), (None, None) is returned.'''
    x, y = (None, None)
    fileType = (fileType or '').lower()
    # Tag of the PNG chunk containing the size. It is built that way in order
    # to be comparable with what is read from a file opened in binary mode,
    # be it a byte string or not.
    pngSizeTag = 'IHDR'.encode('ascii')
    f = open(filePath, 'rb')
    try:
        if fileType in jpgTypes:
            # Dummy read to skip header ID
            f.read(2)
            while True:
                # Extract the segment header.
                header = f.read(4)
                if len(header) < 4: break # Truncated file
                (marker, code, length) = struct.unpack("!BBH", header)
                # Verify that it's a valid segment.
                if marker != 0xFF:
                    # No JPEG marker
                    break
                elif code >= 0xC0 and code <= 0xC3:
                    # Segments that contain size info
                    sizeInfo = f.read(5)
                    if len(sizeInfo) == 5:
                        (y, x) = struct.unpack("!xHH", sizeInfo)
                    break
                else:
                    # Dummy read to skip over data. A corrupted length must
                    # not lead to a negative number of bytes to read.
                    f.read(max(length-2, 0))
        elif fileType == 'png':
            # Dummy read to skip header data
            f.read(12)
            if f.read(4) == pngSizeTag:
                sizeInfo = f.read(8)
                if len(sizeInfo) == 8:
                    x, y = struct.unpack("!LL", sizeInfo)
        elif fileType == 'gif':
            # Dummy read to skip the GIF signature
            f.read(6)
            buf = f.read(5)
            if len(buf) == 5:
                # else: invalid/corrupted GIF (bad header)
                x, y, u = struct.unpack("<HHB", buf)
    finally:
        f.close()
    # The size may be unknown: unsupported type or unreadable file.
    if (x is None) or (y is None): return None, None
    return float(x)/pxToCm, float(y)/pxToCm
|
|
|
|
|
|
|
|
class ImageImporter(DocImporter):
    '''This class allows to import into the ODT result an image stored
       externally.'''
    anchorTypes = ('page', 'paragraph', 'char', 'as-char')
    WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.'
    # Name of the ODT sub-folder containing images, surrounded by path
    # separators: used to find, in an absolute path, the part being relative
    # to the ODT root.
    pictFolder = '%sPictures%s' % (os.sep, os.sep)

    def getImportFolder(self):
        '''Images are dumped in folder "Pictures" within the unzipped ODT.'''
        return os.path.join(self.tempFolder, 'unzip', 'Pictures')

    def moveFile(self, at, importPath):
        '''Copies file at p_at into the ODT file at p_importPath and returns
           the path where the image is finally stored. p_at may be a local
           path or a URL starting with "http". If the image at p_at has
           already been imported, nothing is copied: the path to the already
           imported image is returned.'''
        # Has this image already been imported ?
        for imagePath, imageAt in self.fileNames.items():
            if imageAt == at:
                # Yes!
                i = importPath.rfind(self.pictFolder) + 1
                return importPath[:i] + imagePath
        # The image has not already been imported: copy it.
        if not at.startswith('http'):
            shutil.copy(at, importPath)
            return importPath
        # The image must be retrieved via a URL. Try to perform a HTTP GET.
        response = Resource(at).get()
        if response.code == 200:
            # At last, I can get the file format. Ignore any parameter that
            # may follow the mime type (like "; charset=...").
            mimeType = response.headers['Content-Type'].split(';')[0].strip()
            self.format = mimeTypesExts[mimeType]
            importPath += self.format
            f = open(importPath, 'wb')
            try:
                f.write(response.body)
            finally:
                f.close()
            return importPath
        # The HTTP GET did not work, maybe for security reasons (we probably
        # have no permission to get the file). But maybe the URL was a local
        # one, from an application server running this POD code. In this case,
        # if an image resolver has been given to POD, use it to retrieve the
        # image.
        imageResolver = self.renderer.imageResolver
        if not imageResolver:
            # Return some default image explaining that the image wasn't found.
            import appy.pod
            podFolder = os.path.dirname(appy.pod.__file__)
            img = os.path.join(podFolder, 'imageNotFound.jpg')
            self.format = 'jpg'
            importPath += self.format
            # Copy the image content byte per byte: reading it in text mode
            # could corrupt it.
            shutil.copyfile(img, importPath)
        else:
            # The imageResolver is a Zope application. From it, we will
            # retrieve the object on which the image is stored and get
            # the file to download.
            urlParts = urlparse.urlsplit(at)
            path = urlParts[2][1:]
            obj = imageResolver.unrestrictedTraverse(path.split('/')[:-1])
            zopeFile = getattr(obj, urlParts[3].split('=')[1])
            appyFile = FileWrapper(zopeFile)
            self.format = mimeTypesExts[appyFile.mimeType]
            importPath += self.format
            appyFile.dump(importPath)
        return importPath

    def setImageInfo(self, anchor, wrapInPara, size, sizeUnit, style):
        '''Stores image-specific information used by m_run.

           p_anchor must be one of ImageImporter.anchorTypes, else a PodError
           is raised. If p_wrapInPara is True, the image will be surrounded by
           a paragraph. p_size is None or a tuple (width, height), expressed
           in p_sizeUnit ("px" for pixels; else, values are used as
           centimeters). p_style is a string containing CSS declarations
           ("name: value; name: value"): they are stored in dict
           self.cssAttrs. An empty or None p_style gives an empty dict.'''
        # Initialise anchor
        if anchor not in self.anchorTypes:
            raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes))
        self.anchor = anchor
        self.wrapInPara = wrapInPara
        self.size = size
        self.sizeUnit = sizeUnit
        # Put CSS attributes from p_style in a dict.
        self.cssAttrs = {}
        for attr in (style or '').split(';'):
            # Ignore empty or malformed declarations
            if ':' not in attr: continue
            # Split on the first colon only: the value may itself contain
            # colons (a URL, for instance).
            name, value = attr.split(':', 1)
            value = value.strip()
            if value.endswith('px'): value = value[:-2].strip()
            if value.isdigit(): value=int(value)
            self.cssAttrs[name.strip()] = value

    def setAnchor(self, anchor):
        '''Shortcut for callers that only need to specify p_anchor: the image
           gets no paragraph wrapping, no forced size and no CSS style.'''
        self.setImageInfo(anchor, False, None, None, '')

    def _getCssPixels(self, name):
        '''Returns, as a float, the value of CSS attribute p_name if it is
           present in self.cssAttrs and is a number. Returns None else (the
           attribute is absent or has a value like "50%" or "auto").'''
        value = self.cssAttrs.get(name)
        if value is None: return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def run(self):
        '''Generates the chunk of ODT content (a frame containing the image)
           that references the imported image, appends it to self.res and
           returns self.res. m_setImageInfo must have been called before.'''
        # Some shorcuts for the used xml namespaces
        d = self.drawNs
        t = self.textNs
        x = self.linkNs
        s = self.svgNs
        imageName = 'Image%f' % time.time()
        # Compute path to image, relative to the root of the ODT file
        i = self.importPath.rfind(self.pictFolder)
        imagePath = self.importPath[i+1:].replace('\\', '/')
        # Remember that this image has been imported
        self.fileNames[imagePath] = self.at
        # Retrieve image size from self.size.
        width = height = None
        if self.size:
            width, height = self.size
            if self.sizeUnit == 'px':
                # Convert it to cm
                width = float(width) / pxToCm
                height = float(height) / pxToCm
        # Override self.size if 'height' or 'width' is found in self.cssAttrs
        # with a numeric value, considered as a number of pixels.
        cssWidth = self._getCssPixels('width')
        if cssWidth is not None:
            width = cssWidth / pxToCm
        cssHeight = self._getCssPixels('height')
        if cssHeight is not None:
            height = cssHeight / pxToCm
        # If width and/or height is missing, compute it.
        if not width or not height:
            width, height = getSize(self.importPath, self.format)
        if (width is not None) and (height is not None):
            size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height)
        else:
            size = ''
        if 'float' in self.cssAttrs:
            # A floating image uses one of the "podImage" styles and must be
            # anchored to the char.
            floatValue = self.cssAttrs['float'].capitalize()
            styleInfo = '%s:style-name="podImage%s" ' % (d, floatValue)
            self.anchor = 'char'
        else:
            styleInfo = ''
        image = '<%s:frame %s%s:name="%s" %s:z-index="0" ' \
                '%s:anchor-type="%s"%s><%s:image %s:type="simple" ' \
                '%s:show="embed" %s:href="%s" %s:actuate="onLoad"/>' \
                '</%s:frame>' % (d, styleInfo, d, imageName, d, t, self.anchor,
                                 size, d, x, x, x, imagePath, x, d)
        if getattr(self, 'wrapInPara', False):
            image = '<%s:p>%s</%s:p>' % (t, image, t)
        self.res += image
        return self.res
|
|
|
|
# ------------------------------------------------------------------------------
|