Initial import

2009-06-29 14:06:01 +02:00 · 2009-06-29 14:06:01 +02:00 · 4043163fc4
commit 4043163fc4
427 changed files with 18387 additions and 0 deletions
--- a/pod/doc_importers.py
+++ b/pod/doc_importers.py
@ -0,0 +1,230 @@
+# ------------------------------------------------------------------------------
+# Appy is a framework for building applications in the Python language.
+# Copyright (C) 2007 Gaetan Delannay
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,USA.
+
+# ------------------------------------------------------------------------------
+import os, os.path, time, shutil, struct
+from appy.pod import PodError
+from appy.pod.odf_parser import OdfEnvironment
+
+# ------------------------------------------------------------------------------
+FILE_NOT_FOUND = "'%s' does not exist or is not a file."
+PDF_TO_IMG_ERROR = 'A PDF file could not be converted into images. Please ' \
+                   'ensure that Ghostscript (gs) is installed on your ' \
+                   'system and the "gs" program is in the path.'
+
+# ------------------------------------------------------------------------------
+class DocImporter:
+    '''Base class used for importing external content into a pod template (an
+       image, another pod template, another odt document...'''
+    def __init__(self, content, at, format, tempFolder, ns):
+        self.content = content
+        self.at = at # If content is None, p_at tells us where to find it
+        # (file system path, url, etc)
+        self.format = format
+        self.res = u''
+        self.ns = ns
+        # Unpack some useful namespaces
+        self.textNs = ns[OdfEnvironment.NS_TEXT]
+        self.linkNs = ns[OdfEnvironment.NS_XLINK]
+        self.drawNs = ns[OdfEnvironment.NS_DRAW]
+        self.svgNs = ns[OdfEnvironment.NS_SVG]
+        self.tempFolder = tempFolder
+        self.importFolder = self.getImportFolder()
+        if self.at:
+            # Check that the file exists
+            if not os.path.isfile(self.at):
+                raise PodError(FILE_NOT_FOUND % self.at)
+            self.importPath = self.moveFile(self.at)
+        else:
+            # We need to dump the file content (in self.content) in a temp file
+            # first. self.content may be binary or a file handler.
+            if not os.path.exists(self.importFolder):
+                os.mkdir(self.importFolder)
+            if isinstance(self.content, file):
+                self.fileName = os.path.basename(self.content.name)
+                fileContent = self.content.read()
+            else:
+                self.fileName = 'f%f.%s' % (time.time(), self.format)
+                fileContent = self.content
+            self.importPath = self.getImportPath(self.fileName)
+            theFile = file(self.importPath, 'w')
+            theFile.write(fileContent)
+            theFile.close()
+        self.importPath = os.path.abspath(self.importPath)
+    def getImportFolder(self):
+        '''This method must be overridden and gives the path where to dump the
+           content of the document or image. In the case of a document it is a
+           temp folder; in the case of an image it is a folder within the ODT
+           result.'''
+        pass
+    def getImportPath(self, fileName):
+        '''Import path is the path to the external file or image that is now
+           stored on disk. We check here that this name does not correspond
+           to an existing file; if yes, we change the path until we get a path
+           that does not correspond to an existing file.'''
+        res = '%s/%s' % (self.importFolder, fileName)
+        resIsGood = False
+        while not resIsGood:
+            if not os.path.exists(res):
+                resIsGood = True
+            else:
+                # We must find another file name, this one already exists.
+                name, ext = os.path.splitext(res)
+                name += 'g'
+                res = name + ext
+        return res
+    def moveFile(self, at):
+        '''In the case parameter "at" was used, we may want to move the file at
+           p_at within the ODT result (for images) or do nothing (for
+           documents).'''
+        return at
+
+class OdtImporter(DocImporter):
+    '''This class allows to import the content of another ODT document into a
+       pod template.'''
+    def getImportFolder(self):
+        return '%s/docImports' % self.tempFolder
+    def run(self):
+        self.res += '<%s:section %s:name="PodImportSection%f">' \
+                    '<%s:section-source %s:href="%s" ' \
+                    '%s:filter-name="writer8"/></%s:section>' % (
+                        self.textNs, self.textNs, time.time(), self.textNs,
+                        self.linkNs, self.importPath, self.textNs, self.textNs)
+        return self.res
+
+class PdfImporter(DocImporter):
+    '''This class allows to import the content of a PDF file into a pod
+       template. It calls gs to split the PDF into images and calls the
+       ImageImporter for importing it into the result.'''
+    imagePrefix = 'PdfPart'
+    def getImportFolder(self):
+        return '%s/docImports' % self.tempFolder
+    def run(self):
+        # Split the PDF into images with Ghostscript
+        imagesFolder = os.path.dirname(self.importPath)
+        cmd = 'gs -dNOPAUSE -dBATCH -sDEVICE=jpeg -r125x125 ' \
+              '-sOutputFile=%s/%s%%d.jpg %s' % \
+              (imagesFolder, self.imagePrefix, self.importPath)
+        os.system(cmd)
+        # Check that at least one image was generated
+        succeeded = False
+        firstImage = '%s1.jpg' % self.imagePrefix
+        for fileName in os.listdir(imagesFolder):
+            if fileName == firstImage:
+                succeeded = True
+                break
+        if not succeeded:
+            raise PodError(PDF_TO_IMG_ERROR)
+        # Insert images into the result.
+        noMoreImages = False
+        i = 0
+        while not noMoreImages:
+            i += 1
+            nextImage = '%s/%s%d.jpg' % (imagesFolder, self.imagePrefix, i)
+            if os.path.exists(nextImage):
+                # Use internally an Image importer for doing this job.
+                imgImporter = ImageImporter(None, nextImage, 'jpg',
+                    self.tempFolder, self.ns)
+                imgImporter.setAnchor('paragraph')
+                self.res += imgImporter.run()
+                os.remove(nextImage)
+            else:
+                noMoreImages = True
+        return self.res
+
+# Compute size of images -------------------------------------------------------
+jpgTypes = ('jpg', 'jpeg')
+pxToCm = 44.173513561
+def getSize(filePath, fileType):
+    '''Gets the size of an image by reading first bytes.'''
+    x, y = (None, None)
+    f = file(filePath)
+    if fileType in jpgTypes:
+        # Dummy read to skip header ID
+        f.read(2)
+        while True:
+            # Extract the segment header.
+            (marker, code, length) = struct.unpack("!BBH", f.read(4))
+            # Verify that it's a valid segment.
+            if marker != 0xFF:
+                # No JPEG marker
+                break
+            elif code >= 0xC0 and code <= 0xC3:
+                # Segments that contain size info
+                (y, x) = struct.unpack("!xHH", f.read(5))
+                break
+            else:
+                # Dummy read to skip over data
+                f.read(length-2)
+    elif fileType == 'png':
+        # Dummy read to skip header data
+        f.read(12)
+        if f.read(4) == "IHDR":
+            x, y = struct.unpack("!LL", f.read(8))
+    elif fileType == 'gif':
+        imgType = f.read(6)
+        buf = f.read(5)
+        if len(buf) == 5:
+            # else: invalid/corrupted GIF (bad header)
+            x, y, u = struct.unpack("<HHB", buf)
+    return float(x)/pxToCm, float(y)/pxToCm
+
+class ImageImporter(DocImporter):
+    '''This class allows to import into the ODT result an image stored
+       externally.'''
+    anchorTypes = ('page', 'paragraph', 'char', 'as-char')
+    WRONG_ANCHOR = 'Wrong anchor. Valid values for anchors are: %s.'
+    def getImportFolder(self):
+        return '%s/unzip/Pictures' % self.tempFolder
+    def moveFile(self, at):
+        '''Image to insert is at p_at. We must move it into the ODT result.'''
+        fileName = os.path.basename(at)
+        folderName = self.getImportFolder()
+        if not os.path.exists(folderName):
+            os.mkdir(folderName)
+        res = self.getImportPath(fileName)
+        shutil.copy(at, res)
+        return res
+    def setAnchor(self, anchor):
+        if anchor not in self.anchorTypes:
+            raise PodError(self.WRONG_ANCHOR % str(self.anchorTypes))
+        self.anchor = anchor
+    def run(self):
+        # Some shorcuts for the used xml namespaces
+        d = self.drawNs
+        t = self.textNs
+        x = self.linkNs
+        s = self.svgNs
+        imageName = 'Image%f' % time.time()
+        # Compute path to image
+        i = self.importPath.rfind('/Pictures/')
+        imagePath = self.importPath[i+1:]
+        # Compute image size
+        width, height = getSize(self.importPath, self.format)
+        if width != None:
+            size = ' %s:width="%fcm" %s:height="%fcm"' % (s, width, s, height)
+        else:
+            size = ''
+        self.res += '<%s:p><%s:frame %s:name="%s" %s:z-index="0" ' \
+                    '%s:anchor-type="%s"%s><%s:image %s:type="simple" ' \
+                    '%s:show="embed" %s:href="%s" %s:actuate="onLoad"/>' \
+                    '</%s:frame></%s:p>' % \
+                    (t, d, d, imageName, d, t, self.anchor, size, d, x, x, x,
+                     imagePath, x, d, t)
+        return self.res
+# ------------------------------------------------------------------------------