Initial import
This commit is contained in:
commit
4043163fc4
427 changed files with 18387 additions and 0 deletions
222
pod/converter.py
Executable file
222
pod/converter.py
Executable file
|
@ -0,0 +1,222 @@
|
|||
# ------------------------------------------------------------------------------
|
||||
# Appy is a framework for building applications in the Python language.
|
||||
# Copyright (C) 2007 Gaetan Delannay
|
||||
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
import sys, os, os.path, time, signal
|
||||
from optparse import OptionParser
|
||||
|
||||
# Maps every supported result type (which is also the extension given to the
# result file) to the name of the OpenOffice export filter producing it.
ODT_FILE_TYPES = {
    'doc': 'MS Word 97', # Could be 'MS Word 2003 XML'
    'pdf': 'writer_pdf_Export',
    'rtf': 'Rich Text Format',
    'txt': 'Text',
    'html': 'HTML (StarWriter)',
    'htm': 'HTML (StarWriter)',
    'odt': 'ODT',
}
# "Converting" to ODT does not perform any format conversion; it simply updates
# indexes and linked documents.
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
class ConverterError(Exception):
    '''Raised by the converter when a conversion cannot be carried out (the
       document is missing, the result type is unknown, the result cannot be
       written or OpenOffice cannot be reached).'''
|
||||
|
||||
# ConverterError-related messages ----------------------------------------------
# Every message below is a %-format template filled in when the corresponding
# ConverterError is raised.
DOC_NOT_FOUND = 'Document "%s" was not found.'
URL_NOT_FOUND = 'Doc URL "%s" is wrong. %s'
BAD_RESULT_TYPE = 'Bad result type "%s". Available types are %s.'
CANNOT_WRITE_RESULT = 'I cannot write result "%s". %s'
CONNECT_ERROR = ('Could not connect to OpenOffice on port %d. UNO '
                 '(OpenOffice API) says: %s.')

# Some constants ---------------------------------------------------------------
# Port used for reaching OpenOffice when the caller does not specify one.
DEFAULT_PORT = 2002
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
class Converter:
    '''Converts an ODT document into one of the result types listed in
       ODT_FILE_TYPES, by asking an OpenOffice instance, reached through UNO
       on a given port, to load the document and store it with the
       appropriate export filter.

       Typical use: Converter(docPath, resultType, port).run()'''
    exeVariants = ('soffice.exe', 'soffice')
    pathReplacements = {'program files': 'progra~1',
                        'openoffice.org 1': 'openof~1',
                        'openoffice.org 2': 'openof~1',
                       }

    def __init__(self, docPath, resultType, port=DEFAULT_PORT):
        '''p_docPath is the absolute or relative path to the document to
           convert; p_resultType is one of the keys of ODT_FILE_TYPES;
           p_port is the port on which OpenOffice listens.

           Raises ConverterError if the document does not exist, if the
           result type is unknown or if the result file cannot be written.'''
        self.port = port
        self.docUrl = self.getDocUrl(docPath)
        self.resultFilter = self.getResultFilter(resultType)
        self.resultUrl = self.getResultUrl(resultType)
        self.ooContext = None
        self.oo = None # OpenOffice application object
        self.doc = None # OpenOffice loaded document

    def getDocUrl(self, docPath):
        '''Returns the "file:///" URL corresponding to p_docPath. Raises
           ConverterError if p_docPath does not denote an existing file.'''
        # os.path.isfile is False for inexistent paths as well as for
        # directories: a folder is not a convertible document.
        if not os.path.isfile(docPath):
            raise ConverterError(DOC_NOT_FOUND % docPath)
        docAbsPath = os.path.abspath(docPath)
        return 'file:///' + docAbsPath.replace('\\', '/')

    def getResultFilter(self, resultType):
        '''Returns the name of the OpenOffice filter that corresponds to
           p_resultType. Raises ConverterError if p_resultType is not among
           the keys of ODT_FILE_TYPES.'''
        if resultType not in ODT_FILE_TYPES:
            raise ConverterError(BAD_RESULT_TYPE % (
                resultType, list(ODT_FILE_TYPES.keys())))
        return ODT_FILE_TYPES[resultType]

    def getResultUrl(self, resultType):
        '''Returns the URL of the result file: the URL of the document to
           convert whose extension is replaced with p_resultType (or with
           "res.odt" when p_resultType is "odt", in order to avoid
           overwriting the original document).

           Checks that the result is writable by creating then removing a
           file at that place; raises ConverterError if it fails.'''
        baseName = os.path.splitext(self.docUrl)[0]
        if resultType != 'odt':
            res = '%s.%s' % (baseName, resultType)
        else:
            res = '%s.res.%s' % (baseName, resultType)
        # The write test below creates then deletes a file at the result
        # location: if this location is the document to convert itself, the
        # test would destroy it.
        if res == self.docUrl:
            raise ConverterError(CANNOT_WRITE_RESULT % (
                res, 'It would overwrite the document to convert.'))
        fileName = res[8:] # Strips the leading 'file:///'
        try:
            with open(fileName, 'w') as f:
                f.write('Hello')
            os.remove(fileName)
        except (IOError, OSError) as err:
            # On Python 2, "open" raises IOError, which is not an OSError.
            raise ConverterError(CANNOT_WRITE_RESULT % (res, err))
        return res

    def connect(self):
        '''Connects to OpenOffice on self.port. Stores the remote component
           context in self.ooContext and the desktop object in self.oo.
           Raises ConverterError if UNO reports that the connection could
           not be established.'''
        import uno
        from com.sun.star.connection import NoConnectException
        try:
            # Get the uno component context from the PyUNO runtime
            localContext = uno.getComponentContext()
            # Create the UnoUrlResolver
            resolver = localContext.ServiceManager.createInstanceWithContext(
                "com.sun.star.bridge.UnoUrlResolver", localContext)
            # Connect to the running office
            self.ooContext = resolver.resolve(
                'uno:socket,host=localhost,port=%d;urp;StarOffice.' \
                'ComponentContext' % self.port)
            # It seems that we can't define a timeout for this method.
            # I need it because, for example, when a web server already listens
            # to the given port (thus, not an OpenOffice instance), this method
            # blocks.
            smgr = self.ooContext.ServiceManager
            # Get the central desktop object
            self.oo = smgr.createInstanceWithContext(
                'com.sun.star.frame.Desktop', self.ooContext)
        except NoConnectException as nce:
            raise ConverterError(CONNECT_ERROR % (self.port, nce))

    def disconnect(self):
        '''Closes the loaded document (if any) and flushes pending calls to
           OpenOffice. May be called even if the document was not loaded.'''
        if self.doc is not None:
            self.doc.close(True)
            self.doc = None
        # Do a nasty thing before exiting the python process. In case the
        # last call is a oneway call (e.g. see idl-spec of insertString),
        # it must be forced out of the remote-bridge caches before python
        # exits the process. Otherwise, the oneway call may or may not reach
        # the target object.
        # I do this here by making a cheap synchronous call (reading the
        # ServiceManager property).
        if self.ooContext is not None:
            self.ooContext.ServiceManager

    def loadDocument(self):
        '''Loads the document to convert into a hidden OpenOffice frame
           (self.doc), updates all its indexes and links, and removes the
           sections that are linked to external files while keeping their
           content. Raises ConverterError if OpenOffice rejects the URL of
           the document.'''
        from com.sun.star.lang import IllegalArgumentException, \
                                      IndexOutOfBoundsException
        # I need to use IndexOutOfBoundsException because sometimes, when
        # using sections.getCount, UNO returns a number that is bigger than
        # the real number of sections (this is because it also counts the
        # sections that are present within the sub-documents to integrate)
        from com.sun.star.beans import PropertyValue
        try:
            # Load the document to convert in a new hidden frame
            prop = PropertyValue()
            prop.Name = 'Hidden'
            prop.Value = True
            self.doc = self.oo.loadComponentFromURL(self.docUrl, "_blank", 0,
                                                    (prop,))
            # Update all indexes
            indexes = self.doc.getDocumentIndexes()
            for i in range(indexes.getCount()):
                try:
                    indexes.getByIndex(i).update()
                except IndexOutOfBoundsException:
                    pass # See remark about the overestimated count above
            # Update sections
            self.doc.updateLinks()
            sections = self.doc.getTextSections()
            # I must walk the sections from the last one to the first one.
            # Else, when "disposing" sections, I remove sections and the
            # remaining sections get other indexes.
            for i in range(sections.getCount()-1, -1, -1):
                try:
                    section = sections.getByIndex(i)
                    if section.FileLink and section.FileLink.FileURL:
                        # This method removes the <section></section> tags
                        # without removing the content of the section. Else,
                        # it won't appear.
                        section.dispose()
                except IndexOutOfBoundsException:
                    pass # See remark about the overestimated count above
        except IllegalArgumentException as iae:
            raise ConverterError(URL_NOT_FOUND % (self.docUrl, iae))

    def convertDocument(self):
        '''Stores the loaded document at self.resultUrl, through the export
           filter self.resultFilter, or without any filter when the result
           is an ODT file.'''
        if self.resultFilter != 'ODT':
            # I must really perform a conversion
            from com.sun.star.beans import PropertyValue
            prop = PropertyValue()
            prop.Name = 'FilterName'
            prop.Value = self.resultFilter
            self.doc.storeToURL(self.resultUrl, (prop,))
        else:
            self.doc.storeToURL(self.resultUrl, ())

    def run(self):
        '''Performs the whole conversion: connects to OpenOffice, loads the
           document, converts it and disconnects. Raises ConverterError if
           the connection or the loading fails.'''
        self.connect()
        try:
            self.loadDocument()
            self.convertDocument()
        finally:
            # Whatever happens, do not leave the (hidden) document open in
            # the OpenOffice instance.
            self.disconnect()
|
||||
|
||||
# ConverterScript-related messages ---------------------------------------------
# Message written on stderr when the script does not get exactly 2 arguments.
WRONG_NB_OF_ARGS = 'Wrong number of arguments.'
# Exit status of the script when it fails.
ERROR_CODE = 1
|
||||
|
||||
# Class representing the command-line program ----------------------------------
|
||||
class ConverterScript:
    '''Command-line front-end to the Converter: parses the arguments, runs
       the conversion and, on error, writes the problem on stderr, prints
       the help and exits with status ERROR_CODE.'''
    usage = ('usage: python converter.py fileToConvert outputType [options]\n'
             ' where fileToConvert is the absolute or relative pathname of\n'
             ' the ODT file you want to convert;\n'
             ' and outputType is the output format, that must be one of\n'
             ' %s.\n'
             ' "python" should be a UNO-enabled Python interpreter (ie the one\n'
             ' which is included in the OpenOffice.org distribution).') % \
            str(list(ODT_FILE_TYPES.keys()))

    def _fail(self, optParser, message):
        '''Writes p_message on stderr, prints the help of p_optParser and
           exits the process with status ERROR_CODE.'''
        sys.stderr.write(message)
        sys.stderr.write('\n')
        optParser.print_help()
        sys.exit(ERROR_CODE)

    def run(self):
        '''Reads the file to convert, the output type and the optional port
           from the command line and performs the conversion.'''
        optParser = OptionParser(usage=ConverterScript.usage)
        optParser.add_option("-p", "--port", dest="port",
                             help="The port on which OpenOffice runs. " \
                                  "Default is %d." % DEFAULT_PORT,
                             default=DEFAULT_PORT, metavar="PORT", type='int')
        (options, args) = optParser.parse_args()
        if len(args) != 2:
            self._fail(optParser, WRONG_NB_OF_ARGS)
        try:
            # The Converter constructor itself may raise a ConverterError
            # (document not found, bad result type, unwritable result): it
            # must be created within the "try" block.
            converter = Converter(args[0], args[1], options.port)
            converter.run()
        except ConverterError as ce:
            self._fail(optParser, str(ce))
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Run the command-line program when this file is executed as a script (and not
# when it is imported as a module).
if __name__ == '__main__':
    script = ConverterScript()
    script.run()
|
||||
# ------------------------------------------------------------------------------
|
Loading…
Add table
Add a link
Reference in a new issue