2009-06-29 07:06:01 -05:00
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Appy is a framework for building applications in the Python language.
|
|
|
|
# Copyright (C) 2007 Gaetan Delannay
|
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
2009-11-20 13:17:06 -06:00
|
|
|
import sys, os, os.path, time, signal
|
2009-06-29 07:06:01 -05:00
|
|
|
from optparse import OptionParser
|
|
|
|
|
2009-12-17 14:14:52 -06:00
|
|
|
# OpenOffice export filters producing HTML, keyed by the input document type.
htmlFilters = {
    'odt': 'HTML (StarWriter)',
    'ods': 'HTML (StarCalc)',
    'odp': 'impress_html_Export',
}

# Maps every supported result type to the name of the OpenOffice filter that
# produces it. When the value is a dict, the filter depends on the type of the
# input document, and the dict is keyed by that input type.
FILE_TYPES = {
    'odt': 'writer8',
    'ods': 'calc8',
    'odp': 'impress8',
    'htm': htmlFilters,
    'html': htmlFilters,
    'rtf': 'Rich Text Format',
    'txt': 'Text',
    'csv': 'Text - txt - csv (StarCalc)',
    'pdf': {
        'odt': 'writer_pdf_Export',
        'ods': 'calc_pdf_Export',
        'odp': 'impress_pdf_Export',
        'odg': 'draw_pdf_Export',
    },
    'swf': 'impress_flash_Export',
    'doc': 'MS Word 97',
    'xls': 'MS Excel 97',
    'ppt': 'MS PowerPoint 97',
    'docx': 'MS Word 2007 XML',
    'xlsx': 'Calc MS Excel 2007 XML',
}
# Conversion from odt to odt does not make any conversion, but updates indexes
# and linked documents.
|
2009-06-29 07:06:01 -05:00
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
class ConverterError(Exception):
    '''Raised when a document conversion cannot be prepared or performed.'''
|
|
|
|
|
|
|
|
# ConverterError-related messages ----------------------------------------------
|
|
|
|
# Each message is a %-format template; the placeholders are filled in at the
# place where the corresponding ConverterError is raised.
DOC_NOT_FOUND = 'Document "%s" was not found.'
URL_NOT_FOUND = 'Doc URL "%s" is wrong. %s'
BAD_RESULT_TYPE = 'Bad result type "%s". Available types are %s.'
CANNOT_WRITE_RESULT = 'I cannot write result "%s". %s'
CONNECT_ERROR = 'Could not connect to OpenOffice on port %d. UNO ' \
                '(OpenOffice API) says: %s.'
|
|
|
|
|
|
|
|
# Some constants ---------------------------------------------------------------
|
|
|
|
# Port used to reach OpenOffice when the caller does not specify one.
DEFAULT_PORT = 2002
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
class Converter:
    '''Converts a document readable by OpenOffice into pdf, doc, txt, rtf...

       A converter connects through UNO to an OpenOffice instance listening on
       a port of the local machine, loads the input document in a hidden frame
       and stores it with the filter that matches the requested result type.'''
    exeVariants = ('soffice.exe', 'soffice')
    pathReplacements = {'program files': 'progra~1',
                        'openoffice.org 1': 'openof~1',
                        'openoffice.org 2': 'openof~1',
                        }

    def __init__(self, docPath, resultType, port=DEFAULT_PORT):
        '''Prepares the conversion of the document at p_docPath into
           p_resultType (one of the keys of FILE_TYPES), using the OpenOffice
           instance listening on p_port.

           Raises ConverterError if the input document does not exist, if the
           conversion is not supported or if the result file is not
           writable.'''
        self.port = port
        # The statements below depend on each other: keep them in that order.
        self.docUrl, self.docPath = self.getInputUrls(docPath)
        self.inputType = os.path.splitext(docPath)[1][1:].lower()
        self.resultType = resultType
        self.resultFilter = self.getResultFilter()
        self.resultUrl = self.getResultUrl()
        self.ooContext = None
        self.oo = None # The OpenOffice application object
        self.doc = None # The OpenOffice loaded document

    def getInputUrls(self, docPath):
        '''Returns the absolute path of the input file. In fact, it returns a
           tuple with some URL version of the path for OO as the first element
           and the absolute path as the second element.

           Raises ConverterError if p_docPath is not an existing file.'''
        import unohelper
        # isfile is False for a missing path as well as for a directory.
        if not os.path.isfile(docPath):
            raise ConverterError(DOC_NOT_FOUND % docPath)
        docAbsPath = os.path.abspath(docPath)
        # Return one path for OO, one path for me.
        return unohelper.systemPathToFileUrl(docAbsPath), docAbsPath

    def getResultFilter(self):
        '''Based on the result type, identifies which OO filter to use for the
           document conversion.

           Raises ConverterError if the result type is unknown or if it cannot
           be produced from the type of the input document.'''
        if self.resultType not in FILE_TYPES:
            raise ConverterError(BAD_RESULT_TYPE % (self.resultType,
                                                    sorted(FILE_TYPES)))
        res = FILE_TYPES[self.resultType]
        if isinstance(res, dict):
            # For this result type, the filter depends on the input type.
            if self.inputType not in res:
                raise ConverterError(
                    'Cannot convert a "%s" document into "%s". Supported ' \
                    'input types for this result type are %s.' % \
                    (self.inputType, self.resultType, sorted(res)))
            res = res[self.inputType]
        return res

    def getResultUrl(self):
        '''Returns the path of the result file in the format needed by OO. If
           the result type and the input type are the same (ie the user wants to
           refresh indexes or some other action and not perform a real
           conversion), the result file is named
                           <inputFileName>.res.<resultType>.

           Else, the result file is named like the input file but with a
           different extension:
                           <inputFileName>.<resultType>

           Raises ConverterError if the result file cannot be written.'''
        import unohelper
        baseName = os.path.splitext(self.docPath)[0]
        if self.resultType != self.inputType:
            res = '%s.%s' % (baseName, self.resultType)
        else:
            res = '%s.res.%s' % (baseName, self.resultType)
        # Check that the result is writable by creating then removing a dummy
        # file at that place. Note that a file already present there is lost.
        try:
            f = open(res, 'w')
            try:
                f.write('Hello')
            finally:
                f.close()
            os.remove(res)
        except (OSError, IOError) as ioe:
            raise ConverterError(CANNOT_WRITE_RESULT % (res, ioe))
        return unohelper.systemPathToFileUrl(res)

    def connect(self):
        '''Connects to OpenOffice and stores the remote component context in
           self.ooContext and the desktop object in self.oo.

           Raises ConverterError if UNO reports that no connection could be
           established.'''
        if os.name == 'nt':
            # NOTE(review): socket is not used in this method; presumably it
            # must be loaded before uno on Windows. Confirm before removing.
            import socket
        import uno
        from com.sun.star.connection import NoConnectException
        try:
            # Get the uno component context from the PyUNO runtime
            localContext = uno.getComponentContext()
            # Create the UnoUrlResolver
            resolver = localContext.ServiceManager.createInstanceWithContext(
                "com.sun.star.bridge.UnoUrlResolver", localContext)
            # Connect to the running office.
            # It seems that we can't define a timeout for this method.
            # I need it because, for example, when a web server already listens
            # to the given port (thus, not a OpenOffice instance), this method
            # blocks.
            self.ooContext = resolver.resolve(
                'uno:socket,host=localhost,port=%d;urp;StarOffice.' \
                'ComponentContext' % self.port)
            smgr = self.ooContext.ServiceManager
            # Get the central desktop object
            self.oo = smgr.createInstanceWithContext(
                'com.sun.star.frame.Desktop', self.ooContext)
        except NoConnectException as nce:
            raise ConverterError(CONNECT_ERROR % (self.port, nce))

    def updateOdtDocument(self):
        '''If the input file is an ODT document, we will perform 2 tasks:
           1) Update all indexes;
           2) Update sections (if sections refer to external content, we try to
              include the content within the result file)
        '''
        from com.sun.star.lang import IndexOutOfBoundsException
        # I need to use IndexOutOfBoundsException because sometimes, when
        # using sections.getCount, UNO returns a number that is bigger than
        # the real number of sections (this is because it also counts the
        # sections that are present within the sub-documents to integrate)
        # Update all indexes
        indexes = self.doc.getDocumentIndexes()
        for i in range(indexes.getCount()):
            try:
                indexes.getByIndex(i).update()
            except IndexOutOfBoundsException:
                pass
        # Update sections
        self.doc.updateLinks()
        sections = self.doc.getTextSections()
        # I must walk the sections from the last one to the first one. Else,
        # when "disposing" sections, I remove sections and the remaining
        # sections get other indexes.
        for i in range(sections.getCount()-1, -1, -1):
            try:
                section = sections.getByIndex(i)
                if section.FileLink and section.FileLink.FileURL:
                    # This method removes the <section></section> tags
                    # without removing the content of the section. Else, it
                    # won't appear.
                    section.dispose()
            except IndexOutOfBoundsException:
                pass

    def loadDocument(self):
        '''Loads the input document in a new hidden frame and stores it in
           self.doc. ODT documents get their indexes and sections updated.

           Raises ConverterError if OpenOffice rejects the document URL or
           returns no document.'''
        from com.sun.star.lang import IllegalArgumentException
        from com.sun.star.beans import PropertyValue
        try:
            # Loads the document to convert in a new hidden frame
            prop = PropertyValue()
            prop.Name = 'Hidden'
            prop.Value = True
            if self.inputType == 'csv':
                # Give some additional params if we need to open a CSV file.
                # NOTE(review): presumably separator ";" (59), text delimiter
                # '"' (34), character set 76 and first line 1; check against
                # the OpenOffice CSV filter options.
                prop2 = PropertyValue()
                prop2.Name = 'FilterFlags'
                prop2.Value = '59,34,76,1'
                props = (prop, prop2)
            else:
                props = (prop,)
            self.doc = self.oo.loadComponentFromURL(self.docUrl, "_blank", 0,
                                                    props)
            if self.doc is None:
                # No exception was raised but nothing was loaded; fail here
                # rather than on the first use of self.doc.
                raise ConverterError(URL_NOT_FOUND % (self.docPath,
                                     'OpenOffice returned no document.'))
            if self.inputType == 'odt':
                # Perform additional tasks for odt documents
                self.updateOdtDocument()
            try:
                self.doc.refresh()
            except AttributeError:
                # This document has no "refresh" method: nothing to do.
                pass
        except IllegalArgumentException as iae:
            raise ConverterError(URL_NOT_FOUND % (self.docPath, iae))

    def convertDocument(self):
        '''Calls OO to perform a document conversion. Note that the conversion
           is not really done if the source and target documents have the same
           type.'''
        from com.sun.star.beans import PropertyValue
        properties = []
        prop = PropertyValue()
        prop.Name = 'FilterName'
        prop.Value = self.resultFilter
        properties.append(prop)
        if self.resultType == 'csv':
            # For CSV export, add options (separator, etc)
            optionsProp = PropertyValue()
            optionsProp.Name = 'FilterOptions'
            optionsProp.Value = '59,34,76,1'
            properties.append(optionsProp)
        self.doc.storeToURL(self.resultUrl, tuple(properties))

    def run(self):
        '''Connects to OO, does the job and disconnects.'''
        self.connect()
        try:
            self.loadDocument()
            self.convertDocument()
        finally:
            # Close the document even if the conversion failed, so that it
            # does not stay open in the hidden frame.
            if self.doc is not None:
                self.doc.close(True)
|
2009-06-29 07:06:01 -05:00
|
|
|
|
|
|
|
# ConverterScript-related messages ---------------------------------------------
|
|
|
|
WRONG_NB_OF_ARGS = 'Wrong number of arguments.'
# Exit status of the script when it fails.
ERROR_CODE = 1
|
|
|
|
|
|
|
|
# Class representing the command-line program ----------------------------------
|
|
|
|
class ConverterScript:
    '''Command-line front end: parses the arguments, then runs a Converter.'''
    usage = 'usage: python converter.py fileToConvert outputType [options]\n' \
            ' where fileToConvert is the absolute or relative pathname of\n' \
            ' the file you want to convert (or whose content like\n' \
            ' indexes need to be refreshed);\n'\
            ' and outputType is the output format, that must be one of\n' \
            ' %s.\n' \
            ' "python" should be a UNO-enabled Python interpreter (ie the ' \
            ' one which is included in the OpenOffice.org distribution).' % \
            str(sorted(FILE_TYPES))

    def _exitWithError(self, optParser, message):
        '''Writes p_message on stderr, prints the help of p_optParser and
           exits with ERROR_CODE.'''
        sys.stderr.write(message)
        sys.stderr.write('\n')
        optParser.print_help()
        sys.exit(ERROR_CODE)

    def run(self):
        '''Parses the command line and performs the requested conversion.

           On a wrong number of arguments or on any ConverterError, the
           message and the help are printed and the script exits with
           ERROR_CODE.'''
        optParser = OptionParser(usage=ConverterScript.usage)
        optParser.add_option("-p", "--port", dest="port",
                             help="The port on which OpenOffice runs. " \
                                  "Default is %d." % DEFAULT_PORT,
                             default=DEFAULT_PORT, metavar="PORT", type='int')
        (options, args) = optParser.parse_args()
        if len(args) != 2:
            self._exitWithError(optParser, WRONG_NB_OF_ARGS)
        try:
            # The constructor validates its arguments and may raise
            # ConverterError too: it must be within the "try".
            converter = Converter(args[0], args[1], options.port)
            converter.run()
        except ConverterError as ce:
            self._exitWithError(optParser, str(ce))
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
# Run the command-line program when this file is executed as a script.
if __name__ == '__main__':
    ConverterScript().run()
|
|
|
|
# ------------------------------------------------------------------------------
|