More input and output formats for OO conversion in converter.py and bugfix in skyn edit.

This commit is contained in:
Gaetan Delannay 2009-12-17 21:14:52 +01:00
parent fff2b6a329
commit e89eda4838
7 changed files with 190 additions and 113 deletions

View file

@ -20,15 +20,26 @@
import sys, os, os.path, time, signal
from optparse import OptionParser
ODT_FILE_TYPES = {'doc': 'MS Word 97', # Could be 'MS Word 2003 XML'
'pdf': 'writer_pdf_Export',
'rtf': 'Rich Text Format',
'txt': 'Text',
'html': 'HTML (StarWriter)',
'htm': 'HTML (StarWriter)',
'odt': 'ODT'}
# Conversion to ODT does not make any conversion; it simply updates indexes and
# linked documents.
htmlFilters = {'odt': 'HTML (StarWriter)',
'ods': 'HTML (StarCalc)',
'odp': 'impress_html_Export'}
FILE_TYPES = {'odt': 'writer8',
'ods': 'calc8',
'odp': 'impress8',
'htm': htmlFilters, 'html': htmlFilters,
'rtf': 'Rich Text Format',
'txt': 'Text',
'csv': 'Text - txt - csv (StarCalc)',
'pdf': {'odt': 'writer_pdf_Export', 'ods': 'calc_pdf_Export',
'odp': 'impress_pdf_Export', 'odg': 'draw_pdf_Export'},
'swf': 'impress_flash_Export',
'doc': 'MS Word 97',
'xls': 'MS Excel 97',
'ppt': 'MS PowerPoint 97',
}
# Conversion from odt to odt does not make any conversion, but updates indexes
# and linked documents.
# ------------------------------------------------------------------------------
class ConverterError(Exception): pass
@ -46,7 +57,7 @@ DEFAULT_PORT = 2002
# ------------------------------------------------------------------------------
class Converter:
'''Converts an ODT document into pdf, doc, txt or rtf.'''
'''Converts an document readable by OpenOffice into pdf, doc, txt or rtf.'''
exeVariants = ('soffice.exe', 'soffice')
pathReplacements = {'program files': 'progra~1',
'openoffice.org 1': 'openof~1',
@ -54,41 +65,64 @@ class Converter:
}
def __init__(self, docPath, resultType, port=DEFAULT_PORT):
self.port = port
self.docUrl, self.docPath = self.getDocUrls(docPath)
self.resultFilter = self.getResultFilter(resultType)
self.resultUrl = self.getResultUrl(resultType)
self.docUrl, self.docPath = self.getInputUrls(docPath)
self.inputType = os.path.splitext(docPath)[1][1:].lower()
self.resultType = resultType
self.resultFilter = self.getResultFilter()
self.resultUrl = self.getResultUrl()
self.ooContext = None
self.oo = None # OpenOffice application object
self.doc = None # OpenOffice loaded document
def getDocUrls(self, docPath):
self.oo = None # The OpenOffice application object
self.doc = None # The OpenOffice loaded document
def getInputUrls(self, docPath):
'''Returns the absolute path of the input file. In fact, it returns a
tuple with some URL version of the path for OO as the first element
and the absolute path as the second element.'''
import uno
if not os.path.exists(docPath) and not os.path.isfile(docPath):
raise ConverterError(DOC_NOT_FOUND % docPath)
docAbsPath = os.path.abspath(docPath)
# Return one path for OO, one path for me.
return uno.systemPathToFileUrl(docAbsPath), docAbsPath
def getResultFilter(self, resultType):
if ODT_FILE_TYPES.has_key(resultType):
res = ODT_FILE_TYPES[resultType]
def getResultFilter(self):
'''Based on the result type, identifies which OO filter to use for the
document conversion.'''
if FILE_TYPES.has_key(self.resultType):
res = FILE_TYPES[self.resultType]
if isinstance(res, dict):
res = res[self.inputType]
else:
raise ConverterError(BAD_RESULT_TYPE % (resultType,
ODT_FILE_TYPES.keys()))
raise ConverterError(BAD_RESULT_TYPE % (self.resultType,
FILE_TYPES.keys()))
return res
def getResultUrl(self, resultType):
def getResultUrl(self):
'''Returns the path of the result file in the format needed by OO. If
the result type and the input type are the same (ie the user wants to
refresh indexes or some other action and not perform a real
conversion), the result file is named
<inputFileName>.res.<resultType>.
Else, the result file is named like the input file but with a
different extension:
<inputFileName>.<resultType>
'''
import uno
baseName = os.path.splitext(self.docPath)[0]
if resultType != 'odt':
res = '%s.%s' % (baseName, resultType)
if self.resultType != self.inputType:
res = '%s.%s' % (baseName, self.resultType)
else:
res = '%s.res.%s' % (baseName, resultType)
res = '%s.res.%s' % (baseName, self.resultType)
try:
f = open(res, 'w')
f.write('Hello')
f.close()
os.remove(res)
return uno.systemPathToFileUrl(res)
except OSError, oe:
raise ConverterError(CANNOT_WRITE_RESULT % (res, oe))
except (OSError, IOError), ioe:
raise ConverterError(CANNOT_WRITE_RESULT % (res, ioe))
def connect(self):
'''Connects to OpenOffice'''
if os.name == 'nt':
@ -115,73 +149,90 @@ class Converter:
'com.sun.star.frame.Desktop', self.ooContext)
except NoConnectException, nce:
raise ConverterError(CONNECT_ERROR % (self.port, nce))
def disconnect(self):
self.doc.close(True)
# Do a nasty thing before exiting the python process. In case the
# last call is a oneway call (e.g. see idl-spec of insertString),
# it must be forced out of the remote-bridge caches before python
# exits the process. Otherwise, the oneway call may or may not reach
# the target object.
# I do this here by calling a cheap synchronous call (getPropertyValue).
self.ooContext.ServiceManager
def loadDocument(self):
from com.sun.star.lang import IllegalArgumentException, \
IndexOutOfBoundsException
def updateOdtDocument(self):
'''If the input file is an ODT document, we will perform 2 tasks:
1) Update all annexes;
2) Update sections (if sections refer to external content, we try to
include the content within the result file)
'''
from com.sun.star.lang import IndexOutOfBoundsException
# I need to use IndexOutOfBoundsException because sometimes, when
# using sections.getCount, UNO returns a number that is bigger than
# the real number of sections (this is because it also counts the
# sections that are present within the sub-documents to integrate)
# Update all indexes
indexes = self.doc.getDocumentIndexes()
indexesCount = indexes.getCount()
if indexesCount != 0:
for i in range(indexesCount):
try:
indexes.getByIndex(i).update()
except IndexOutOfBoundsException:
pass
# Update sections
self.doc.updateLinks()
sections = self.doc.getTextSections()
sectionsCount = sections.getCount()
if sectionsCount != 0:
for i in range(sectionsCount-1, -1, -1):
# I must walk into the section from last one to the first
# one. Else, when "disposing" sections, I remove sections
# and the remaining sections other indexes.
try:
section = sections.getByIndex(i)
if section.FileLink and section.FileLink.FileURL:
section.dispose() # This method removes the
# <section></section> tags without removing the content
# of the section. Else, it won't appear.
except IndexOutOfBoundsException:
pass
def loadDocument(self):
from com.sun.star.lang import IllegalArgumentException, \
IndexOutOfBoundsException
from com.sun.star.beans import PropertyValue
try:
# Load the document to convert in a new hidden frame
# Loads the document to convert in a new hidden frame
prop = PropertyValue()
prop.Name = 'Hidden'
prop.Value = True
self.doc = self.oo.loadComponentFromURL(self.docUrl, "_blank", 0,
(prop,))
# Update all indexes
indexes = self.doc.getDocumentIndexes()
indexesCount = indexes.getCount()
if indexesCount != 0:
for i in range(indexesCount):
try:
indexes.getByIndex(i).update()
except IndexOutOfBoundsException:
pass
# Update sections
self.doc.updateLinks()
sections = self.doc.getTextSections()
sectionsCount = sections.getCount()
if sectionsCount != 0:
for i in range(sectionsCount-1, -1, -1):
# I must walk into the section from last one to the first
# one. Else, when "disposing" sections, I remove sections
# and the remaining sections other indexes.
try:
section = sections.getByIndex(i)
if section.FileLink and section.FileLink.FileURL:
section.dispose() # This method removes the
# <section></section> tags without removing the content
# of the section. Else, it won't appear.
except IndexOutOfBoundsException:
pass
if self.inputType == 'odt':
# Perform additional tasks for odt documents
self.updateOdtDocument()
try:
self.doc.refresh()
except AttributeError:
pass
except IllegalArgumentException, iae:
raise ConverterError(URL_NOT_FOUND % (self.docPath, iae))
def convertDocument(self):
if self.resultFilter != 'ODT':
# I must really perform a conversion
from com.sun.star.beans import PropertyValue
prop = PropertyValue()
prop.Name = 'FilterName'
prop.Value = self.resultFilter
self.doc.storeToURL(self.resultUrl, (prop,))
else:
self.doc.storeToURL(self.resultUrl, ())
'''Calls OO to perform a document conversion. Note that the conversion
is not really done if the source and target documents have the same
type.'''
properties = []
from com.sun.star.beans import PropertyValue
prop = PropertyValue()
prop.Name = 'FilterName'
prop.Value = self.resultFilter
properties.append(prop)
if self.resultType == 'csv':
# For CSV export, add options (separator, etc)
optionsProp = PropertyValue()
optionsProp.Name = 'FilterOptions'
optionsProp.Value = '59,34,76,1'
properties.append(optionsProp)
self.doc.storeToURL(self.resultUrl, tuple(properties))
def run(self):
'''Connects to OO, does the job and disconnects.'''
self.connect()
self.loadDocument()
self.convertDocument()
self.disconnect()
self.doc.close(True)
# ConverterScript-related messages ---------------------------------------------
WRONG_NB_OF_ARGS = 'Wrong number of arguments.'
@ -191,12 +242,13 @@ ERROR_CODE = 1
class ConverterScript:
usage = 'usage: python converter.py fileToConvert outputType [options]\n' \
' where fileToConvert is the absolute or relative pathname of\n' \
' the ODT file you want to convert;\n'\
' the file you want to convert (or whose content like\n' \
' indexes need to be refreshed);\n'\
' and outputType is the output format, that must be one of\n' \
' %s.\n' \
' "python" should be a UNO-enabled Python interpreter (ie the one\n' \
' which is included in the OpenOffice.org distribution).' % \
str(ODT_FILE_TYPES.keys())
' "python" should be a UNO-enabled Python interpreter (ie the ' \
' one which is included in the OpenOffice.org distribution).' % \
str(FILE_TYPES.keys())
def run(self):
optParser = OptionParser(usage=ConverterScript.usage)
optParser.add_option("-p", "--port", dest="port",

View file

@ -25,7 +25,7 @@ import appy.pod
from appy.pod import PodError
from appy.shared.xml_parser import XmlElement
from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert
from appy.pod.converter import ODT_FILE_TYPES
from appy.pod.converter import FILE_TYPES
from appy.pod.buffers import FileBuffer
from appy.pod.xhtml2odt import Xhtml2OdtConverter
from appy.pod.doc_importers import OdtImporter, ImageImporter, PdfImporter
@ -423,9 +423,9 @@ class Renderer:
os.rename(resultOdtName, self.result)
else:
if resultType.startswith('.'): resultType = resultType[1:]
if not resultType in ODT_FILE_TYPES.keys():
if not resultType in FILE_TYPES.keys():
raise PodError(BAD_RESULT_TYPE % (
self.result, ODT_FILE_TYPES.keys()))
self.result, FILE_TYPES.keys()))
# Call OpenOffice to perform the conversion or document update
self.callOpenOffice(resultOdtName, resultType)
# I have the result. Move it to the correct name