More input and output formats for OO conversion in converter.py and bugfix in skyn edit.
This commit is contained in:
parent
fff2b6a329
commit
e89eda4838
7 changed files with 190 additions and 113 deletions
212
pod/converter.py
212
pod/converter.py
|
@ -20,15 +20,26 @@
|
|||
import sys, os, os.path, time, signal
|
||||
from optparse import OptionParser
|
||||
|
||||
ODT_FILE_TYPES = {'doc': 'MS Word 97', # Could be 'MS Word 2003 XML'
|
||||
'pdf': 'writer_pdf_Export',
|
||||
'rtf': 'Rich Text Format',
|
||||
'txt': 'Text',
|
||||
'html': 'HTML (StarWriter)',
|
||||
'htm': 'HTML (StarWriter)',
|
||||
'odt': 'ODT'}
|
||||
# Conversion to ODT does not make any conversion; it simply updates indexes and
|
||||
# linked documents.
|
||||
htmlFilters = {'odt': 'HTML (StarWriter)',
|
||||
'ods': 'HTML (StarCalc)',
|
||||
'odp': 'impress_html_Export'}
|
||||
|
||||
FILE_TYPES = {'odt': 'writer8',
|
||||
'ods': 'calc8',
|
||||
'odp': 'impress8',
|
||||
'htm': htmlFilters, 'html': htmlFilters,
|
||||
'rtf': 'Rich Text Format',
|
||||
'txt': 'Text',
|
||||
'csv': 'Text - txt - csv (StarCalc)',
|
||||
'pdf': {'odt': 'writer_pdf_Export', 'ods': 'calc_pdf_Export',
|
||||
'odp': 'impress_pdf_Export', 'odg': 'draw_pdf_Export'},
|
||||
'swf': 'impress_flash_Export',
|
||||
'doc': 'MS Word 97',
|
||||
'xls': 'MS Excel 97',
|
||||
'ppt': 'MS PowerPoint 97',
|
||||
}
|
||||
# Conversion from odt to odt does not make any conversion, but updates indexes
|
||||
# and linked documents.
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
class ConverterError(Exception): pass
|
||||
|
@ -46,7 +57,7 @@ DEFAULT_PORT = 2002
|
|||
|
||||
# ------------------------------------------------------------------------------
|
||||
class Converter:
|
||||
'''Converts an ODT document into pdf, doc, txt or rtf.'''
|
||||
'''Converts an document readable by OpenOffice into pdf, doc, txt or rtf.'''
|
||||
exeVariants = ('soffice.exe', 'soffice')
|
||||
pathReplacements = {'program files': 'progra~1',
|
||||
'openoffice.org 1': 'openof~1',
|
||||
|
@ -54,41 +65,64 @@ class Converter:
|
|||
}
|
||||
def __init__(self, docPath, resultType, port=DEFAULT_PORT):
|
||||
self.port = port
|
||||
self.docUrl, self.docPath = self.getDocUrls(docPath)
|
||||
self.resultFilter = self.getResultFilter(resultType)
|
||||
self.resultUrl = self.getResultUrl(resultType)
|
||||
self.docUrl, self.docPath = self.getInputUrls(docPath)
|
||||
self.inputType = os.path.splitext(docPath)[1][1:].lower()
|
||||
self.resultType = resultType
|
||||
self.resultFilter = self.getResultFilter()
|
||||
self.resultUrl = self.getResultUrl()
|
||||
self.ooContext = None
|
||||
self.oo = None # OpenOffice application object
|
||||
self.doc = None # OpenOffice loaded document
|
||||
def getDocUrls(self, docPath):
|
||||
self.oo = None # The OpenOffice application object
|
||||
self.doc = None # The OpenOffice loaded document
|
||||
|
||||
def getInputUrls(self, docPath):
|
||||
'''Returns the absolute path of the input file. In fact, it returns a
|
||||
tuple with some URL version of the path for OO as the first element
|
||||
and the absolute path as the second element.'''
|
||||
import uno
|
||||
if not os.path.exists(docPath) and not os.path.isfile(docPath):
|
||||
raise ConverterError(DOC_NOT_FOUND % docPath)
|
||||
docAbsPath = os.path.abspath(docPath)
|
||||
# Return one path for OO, one path for me.
|
||||
return uno.systemPathToFileUrl(docAbsPath), docAbsPath
|
||||
def getResultFilter(self, resultType):
|
||||
if ODT_FILE_TYPES.has_key(resultType):
|
||||
res = ODT_FILE_TYPES[resultType]
|
||||
|
||||
def getResultFilter(self):
|
||||
'''Based on the result type, identifies which OO filter to use for the
|
||||
document conversion.'''
|
||||
if FILE_TYPES.has_key(self.resultType):
|
||||
res = FILE_TYPES[self.resultType]
|
||||
if isinstance(res, dict):
|
||||
res = res[self.inputType]
|
||||
else:
|
||||
raise ConverterError(BAD_RESULT_TYPE % (resultType,
|
||||
ODT_FILE_TYPES.keys()))
|
||||
raise ConverterError(BAD_RESULT_TYPE % (self.resultType,
|
||||
FILE_TYPES.keys()))
|
||||
return res
|
||||
def getResultUrl(self, resultType):
|
||||
|
||||
def getResultUrl(self):
|
||||
'''Returns the path of the result file in the format needed by OO. If
|
||||
the result type and the input type are the same (ie the user wants to
|
||||
refresh indexes or some other action and not perform a real
|
||||
conversion), the result file is named
|
||||
<inputFileName>.res.<resultType>.
|
||||
|
||||
Else, the result file is named like the input file but with a
|
||||
different extension:
|
||||
<inputFileName>.<resultType>
|
||||
'''
|
||||
import uno
|
||||
baseName = os.path.splitext(self.docPath)[0]
|
||||
if resultType != 'odt':
|
||||
res = '%s.%s' % (baseName, resultType)
|
||||
if self.resultType != self.inputType:
|
||||
res = '%s.%s' % (baseName, self.resultType)
|
||||
else:
|
||||
res = '%s.res.%s' % (baseName, resultType)
|
||||
res = '%s.res.%s' % (baseName, self.resultType)
|
||||
try:
|
||||
f = open(res, 'w')
|
||||
f.write('Hello')
|
||||
f.close()
|
||||
os.remove(res)
|
||||
return uno.systemPathToFileUrl(res)
|
||||
except OSError, oe:
|
||||
raise ConverterError(CANNOT_WRITE_RESULT % (res, oe))
|
||||
except (OSError, IOError), ioe:
|
||||
raise ConverterError(CANNOT_WRITE_RESULT % (res, ioe))
|
||||
|
||||
def connect(self):
|
||||
'''Connects to OpenOffice'''
|
||||
if os.name == 'nt':
|
||||
|
@ -115,73 +149,90 @@ class Converter:
|
|||
'com.sun.star.frame.Desktop', self.ooContext)
|
||||
except NoConnectException, nce:
|
||||
raise ConverterError(CONNECT_ERROR % (self.port, nce))
|
||||
def disconnect(self):
|
||||
self.doc.close(True)
|
||||
# Do a nasty thing before exiting the python process. In case the
|
||||
# last call is a oneway call (e.g. see idl-spec of insertString),
|
||||
# it must be forced out of the remote-bridge caches before python
|
||||
# exits the process. Otherwise, the oneway call may or may not reach
|
||||
# the target object.
|
||||
# I do this here by calling a cheap synchronous call (getPropertyValue).
|
||||
self.ooContext.ServiceManager
|
||||
def loadDocument(self):
|
||||
from com.sun.star.lang import IllegalArgumentException, \
|
||||
IndexOutOfBoundsException
|
||||
|
||||
def updateOdtDocument(self):
|
||||
'''If the input file is an ODT document, we will perform 2 tasks:
|
||||
1) Update all annexes;
|
||||
2) Update sections (if sections refer to external content, we try to
|
||||
include the content within the result file)
|
||||
'''
|
||||
from com.sun.star.lang import IndexOutOfBoundsException
|
||||
# I need to use IndexOutOfBoundsException because sometimes, when
|
||||
# using sections.getCount, UNO returns a number that is bigger than
|
||||
# the real number of sections (this is because it also counts the
|
||||
# sections that are present within the sub-documents to integrate)
|
||||
# Update all indexes
|
||||
indexes = self.doc.getDocumentIndexes()
|
||||
indexesCount = indexes.getCount()
|
||||
if indexesCount != 0:
|
||||
for i in range(indexesCount):
|
||||
try:
|
||||
indexes.getByIndex(i).update()
|
||||
except IndexOutOfBoundsException:
|
||||
pass
|
||||
# Update sections
|
||||
self.doc.updateLinks()
|
||||
sections = self.doc.getTextSections()
|
||||
sectionsCount = sections.getCount()
|
||||
if sectionsCount != 0:
|
||||
for i in range(sectionsCount-1, -1, -1):
|
||||
# I must walk into the section from last one to the first
|
||||
# one. Else, when "disposing" sections, I remove sections
|
||||
# and the remaining sections other indexes.
|
||||
try:
|
||||
section = sections.getByIndex(i)
|
||||
if section.FileLink and section.FileLink.FileURL:
|
||||
section.dispose() # This method removes the
|
||||
# <section></section> tags without removing the content
|
||||
# of the section. Else, it won't appear.
|
||||
except IndexOutOfBoundsException:
|
||||
pass
|
||||
|
||||
def loadDocument(self):
|
||||
from com.sun.star.lang import IllegalArgumentException, \
|
||||
IndexOutOfBoundsException
|
||||
from com.sun.star.beans import PropertyValue
|
||||
try:
|
||||
# Load the document to convert in a new hidden frame
|
||||
# Loads the document to convert in a new hidden frame
|
||||
prop = PropertyValue()
|
||||
prop.Name = 'Hidden'
|
||||
prop.Value = True
|
||||
self.doc = self.oo.loadComponentFromURL(self.docUrl, "_blank", 0,
|
||||
(prop,))
|
||||
# Update all indexes
|
||||
indexes = self.doc.getDocumentIndexes()
|
||||
indexesCount = indexes.getCount()
|
||||
if indexesCount != 0:
|
||||
for i in range(indexesCount):
|
||||
try:
|
||||
indexes.getByIndex(i).update()
|
||||
except IndexOutOfBoundsException:
|
||||
pass
|
||||
# Update sections
|
||||
self.doc.updateLinks()
|
||||
sections = self.doc.getTextSections()
|
||||
sectionsCount = sections.getCount()
|
||||
if sectionsCount != 0:
|
||||
for i in range(sectionsCount-1, -1, -1):
|
||||
# I must walk into the section from last one to the first
|
||||
# one. Else, when "disposing" sections, I remove sections
|
||||
# and the remaining sections other indexes.
|
||||
try:
|
||||
section = sections.getByIndex(i)
|
||||
if section.FileLink and section.FileLink.FileURL:
|
||||
section.dispose() # This method removes the
|
||||
# <section></section> tags without removing the content
|
||||
# of the section. Else, it won't appear.
|
||||
except IndexOutOfBoundsException:
|
||||
pass
|
||||
if self.inputType == 'odt':
|
||||
# Perform additional tasks for odt documents
|
||||
self.updateOdtDocument()
|
||||
try:
|
||||
self.doc.refresh()
|
||||
except AttributeError:
|
||||
pass
|
||||
except IllegalArgumentException, iae:
|
||||
raise ConverterError(URL_NOT_FOUND % (self.docPath, iae))
|
||||
|
||||
def convertDocument(self):
|
||||
if self.resultFilter != 'ODT':
|
||||
# I must really perform a conversion
|
||||
from com.sun.star.beans import PropertyValue
|
||||
prop = PropertyValue()
|
||||
prop.Name = 'FilterName'
|
||||
prop.Value = self.resultFilter
|
||||
self.doc.storeToURL(self.resultUrl, (prop,))
|
||||
else:
|
||||
self.doc.storeToURL(self.resultUrl, ())
|
||||
'''Calls OO to perform a document conversion. Note that the conversion
|
||||
is not really done if the source and target documents have the same
|
||||
type.'''
|
||||
properties = []
|
||||
from com.sun.star.beans import PropertyValue
|
||||
prop = PropertyValue()
|
||||
prop.Name = 'FilterName'
|
||||
prop.Value = self.resultFilter
|
||||
properties.append(prop)
|
||||
if self.resultType == 'csv':
|
||||
# For CSV export, add options (separator, etc)
|
||||
optionsProp = PropertyValue()
|
||||
optionsProp.Name = 'FilterOptions'
|
||||
optionsProp.Value = '59,34,76,1'
|
||||
properties.append(optionsProp)
|
||||
self.doc.storeToURL(self.resultUrl, tuple(properties))
|
||||
|
||||
def run(self):
|
||||
'''Connects to OO, does the job and disconnects.'''
|
||||
self.connect()
|
||||
self.loadDocument()
|
||||
self.convertDocument()
|
||||
self.disconnect()
|
||||
self.doc.close(True)
|
||||
|
||||
# ConverterScript-related messages ---------------------------------------------
|
||||
WRONG_NB_OF_ARGS = 'Wrong number of arguments.'
|
||||
|
@ -191,12 +242,13 @@ ERROR_CODE = 1
|
|||
class ConverterScript:
|
||||
usage = 'usage: python converter.py fileToConvert outputType [options]\n' \
|
||||
' where fileToConvert is the absolute or relative pathname of\n' \
|
||||
' the ODT file you want to convert;\n'\
|
||||
' the file you want to convert (or whose content like\n' \
|
||||
' indexes need to be refreshed);\n'\
|
||||
' and outputType is the output format, that must be one of\n' \
|
||||
' %s.\n' \
|
||||
' "python" should be a UNO-enabled Python interpreter (ie the one\n' \
|
||||
' which is included in the OpenOffice.org distribution).' % \
|
||||
str(ODT_FILE_TYPES.keys())
|
||||
' "python" should be a UNO-enabled Python interpreter (ie the ' \
|
||||
' one which is included in the OpenOffice.org distribution).' % \
|
||||
str(FILE_TYPES.keys())
|
||||
def run(self):
|
||||
optParser = OptionParser(usage=ConverterScript.usage)
|
||||
optParser.add_option("-p", "--port", dest="port",
|
||||
|
|
|
@ -25,7 +25,7 @@ import appy.pod
|
|||
from appy.pod import PodError
|
||||
from appy.shared.xml_parser import XmlElement
|
||||
from appy.pod.pod_parser import PodParser, PodEnvironment, OdInsert
|
||||
from appy.pod.converter import ODT_FILE_TYPES
|
||||
from appy.pod.converter import FILE_TYPES
|
||||
from appy.pod.buffers import FileBuffer
|
||||
from appy.pod.xhtml2odt import Xhtml2OdtConverter
|
||||
from appy.pod.doc_importers import OdtImporter, ImageImporter, PdfImporter
|
||||
|
@ -423,9 +423,9 @@ class Renderer:
|
|||
os.rename(resultOdtName, self.result)
|
||||
else:
|
||||
if resultType.startswith('.'): resultType = resultType[1:]
|
||||
if not resultType in ODT_FILE_TYPES.keys():
|
||||
if not resultType in FILE_TYPES.keys():
|
||||
raise PodError(BAD_RESULT_TYPE % (
|
||||
self.result, ODT_FILE_TYPES.keys()))
|
||||
self.result, FILE_TYPES.keys()))
|
||||
# Call OpenOffice to perform the conversion or document update
|
||||
self.callOpenOffice(resultOdtName, resultType)
|
||||
# I have the result. Move it to the correct name
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue