# File: appypod-rattail/pod/pod_parser.py (Python, 304 lines, 13 KiB)

# ------------------------------------------------------------------------------
# Appy is a framework for building applications in the Python language.
# Copyright (C) 2007 Gaetan Delannay
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------
import re
from appy.shared.xml_parser import XmlElement
from appy.pod.buffers import FileBuffer, MemoryBuffer
from appy.pod.odf_parser import OdfEnvironment, OdfParser
from appy.pod.elements import *
# ------------------------------------------------------------------------------
class OdTable:
    '''Information about the Open Document (Od) table that is currently being
       parsed.'''
    def __init__(self):
        # Number of columns counted so far for this table
        self.nbOfColumns = 0
        # Number of rows counted so far for this table
        self.nbOfRows = 0
        # Index of the cell currently parsed within the current row. Stays
        # None as long as no row was started.
        self.curColIndex = None
        # Attributes of the row currently parsed. Stays None as long as no
        # row was started.
        self.curRowAttrs = None

    def isOneCell(self):
        '''Returns True if this table has exactly one column and one row.'''
        return (self.nbOfColumns == 1) and (self.nbOfRows == 1)
class OdInsert:
    '''While parsing an odt/pod file, we may need to insert a specific odt chunk
       at a given place in the odt file (ie: add the pod-specific fonts and
       styles). OdInsert instances define such 'inserts' (what to insert and
       when).'''
    def __init__(self, odtChunk, elem, nsUris=None):
        '''p_odtChunk is the odt chunk to insert, either as an utf-8 encoded
           byte string or as already decoded text. p_elem is the element
           after whose start the chunk must be inserted. p_nsUris (optional)
           is a dict of namespace placeholders to resolve within
           p_odtChunk.'''
        # Only decode byte strings: decoding a text that is already decoded
        # would fail as soon as it contains non-ASCII characters.
        if isinstance(odtChunk, bytes):
            odtChunk = odtChunk.decode('utf-8')
        self.odtChunk = odtChunk # The odt chunk to insert
        self.elem = elem # The p_odtChunk will be inserted just after the p_elem
        # start, which must be an XmlElement instance. If more than one p_elem
        # is present in the odt file, the p_odtChunk will be inserted only at
        # the first p_elem occurrence.
        # Do not use a dict as default parameter value: it would be shared
        # among all instances created without p_nsUris.
        if nsUris is None:
            nsUris = {}
        self.nsUris = nsUris # The URI replacements that need to be done in
        # p_odtChunk. It is a dict whose keys are names used in p_odtChunk (in
        # the form @name@) to refer to XML namespaces, and values are URIs of
        # those namespaces.

    def resolve(self, namespaces):
        '''Replaces all unresolved namespaces in p_odtChunk, thanks to the dict
           of p_namespaces, whose keys are namespace URIs and values are the
           names that must replace the corresponding @name@ placeholders.
           The resolved chunk is stored on this instance and returned. A
           KeyError is raised if a URI from self.nsUris is not among
           p_namespaces.'''
        for nsName, nsUri in self.nsUris.items():
            # Literal replacement: the placeholder name is not a regular
            # expression and the replacement is not a regex template.
            self.odtChunk = self.odtChunk.replace('@%s@' % nsName,
                                                  namespaces[nsUri])
        return self.odtChunk
class PodEnvironment(OdfEnvironment):
    '''Contains all elements representing the current parser state during
       parsing.'''
    # Elements we must ignore (they will not be included in the result)
    ignorableElements = None # Will be set after namespace propagation
    # Elements that may be impacted by POD statements
    impactableElements = None # Idem
    # Possible modes
    # ADD_IN_BUFFER: when encountering an impactable element, we must
    #                continue to dump it in the current buffer
    ADD_IN_BUFFER = 0
    # ADD_IN_SUBBUFFER: when encountering an impactable element, we must
    #                   create a new subbuffer and dump it in it.
    ADD_IN_SUBBUFFER = 1
    # Possible states
    IGNORING = 0 # We are ignoring what we are currently reading
    READING_CONTENT = 1 # We are reading "normal" content
    READING_STATEMENT = 2
    # We are reading a POD statement (for, if...), which is located within a
    # office:annotation element
    READING_EXPRESSION = 3
    # We are reading a POD expression, which is located between
    # a text:change-start and a text:change-end elements

    def __init__(self, context, inserts=None):
        '''p_context is the evaluation context. p_inserts (optional) is a list
           of OdInsert instances; it is converted into a dict, keyed by full
           element name, once XML namespaces have been encountered.'''
        OdfEnvironment.__init__(self)
        # Buffer where we must dump the content we are currently reading
        self.currentBuffer = None
        # XML element content we are currently reading
        self.currentContent = ''
        # Current statement (a list of lines) that we are currently reading
        self.currentStatement = []
        # Current mode
        self.mode = self.ADD_IN_SUBBUFFER
        # Current state
        self.state = self.READING_CONTENT
        # Stack of currently visited tables
        self.tableStack = []
        self.tableIndex = -1
        # Evaluation context
        self.context = context
        # For the currently read expression, is there style-related information
        # associated with it?
        self.exprHasStyle = False
        self.gotNamespaces = False # Namespace definitions were not already
        # encountered
        # Store inserts. Do not use a list as default parameter value: it
        # would be shared among all environments created without p_inserts.
        if inserts is None:
            inserts = []
        self.inserts = inserts
        # Currently walked "if" actions
        self.ifActions = []
        # Currently walked named "if" actions
        self.namedIfActions = {} #~{s_statementName: IfAction}~

    def getTable(self):
        '''Gets the currently parsed table, or None if we are not within a
           table.'''
        res = None
        if self.tableIndex != -1:
            res = self.tableStack[self.tableIndex]
        return res

    def transformInserts(self):
        '''Now the namespaces were parsed; I can put p_inserts in the form of a
           dict for easier and more performant access while parsing. Keys are
           full element names; if several inserts target the same element,
           only the first one is kept.'''
        res = {}
        for insert in self.inserts:
            elemName = insert.elem.getFullName(self.namespaces)
            if elemName not in res:
                res[elemName] = insert
        return res

    def manageInserts(self):
        '''We just dumped the start of an elem. Here we will insert any odt
           chunk if needed.'''
        elem = self.currentElem.elem
        if elem in self.inserts:
            insert = self.inserts[elem]
            self.currentBuffer.write(insert.resolve(self.namespaces))
            # The insert is destroyed after single use
            del self.inserts[elem]

    def onStartElement(self):
        '''Updates the table-related state when an element starts and, the
           first time it is called, propagates the namespaces. Returns the
           dict of namespaces.'''
        ns = self.namespaces
        if not self.gotNamespaces:
            # We suppose that all the interesting (from the POD point of view)
            # XML namespace definitions are defined at the root XML element.
            # Here we propagate them in XML element definitions that we use
            # throughout POD.
            self.gotNamespaces = True
            self.propagateNamespaces()
        elem = self.currentElem.elem
        tableNs = self.ns(self.NS_TABLE)
        if elem == Table.OD.elem:
            # A new (possibly nested) table becomes the current one
            self.tableStack.append(OdTable())
            self.tableIndex += 1
        elif elem == Row.OD.elem:
            table = self.getTable()
            table.nbOfRows += 1
            # No cell was encountered yet in this row
            table.curColIndex = -1
            table.curRowAttrs = self.currentElem.attrs
        elif elem == Cell.OD.elem:
            self.getTable().curColIndex += 1
        elif elem == ('%s:table-column' % tableNs):
            attrs = self.currentElem.attrs
            repeatAttr = '%s:number-columns-repeated' % tableNs
            if repeatAttr in attrs:
                # This single element stands for several columns
                self.getTable().nbOfColumns += int(attrs[repeatAttr])
            else:
                self.getTable().nbOfColumns += 1
        return ns

    def onEndElement(self):
        '''Pops the current table from the stack when a table ends. Returns
           the dict of namespaces.'''
        ns = self.namespaces
        if self.currentElem.elem == Table.OD.elem:
            self.tableStack.pop()
            self.tableIndex -= 1
        return ns

    def addSubBuffer(self):
        '''Creates a sub-buffer within the current buffer, makes it the current
           buffer and switches to mode ADD_IN_BUFFER.'''
        subBuffer = self.currentBuffer.addSubBuffer()
        self.currentBuffer = subBuffer
        self.mode = self.ADD_IN_BUFFER

    def propagateNamespaces(self):
        '''Propagates the namespaces in all XML element definitions that are
           used throughout POD.'''
        ns = self.namespaces
        for elemName in PodElement.POD_ELEMS:
            # The class corresponding to a POD element bears the capitalized
            # element name. Get it from the names available in this module
            # instead of evaluating a string of code.
            className = elemName[0].upper() + elemName[1:]
            xmlElemDef = globals()[className].OD
            elemFullName = xmlElemDef.getFullName(ns)
            # Re-initialise the element definition with its full name
            xmlElemDef.__init__(elemFullName)
        self.ignorableElements = ('%s:tracked-changes' % ns[self.NS_TEXT],
                                  '%s:change' % ns[self.NS_TEXT])
        self.impactableElements = (
            Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem,
            Cell.OD.elem, Section.OD.elem)
        # From now on, self.inserts is a dict instead of a list
        self.inserts = self.transformInserts()
# ------------------------------------------------------------------------------
class PodParser(OdfParser):
    '''SAX-style parser that walks an odt/pod file and, depending on the state
       stored in its environment (self.env), dumps the content it reads into
       buffers, collects POD statements or collects POD expressions.'''
    def __init__(self, env, caller):
        OdfParser.__init__(self, env, caller)

    def endDocument(self):
        '''Closes the content of the current buffer.'''
        self.env.currentBuffer.content.close()

    def startElement(self, elem, attrs):
        '''Called when the start of p_elem (with its p_attrs) is encountered.
           Switches the parsing state when p_elem opens an ignorable zone, a
           statement or an expression; else, handles p_elem according to the
           current state.'''
        e = OdfParser.startElement(self, elem, attrs)
        ns = e.onStartElement()
        officeNs = ns[e.NS_OFFICE]
        textNs = ns[e.NS_TEXT]
        if elem in e.ignorableElements:
            e.state = e.IGNORING
        elif elem == ('%s:annotation' % officeNs):
            e.state = e.READING_STATEMENT
        elif elem == ('%s:change-start' % textNs):
            e.state = e.READING_EXPRESSION
            e.exprHasStyle = False
        else:
            if e.state == e.IGNORING:
                pass
            elif e.state == e.READING_CONTENT:
                if elem in e.impactableElements:
                    if e.mode == e.ADD_IN_SUBBUFFER:
                        e.addSubBuffer()
                    e.currentBuffer.addElement(e.currentElem.name)
                e.currentBuffer.dumpStartElement(elem, attrs)
            elif e.state == e.READING_STATEMENT:
                pass
            elif e.state == e.READING_EXPRESSION:
                # A span starting before any expression content was read
                # carries the style of the expression: keep it in the result.
                if (elem == ('%s:span' % textNs)) and \
                   not e.currentContent.strip():
                    e.currentBuffer.dumpStartElement(elem, attrs)
                    e.exprHasStyle = True
        e.manageInserts()

    def endElement(self, elem):
        '''Called when the end of p_elem is encountered. Closes an ignorable
           zone, turns a completely read statement into a buffer action, or
           handles the end of p_elem according to the current state.'''
        e = OdfParser.endElement(self, elem)
        ns = e.onEndElement()
        officeNs = ns[e.NS_OFFICE]
        textNs = ns[e.NS_TEXT]
        if elem in e.ignorableElements:
            e.state = e.READING_CONTENT
        elif elem == ('%s:annotation' % officeNs):
            # Manage statement
            oldCb = e.currentBuffer
            actionElemIndex = oldCb.createAction(e.currentStatement)
            e.currentStatement = []
            # -1 means that there is no element index to work with: in that
            # case, the current buffer and mode are left untouched.
            if actionElemIndex != -1:
                e.currentBuffer = oldCb.\
                    transferActionIndependentContent(actionElemIndex)
                if e.currentBuffer == oldCb:
                    e.mode = e.ADD_IN_SUBBUFFER
                else:
                    e.mode = e.ADD_IN_BUFFER
            e.state = e.READING_CONTENT
        else:
            if e.state == e.IGNORING:
                pass
            elif e.state == e.READING_CONTENT:
                e.currentBuffer.dumpEndElement(elem)
                if elem in e.impactableElements:
                    if isinstance(e.currentBuffer, MemoryBuffer):
                        isMainElement = e.currentBuffer.isMainElement(elem)
                        # Unreference the element among the 'elements' attribute
                        e.currentBuffer.unreferenceElement(elem)
                        if isMainElement:
                            parent = e.currentBuffer.parent
                            if not e.currentBuffer.action:
                                # Delete this buffer and transfer content to
                                # parent
                                e.currentBuffer.transferAllContent()
                                parent.removeLastSubBuffer()
                                e.currentBuffer = parent
                            else:
                                if isinstance(parent, FileBuffer):
                                    # Execute buffer action and delete the
                                    # buffer
                                    e.currentBuffer.action.execute()
                                    parent.removeLastSubBuffer()
                                    e.currentBuffer = parent
                            e.mode = e.ADD_IN_SUBBUFFER
            elif e.state == e.READING_STATEMENT:
                if e.currentElem.elem == Text.OD.elem:
                    # Every non-empty text element read within the annotation
                    # is a line of the statement.
                    statementLine = e.currentContent.strip()
                    if statementLine:
                        e.currentStatement.append(statementLine)
                    e.currentContent = ''
            elif e.state == e.READING_EXPRESSION:
                if elem == ('%s:change-end' % textNs):
                    expression = e.currentContent.strip()
                    e.currentContent = ''
                    # Manage expression
                    e.currentBuffer.addExpression(expression)
                    if e.exprHasStyle:
                        # Close the span dumped when the expression started
                        e.currentBuffer.dumpEndElement('%s:span' % textNs)
                    e.state = e.READING_CONTENT

    def characters(self, content):
        '''Called when textual p_content is encountered: dumps it in the
           current buffer or accumulates it as statement or expression
           content, depending on the current state.'''
        e = OdfParser.characters(self, content)
        if e.state == e.IGNORING:
            pass
        elif e.state == e.READING_CONTENT:
            e.currentBuffer.dumpContent(content)
        elif e.state == e.READING_STATEMENT:
            # Within a statement, only keep the content of elements whose
            # name starts with the text namespace.
            if e.currentElem.elem.startswith(e.namespaces[e.NS_TEXT]):
                e.currentContent += content
        elif e.state == e.READING_EXPRESSION:
            e.currentContent += content
# ------------------------------------------------------------------------------