[pod] Performance improvement: stop computing tag names, create a dict of precomputed tags before starting parsing.

This commit is contained in:
Gaetan Delannay 2013-01-31 12:50:25 +01:00
parent ad94fee755
commit d5d99b67eb

View file

@ -60,10 +60,6 @@ class OdInsert:
class PodEnvironment(OdfEnvironment):
'''Contains all elements representing the current parser state during
parsing.'''
# Elements we must ignore (they will not be included in the result
ignorableElements = None # Will be set after namespace propagation
# Elements that may be impacted by POD statements
impactableElements = None # Idem
# Possibles modes
# ADD_IN_BUFFER: when encountering an impactable element, we must
# continue to dump it in the current buffer
@ -92,6 +88,10 @@ class PodEnvironment(OdfEnvironment):
self.mode = self.ADD_IN_SUBBUFFER
# Current state
self.state = self.READING_CONTENT
# Elements we must ignore (they will not be included in the result)
self.ignorableElements = None # Will be set after namespace propagation
# Elements that may be impacted by POD statements
self.impactableElements = None # Idem
# Stack of currently visited tables
self.tableStack = []
self.tableIndex = -1
@ -110,6 +110,8 @@ class PodEnvironment(OdfEnvironment):
self.namedIfActions = {} #~{s_statementName: IfAction}~
# Currently parsed expression within an ODS template
self.currentOdsExpression = None
# Names of some tags, that we will compute after namespace propagation
self.tags = None
def getTable(self):
'''Gets the currently parsed table.'''
@ -157,15 +159,15 @@ class PodEnvironment(OdfEnvironment):
self.getTable().curRowAttrs = self.currentElem.attrs
elif elem == Cell.OD.elem:
colspan = 1
attrSpan = '%s:number-columns-spanned' % tableNs
attrSpan = self.tags['number-columns-spanned']
if self.currentElem.attrs.has_key(attrSpan):
colspan = int(self.currentElem.attrs[attrSpan])
self.getTable().curColIndex += colspan
elif elem == ('%s:table-column' % tableNs):
elif elem == self.tags['table-column']:
attrs = self.currentElem.attrs
if attrs.has_key('%s:number-columns-repeated' % tableNs):
if attrs.has_key(self.tags['number-columns-repeated']):
self.getTable().nbOfColumns += int(
attrs['%s:number-columns-repeated' % tableNs])
attrs[self.tags['number-columns-repeated']])
else:
self.getTable().nbOfColumns += 1
return ns
@ -190,8 +192,28 @@ class PodEnvironment(OdfEnvironment):
xmlElemDef = eval(elemName[0].upper() + elemName[1:]).OD
elemFullName = xmlElemDef.getFullName(ns)
xmlElemDef.__init__(elemFullName)
self.ignorableElements = ('%s:tracked-changes' % ns[self.NS_TEXT],
'%s:change' % ns[self.NS_TEXT])
# Create a table of names of used tags and attributes (precomputed,
# including namespace, for performance).
self.tags = {
'tracked-changes': '%s:tracked-changes' % ns[self.NS_TEXT],
'change': '%s:change' % ns[self.NS_TEXT],
'annotation': '%s:annotation' % ns[self.NS_OFFICE],
'change-start': '%s:change-start' % ns[self.NS_TEXT],
'change-end': '%s:change-end' % ns[self.NS_TEXT],
'conditional-text': '%s:conditional-text' % ns[self.NS_TEXT],
'table-cell': '%s:table-cell' % ns[self.NS_TABLE],
'formula': '%s:formula' % ns[self.NS_TABLE],
'value-type': '%s:value-type' % ns[self.NS_OFFICE],
'string-value': '%s:string-value' % ns[self.NS_OFFICE],
'span': '%s:span' % ns[self.NS_TEXT],
'number-columns-spanned': '%s:number-columns-spanned' % \
ns[self.NS_TABLE],
'number-columns-repeated': '%s:number-columns-repeated' % \
ns[self.NS_TABLE],
'table-column': '%s:table-column' % ns[self.NS_TABLE],
}
self.ignorableElements = (self.tags['tracked-changes'],
self.tags['change'])
self.impactableElements = (
Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem,
Cell.OD.elem, Section.OD.elem)
@ -213,19 +235,18 @@ class PodParser(OdfParser):
tableNs = ns[e.NS_TABLE]
if elem in e.ignorableElements:
e.state = e.IGNORING
elif elem == ('%s:annotation' % officeNs):
elif elem == e.tags['annotation']:
# Be it in an ODT or ODS template, an annotation is considered to
# contain a POD statement.
e.state = e.READING_STATEMENT
elif (elem == ('%s:change-start' % textNs)) or \
(elem == ('%s:conditional-text' % textNs)):
elif elem in (e.tags['change-start'], e.tags['conditional-text']):
# In an ODT template, any text in track-changes or any conditional
# field is considered to contain a POD expression.
e.state = e.READING_EXPRESSION
e.exprHasStyle = False
elif (elem == ('%s:table-cell' % tableNs)) and \
attrs.has_key('%s:formula' % tableNs) and \
(attrs['%s:value-type' % officeNs] == 'string'):
elif (elem == e.tags['table-cell']) and \
attrs.has_key(e.tags['formula']) and \
(attrs[e.tags['value-type']] == 'string'):
# In an ODS template, any cell containing a formula of type "string"
# is considered to contain a POD expression. But here it is a
# special case: we need to dump the cell; the expression is not
@ -237,9 +258,9 @@ class PodParser(OdfParser):
e.addSubBuffer()
e.currentBuffer.addElement(e.currentElem.name)
e.currentBuffer.dumpStartElement(elem, attrs,
ignoreAttrs=('%s:formula'%tableNs, '%s:string-value'%officeNs))
ignoreAttrs=(e.tags['formula'], e.tags['string-value']))
# We already have the POD expression: remember it on the env.
e.currentOdsExpression = attrs['%s:string-value' % officeNs]
e.currentOdsExpression = attrs[e.tags['string-value']]
else:
if e.state == e.IGNORING:
pass
@ -252,8 +273,7 @@ class PodParser(OdfParser):
elif e.state == e.READING_STATEMENT:
pass
elif e.state == e.READING_EXPRESSION:
if (elem == ('%s:span' % textNs)) and \
not e.currentContent.strip():
if (elem == (e.tags['span'])) and not e.currentContent.strip():
e.currentBuffer.dumpStartElement(elem, attrs)
e.exprHasStyle = True
e.manageInserts()
@ -265,7 +285,7 @@ class PodParser(OdfParser):
textNs = ns[e.NS_TEXT]
if elem in e.ignorableElements:
e.state = e.READING_CONTENT
elif elem == ('%s:annotation' % officeNs):
elif elem == e.tags['annotation']:
# Manage statement
oldCb = e.currentBuffer
actionElemIndex = oldCb.createAction(e.currentStatement)
@ -316,14 +336,14 @@ class PodParser(OdfParser):
e.currentStatement.append(statementLine)
e.currentContent = ''
elif e.state == e.READING_EXPRESSION:
if (elem == ('%s:change-end' % textNs)) or \
(elem == ('%s:conditional-text' % textNs)):
if (elem == e.tags['change-end']) or \
(elem == e.tags['conditional-text']):
expression = e.currentContent.strip()
e.currentContent = ''
# Manage expression
e.currentBuffer.addExpression(expression)
if e.exprHasStyle:
e.currentBuffer.dumpEndElement('%s:span' % textNs)
e.currentBuffer.dumpEndElement(e.tags['span'])
e.state = e.READING_CONTENT
def characters(self, content):