[pod] Performance improvement: stop recomputing tag names during parsing; instead, build a dict of precomputed tag names before parsing starts.

This commit is contained in:
Gaetan Delannay 2013-01-31 12:50:25 +01:00
parent ad94fee755
commit d5d99b67eb

View file

@ -60,10 +60,6 @@ class OdInsert:
class PodEnvironment(OdfEnvironment): class PodEnvironment(OdfEnvironment):
'''Contains all elements representing the current parser state during '''Contains all elements representing the current parser state during
parsing.''' parsing.'''
# Elements we must ignore (they will not be included in the result)
ignorableElements = None # Will be set after namespace propagation
# Elements that may be impacted by POD statements
impactableElements = None # Idem
# Possible modes # Possible modes
# ADD_IN_BUFFER: when encountering an impactable element, we must # ADD_IN_BUFFER: when encountering an impactable element, we must
# continue to dump it in the current buffer # continue to dump it in the current buffer
@ -92,6 +88,10 @@ class PodEnvironment(OdfEnvironment):
self.mode = self.ADD_IN_SUBBUFFER self.mode = self.ADD_IN_SUBBUFFER
# Current state # Current state
self.state = self.READING_CONTENT self.state = self.READING_CONTENT
# Elements we must ignore (they will not be included in the result)
self.ignorableElements = None # Will be set after namespace propagation
# Elements that may be impacted by POD statements
self.impactableElements = None # Idem
# Stack of currently visited tables # Stack of currently visited tables
self.tableStack = [] self.tableStack = []
self.tableIndex = -1 self.tableIndex = -1
@ -110,6 +110,8 @@ class PodEnvironment(OdfEnvironment):
self.namedIfActions = {} #~{s_statementName: IfAction}~ self.namedIfActions = {} #~{s_statementName: IfAction}~
# Currently parsed expression within an ODS template # Currently parsed expression within an ODS template
self.currentOdsExpression = None self.currentOdsExpression = None
# Names of some tags, that we will compute after namespace propagation
self.tags = None
def getTable(self): def getTable(self):
'''Gets the currently parsed table.''' '''Gets the currently parsed table.'''
@ -157,15 +159,15 @@ class PodEnvironment(OdfEnvironment):
self.getTable().curRowAttrs = self.currentElem.attrs self.getTable().curRowAttrs = self.currentElem.attrs
elif elem == Cell.OD.elem: elif elem == Cell.OD.elem:
colspan = 1 colspan = 1
attrSpan = '%s:number-columns-spanned' % tableNs attrSpan = self.tags['number-columns-spanned']
if self.currentElem.attrs.has_key(attrSpan): if self.currentElem.attrs.has_key(attrSpan):
colspan = int(self.currentElem.attrs[attrSpan]) colspan = int(self.currentElem.attrs[attrSpan])
self.getTable().curColIndex += colspan self.getTable().curColIndex += colspan
elif elem == ('%s:table-column' % tableNs): elif elem == self.tags['table-column']:
attrs = self.currentElem.attrs attrs = self.currentElem.attrs
if attrs.has_key('%s:number-columns-repeated' % tableNs): if attrs.has_key(self.tags['number-columns-repeated']):
self.getTable().nbOfColumns += int( self.getTable().nbOfColumns += int(
attrs['%s:number-columns-repeated' % tableNs]) attrs[self.tags['number-columns-repeated']])
else: else:
self.getTable().nbOfColumns += 1 self.getTable().nbOfColumns += 1
return ns return ns
@ -190,8 +192,28 @@ class PodEnvironment(OdfEnvironment):
xmlElemDef = eval(elemName[0].upper() + elemName[1:]).OD xmlElemDef = eval(elemName[0].upper() + elemName[1:]).OD
elemFullName = xmlElemDef.getFullName(ns) elemFullName = xmlElemDef.getFullName(ns)
xmlElemDef.__init__(elemFullName) xmlElemDef.__init__(elemFullName)
self.ignorableElements = ('%s:tracked-changes' % ns[self.NS_TEXT], # Create a table of names of used tags and attributes (precomputed,
'%s:change' % ns[self.NS_TEXT]) # including namespace, for performance).
self.tags = {
'tracked-changes': '%s:tracked-changes' % ns[self.NS_TEXT],
'change': '%s:change' % ns[self.NS_TEXT],
'annotation': '%s:annotation' % ns[self.NS_OFFICE],
'change-start': '%s:change-start' % ns[self.NS_TEXT],
'change-end': '%s:change-end' % ns[self.NS_TEXT],
'conditional-text': '%s:conditional-text' % ns[self.NS_TEXT],
'table-cell': '%s:table-cell' % ns[self.NS_TABLE],
'formula': '%s:formula' % ns[self.NS_TABLE],
'value-type': '%s:value-type' % ns[self.NS_OFFICE],
'string-value': '%s:string-value' % ns[self.NS_OFFICE],
'span': '%s:span' % ns[self.NS_TEXT],
'number-columns-spanned': '%s:number-columns-spanned' % \
ns[self.NS_TABLE],
'number-columns-repeated': '%s:number-columns-repeated' % \
ns[self.NS_TABLE],
'table-column': '%s:table-column' % ns[self.NS_TABLE],
}
self.ignorableElements = (self.tags['tracked-changes'],
self.tags['change'])
self.impactableElements = ( self.impactableElements = (
Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem, Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem,
Cell.OD.elem, Section.OD.elem) Cell.OD.elem, Section.OD.elem)
@ -213,19 +235,18 @@ class PodParser(OdfParser):
tableNs = ns[e.NS_TABLE] tableNs = ns[e.NS_TABLE]
if elem in e.ignorableElements: if elem in e.ignorableElements:
e.state = e.IGNORING e.state = e.IGNORING
elif elem == ('%s:annotation' % officeNs): elif elem == e.tags['annotation']:
# Be it in an ODT or ODS template, an annotation is considered to # Be it in an ODT or ODS template, an annotation is considered to
# contain a POD statement. # contain a POD statement.
e.state = e.READING_STATEMENT e.state = e.READING_STATEMENT
elif (elem == ('%s:change-start' % textNs)) or \ elif elem in (e.tags['change-start'], e.tags['conditional-text']):
(elem == ('%s:conditional-text' % textNs)):
# In an ODT template, any text in track-changes or any conditional # In an ODT template, any text in track-changes or any conditional
# field is considered to contain a POD expression. # field is considered to contain a POD expression.
e.state = e.READING_EXPRESSION e.state = e.READING_EXPRESSION
e.exprHasStyle = False e.exprHasStyle = False
elif (elem == ('%s:table-cell' % tableNs)) and \ elif (elem == e.tags['table-cell']) and \
attrs.has_key('%s:formula' % tableNs) and \ attrs.has_key(e.tags['formula']) and \
(attrs['%s:value-type' % officeNs] == 'string'): (attrs[e.tags['value-type']] == 'string'):
# In an ODS template, any cell containing a formula of type "string" # In an ODS template, any cell containing a formula of type "string"
# is considered to contain a POD expression. But here it is a # is considered to contain a POD expression. But here it is a
# special case: we need to dump the cell; the expression is not # special case: we need to dump the cell; the expression is not
@ -237,9 +258,9 @@ class PodParser(OdfParser):
e.addSubBuffer() e.addSubBuffer()
e.currentBuffer.addElement(e.currentElem.name) e.currentBuffer.addElement(e.currentElem.name)
e.currentBuffer.dumpStartElement(elem, attrs, e.currentBuffer.dumpStartElement(elem, attrs,
ignoreAttrs=('%s:formula'%tableNs, '%s:string-value'%officeNs)) ignoreAttrs=(e.tags['formula'], e.tags['string-value']))
# We already have the POD expression: remember it on the env. # We already have the POD expression: remember it on the env.
e.currentOdsExpression = attrs['%s:string-value' % officeNs] e.currentOdsExpression = attrs[e.tags['string-value']]
else: else:
if e.state == e.IGNORING: if e.state == e.IGNORING:
pass pass
@ -252,8 +273,7 @@ class PodParser(OdfParser):
elif e.state == e.READING_STATEMENT: elif e.state == e.READING_STATEMENT:
pass pass
elif e.state == e.READING_EXPRESSION: elif e.state == e.READING_EXPRESSION:
if (elem == ('%s:span' % textNs)) and \ if (elem == (e.tags['span'])) and not e.currentContent.strip():
not e.currentContent.strip():
e.currentBuffer.dumpStartElement(elem, attrs) e.currentBuffer.dumpStartElement(elem, attrs)
e.exprHasStyle = True e.exprHasStyle = True
e.manageInserts() e.manageInserts()
@ -265,7 +285,7 @@ class PodParser(OdfParser):
textNs = ns[e.NS_TEXT] textNs = ns[e.NS_TEXT]
if elem in e.ignorableElements: if elem in e.ignorableElements:
e.state = e.READING_CONTENT e.state = e.READING_CONTENT
elif elem == ('%s:annotation' % officeNs): elif elem == e.tags['annotation']:
# Manage statement # Manage statement
oldCb = e.currentBuffer oldCb = e.currentBuffer
actionElemIndex = oldCb.createAction(e.currentStatement) actionElemIndex = oldCb.createAction(e.currentStatement)
@ -316,14 +336,14 @@ class PodParser(OdfParser):
e.currentStatement.append(statementLine) e.currentStatement.append(statementLine)
e.currentContent = '' e.currentContent = ''
elif e.state == e.READING_EXPRESSION: elif e.state == e.READING_EXPRESSION:
if (elem == ('%s:change-end' % textNs)) or \ if (elem == e.tags['change-end']) or \
(elem == ('%s:conditional-text' % textNs)): (elem == e.tags['conditional-text']):
expression = e.currentContent.strip() expression = e.currentContent.strip()
e.currentContent = '' e.currentContent = ''
# Manage expression # Manage expression
e.currentBuffer.addExpression(expression) e.currentBuffer.addExpression(expression)
if e.exprHasStyle: if e.exprHasStyle:
e.currentBuffer.dumpEndElement('%s:span' % textNs) e.currentBuffer.dumpEndElement(e.tags['span'])
e.state = e.READING_CONTENT e.state = e.READING_CONTENT
def characters(self, content): def characters(self, content):