[pod] Performance improvement: stop recomputing tag names during parsing; build a dict of precomputed tag names once, before parsing starts.
This commit is contained in:
parent
ad94fee755
commit
d5d99b67eb
|
@ -60,10 +60,6 @@ class OdInsert:
|
||||||
class PodEnvironment(OdfEnvironment):
|
class PodEnvironment(OdfEnvironment):
|
||||||
'''Contains all elements representing the current parser state during
|
'''Contains all elements representing the current parser state during
|
||||||
parsing.'''
|
parsing.'''
|
||||||
# Elements we must ignore (they will not be included in the result
|
|
||||||
ignorableElements = None # Will be set after namespace propagation
|
|
||||||
# Elements that may be impacted by POD statements
|
|
||||||
impactableElements = None # Idem
|
|
||||||
# Possible modes
|
# Possible modes
|
||||||
# ADD_IN_BUFFER: when encountering an impactable element, we must
|
# ADD_IN_BUFFER: when encountering an impactable element, we must
|
||||||
# continue to dump it in the current buffer
|
# continue to dump it in the current buffer
|
||||||
|
@ -92,6 +88,10 @@ class PodEnvironment(OdfEnvironment):
|
||||||
self.mode = self.ADD_IN_SUBBUFFER
|
self.mode = self.ADD_IN_SUBBUFFER
|
||||||
# Current state
|
# Current state
|
||||||
self.state = self.READING_CONTENT
|
self.state = self.READING_CONTENT
|
||||||
|
# Elements we must ignore (they will not be included in the result)
|
||||||
|
self.ignorableElements = None # Will be set after namespace propagation
|
||||||
|
# Elements that may be impacted by POD statements
|
||||||
|
self.impactableElements = None # Idem
|
||||||
# Stack of currently visited tables
|
# Stack of currently visited tables
|
||||||
self.tableStack = []
|
self.tableStack = []
|
||||||
self.tableIndex = -1
|
self.tableIndex = -1
|
||||||
|
@ -110,6 +110,8 @@ class PodEnvironment(OdfEnvironment):
|
||||||
self.namedIfActions = {} #~{s_statementName: IfAction}~
|
self.namedIfActions = {} #~{s_statementName: IfAction}~
|
||||||
# Currently parsed expression within an ODS template
|
# Currently parsed expression within an ODS template
|
||||||
self.currentOdsExpression = None
|
self.currentOdsExpression = None
|
||||||
|
# Names of some tags, that we will compute after namespace propagation
|
||||||
|
self.tags = None
|
||||||
|
|
||||||
def getTable(self):
|
def getTable(self):
|
||||||
'''Gets the currently parsed table.'''
|
'''Gets the currently parsed table.'''
|
||||||
|
@ -157,15 +159,15 @@ class PodEnvironment(OdfEnvironment):
|
||||||
self.getTable().curRowAttrs = self.currentElem.attrs
|
self.getTable().curRowAttrs = self.currentElem.attrs
|
||||||
elif elem == Cell.OD.elem:
|
elif elem == Cell.OD.elem:
|
||||||
colspan = 1
|
colspan = 1
|
||||||
attrSpan = '%s:number-columns-spanned' % tableNs
|
attrSpan = self.tags['number-columns-spanned']
|
||||||
if self.currentElem.attrs.has_key(attrSpan):
|
if self.currentElem.attrs.has_key(attrSpan):
|
||||||
colspan = int(self.currentElem.attrs[attrSpan])
|
colspan = int(self.currentElem.attrs[attrSpan])
|
||||||
self.getTable().curColIndex += colspan
|
self.getTable().curColIndex += colspan
|
||||||
elif elem == ('%s:table-column' % tableNs):
|
elif elem == self.tags['table-column']:
|
||||||
attrs = self.currentElem.attrs
|
attrs = self.currentElem.attrs
|
||||||
if attrs.has_key('%s:number-columns-repeated' % tableNs):
|
if attrs.has_key(self.tags['number-columns-repeated']):
|
||||||
self.getTable().nbOfColumns += int(
|
self.getTable().nbOfColumns += int(
|
||||||
attrs['%s:number-columns-repeated' % tableNs])
|
attrs[self.tags['number-columns-repeated']])
|
||||||
else:
|
else:
|
||||||
self.getTable().nbOfColumns += 1
|
self.getTable().nbOfColumns += 1
|
||||||
return ns
|
return ns
|
||||||
|
@ -190,8 +192,28 @@ class PodEnvironment(OdfEnvironment):
|
||||||
xmlElemDef = eval(elemName[0].upper() + elemName[1:]).OD
|
xmlElemDef = eval(elemName[0].upper() + elemName[1:]).OD
|
||||||
elemFullName = xmlElemDef.getFullName(ns)
|
elemFullName = xmlElemDef.getFullName(ns)
|
||||||
xmlElemDef.__init__(elemFullName)
|
xmlElemDef.__init__(elemFullName)
|
||||||
self.ignorableElements = ('%s:tracked-changes' % ns[self.NS_TEXT],
|
# Create a table of names of used tags and attributes (precomputed,
|
||||||
'%s:change' % ns[self.NS_TEXT])
|
# including namespace, for performance).
|
||||||
|
self.tags = {
|
||||||
|
'tracked-changes': '%s:tracked-changes' % ns[self.NS_TEXT],
|
||||||
|
'change': '%s:change' % ns[self.NS_TEXT],
|
||||||
|
'annotation': '%s:annotation' % ns[self.NS_OFFICE],
|
||||||
|
'change-start': '%s:change-start' % ns[self.NS_TEXT],
|
||||||
|
'change-end': '%s:change-end' % ns[self.NS_TEXT],
|
||||||
|
'conditional-text': '%s:conditional-text' % ns[self.NS_TEXT],
|
||||||
|
'table-cell': '%s:table-cell' % ns[self.NS_TABLE],
|
||||||
|
'formula': '%s:formula' % ns[self.NS_TABLE],
|
||||||
|
'value-type': '%s:value-type' % ns[self.NS_OFFICE],
|
||||||
|
'string-value': '%s:string-value' % ns[self.NS_OFFICE],
|
||||||
|
'span': '%s:span' % ns[self.NS_TEXT],
|
||||||
|
'number-columns-spanned': '%s:number-columns-spanned' % \
|
||||||
|
ns[self.NS_TABLE],
|
||||||
|
'number-columns-repeated': '%s:number-columns-repeated' % \
|
||||||
|
ns[self.NS_TABLE],
|
||||||
|
'table-column': '%s:table-column' % ns[self.NS_TABLE],
|
||||||
|
}
|
||||||
|
self.ignorableElements = (self.tags['tracked-changes'],
|
||||||
|
self.tags['change'])
|
||||||
self.impactableElements = (
|
self.impactableElements = (
|
||||||
Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem,
|
Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem,
|
||||||
Cell.OD.elem, Section.OD.elem)
|
Cell.OD.elem, Section.OD.elem)
|
||||||
|
@ -213,19 +235,18 @@ class PodParser(OdfParser):
|
||||||
tableNs = ns[e.NS_TABLE]
|
tableNs = ns[e.NS_TABLE]
|
||||||
if elem in e.ignorableElements:
|
if elem in e.ignorableElements:
|
||||||
e.state = e.IGNORING
|
e.state = e.IGNORING
|
||||||
elif elem == ('%s:annotation' % officeNs):
|
elif elem == e.tags['annotation']:
|
||||||
# Be it in an ODT or ODS template, an annotation is considered to
|
# Be it in an ODT or ODS template, an annotation is considered to
|
||||||
# contain a POD statement.
|
# contain a POD statement.
|
||||||
e.state = e.READING_STATEMENT
|
e.state = e.READING_STATEMENT
|
||||||
elif (elem == ('%s:change-start' % textNs)) or \
|
elif elem in (e.tags['change-start'], e.tags['conditional-text']):
|
||||||
(elem == ('%s:conditional-text' % textNs)):
|
|
||||||
# In an ODT template, any text in track-changes or any conditional
|
# In an ODT template, any text in track-changes or any conditional
|
||||||
# field is considered to contain a POD expression.
|
# field is considered to contain a POD expression.
|
||||||
e.state = e.READING_EXPRESSION
|
e.state = e.READING_EXPRESSION
|
||||||
e.exprHasStyle = False
|
e.exprHasStyle = False
|
||||||
elif (elem == ('%s:table-cell' % tableNs)) and \
|
elif (elem == e.tags['table-cell']) and \
|
||||||
attrs.has_key('%s:formula' % tableNs) and \
|
attrs.has_key(e.tags['formula']) and \
|
||||||
(attrs['%s:value-type' % officeNs] == 'string'):
|
(attrs[e.tags['value-type']] == 'string'):
|
||||||
# In an ODS template, any cell containing a formula of type "string"
|
# In an ODS template, any cell containing a formula of type "string"
|
||||||
# is considered to contain a POD expression. But here it is a
|
# is considered to contain a POD expression. But here it is a
|
||||||
# special case: we need to dump the cell; the expression is not
|
# special case: we need to dump the cell; the expression is not
|
||||||
|
@ -237,9 +258,9 @@ class PodParser(OdfParser):
|
||||||
e.addSubBuffer()
|
e.addSubBuffer()
|
||||||
e.currentBuffer.addElement(e.currentElem.name)
|
e.currentBuffer.addElement(e.currentElem.name)
|
||||||
e.currentBuffer.dumpStartElement(elem, attrs,
|
e.currentBuffer.dumpStartElement(elem, attrs,
|
||||||
ignoreAttrs=('%s:formula'%tableNs, '%s:string-value'%officeNs))
|
ignoreAttrs=(e.tags['formula'], e.tags['string-value']))
|
||||||
# We already have the POD expression: remember it on the env.
|
# We already have the POD expression: remember it on the env.
|
||||||
e.currentOdsExpression = attrs['%s:string-value' % officeNs]
|
e.currentOdsExpression = attrs[e.tags['string-value']]
|
||||||
else:
|
else:
|
||||||
if e.state == e.IGNORING:
|
if e.state == e.IGNORING:
|
||||||
pass
|
pass
|
||||||
|
@ -252,8 +273,7 @@ class PodParser(OdfParser):
|
||||||
elif e.state == e.READING_STATEMENT:
|
elif e.state == e.READING_STATEMENT:
|
||||||
pass
|
pass
|
||||||
elif e.state == e.READING_EXPRESSION:
|
elif e.state == e.READING_EXPRESSION:
|
||||||
if (elem == ('%s:span' % textNs)) and \
|
if (elem == (e.tags['span'])) and not e.currentContent.strip():
|
||||||
not e.currentContent.strip():
|
|
||||||
e.currentBuffer.dumpStartElement(elem, attrs)
|
e.currentBuffer.dumpStartElement(elem, attrs)
|
||||||
e.exprHasStyle = True
|
e.exprHasStyle = True
|
||||||
e.manageInserts()
|
e.manageInserts()
|
||||||
|
@ -265,7 +285,7 @@ class PodParser(OdfParser):
|
||||||
textNs = ns[e.NS_TEXT]
|
textNs = ns[e.NS_TEXT]
|
||||||
if elem in e.ignorableElements:
|
if elem in e.ignorableElements:
|
||||||
e.state = e.READING_CONTENT
|
e.state = e.READING_CONTENT
|
||||||
elif elem == ('%s:annotation' % officeNs):
|
elif elem == e.tags['annotation']:
|
||||||
# Manage statement
|
# Manage statement
|
||||||
oldCb = e.currentBuffer
|
oldCb = e.currentBuffer
|
||||||
actionElemIndex = oldCb.createAction(e.currentStatement)
|
actionElemIndex = oldCb.createAction(e.currentStatement)
|
||||||
|
@ -316,14 +336,14 @@ class PodParser(OdfParser):
|
||||||
e.currentStatement.append(statementLine)
|
e.currentStatement.append(statementLine)
|
||||||
e.currentContent = ''
|
e.currentContent = ''
|
||||||
elif e.state == e.READING_EXPRESSION:
|
elif e.state == e.READING_EXPRESSION:
|
||||||
if (elem == ('%s:change-end' % textNs)) or \
|
if (elem == e.tags['change-end']) or \
|
||||||
(elem == ('%s:conditional-text' % textNs)):
|
(elem == e.tags['conditional-text']):
|
||||||
expression = e.currentContent.strip()
|
expression = e.currentContent.strip()
|
||||||
e.currentContent = ''
|
e.currentContent = ''
|
||||||
# Manage expression
|
# Manage expression
|
||||||
e.currentBuffer.addExpression(expression)
|
e.currentBuffer.addExpression(expression)
|
||||||
if e.exprHasStyle:
|
if e.exprHasStyle:
|
||||||
e.currentBuffer.dumpEndElement('%s:span' % textNs)
|
e.currentBuffer.dumpEndElement(e.tags['span'])
|
||||||
e.state = e.READING_CONTENT
|
e.state = e.READING_CONTENT
|
||||||
|
|
||||||
def characters(self, content):
|
def characters(self, content):
|
||||||
|
|
Loading…
Reference in a new issue