From d5d99b67eb9128ae17a6b5b350c5222bd041eca0 Mon Sep 17 00:00:00 2001
From: Gaetan Delannay <gaetan.delannay@gmail.com>
Date: Thu, 31 Jan 2013 12:50:25 +0100
Subject: [PATCH] [pod] Performance improvement: stop computing tag names,
 create a dict of precomputed tags before starting parsing.

---
 pod/pod_parser.py | 68 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/pod/pod_parser.py b/pod/pod_parser.py
index c56a69f..18e70ab 100644
--- a/pod/pod_parser.py
+++ b/pod/pod_parser.py
@@ -60,10 +60,6 @@ class OdInsert:
 class PodEnvironment(OdfEnvironment):
     '''Contains all elements representing the current parser state during
        parsing.'''
-    # Elements we must ignore (they will not be included in the result
-    ignorableElements = None # Will be set after namespace propagation
-    # Elements that may be impacted by POD statements
-    impactableElements = None # Idem
     # Possibles modes
     # ADD_IN_BUFFER: when encountering an impactable element, we must
     #                continue to dump it in the current buffer
@@ -92,6 +88,10 @@ class PodEnvironment(OdfEnvironment):
         self.mode = self.ADD_IN_SUBBUFFER
         # Current state
         self.state = self.READING_CONTENT
+        # Elements we must ignore (they will not be included in the result)
+        self.ignorableElements = None # Will be set after namespace propagation
+        # Elements that may be impacted by POD statements
+        self.impactableElements = None # Idem
         # Stack of currently visited tables
         self.tableStack = []
         self.tableIndex = -1
@@ -110,6 +110,8 @@ class PodEnvironment(OdfEnvironment):
         self.namedIfActions = {} #~{s_statementName: IfAction}~
         # Currently parsed expression within an ODS template
         self.currentOdsExpression = None
+        # Names of some tags; they are computed after namespace propagation
+        self.tags = None
 
     def getTable(self):
         '''Gets the currently parsed table.'''
@@ -157,15 +159,15 @@ class PodEnvironment(OdfEnvironment):
             self.getTable().curRowAttrs = self.currentElem.attrs
         elif elem == Cell.OD.elem:
             colspan = 1
-            attrSpan = '%s:number-columns-spanned' % tableNs
+            attrSpan = self.tags['number-columns-spanned']
             if self.currentElem.attrs.has_key(attrSpan):
                 colspan = int(self.currentElem.attrs[attrSpan])
             self.getTable().curColIndex += colspan
-        elif elem == ('%s:table-column' % tableNs):
+        elif elem == self.tags['table-column']:
             attrs = self.currentElem.attrs
-            if attrs.has_key('%s:number-columns-repeated' % tableNs):
+            if attrs.has_key(self.tags['number-columns-repeated']):
                 self.getTable().nbOfColumns += int(
-                    attrs['%s:number-columns-repeated' % tableNs])
+                    attrs[self.tags['number-columns-repeated']])
             else:
                 self.getTable().nbOfColumns += 1
         return ns
@@ -190,8 +192,28 @@ class PodEnvironment(OdfEnvironment):
             xmlElemDef = eval(elemName[0].upper() + elemName[1:]).OD
             elemFullName = xmlElemDef.getFullName(ns)
             xmlElemDef.__init__(elemFullName)
-        self.ignorableElements = ('%s:tracked-changes' % ns[self.NS_TEXT],
-                                  '%s:change' % ns[self.NS_TEXT])
+        # Create a table of names of used tags and attributes (precomputed,
+        # including namespace, for performance).
+        self.tags = {
+          'tracked-changes': '%s:tracked-changes' % ns[self.NS_TEXT],
+          'change': '%s:change' % ns[self.NS_TEXT],
+          'annotation': '%s:annotation' % ns[self.NS_OFFICE],
+          'change-start': '%s:change-start' % ns[self.NS_TEXT],
+          'change-end': '%s:change-end' % ns[self.NS_TEXT],
+          'conditional-text': '%s:conditional-text' % ns[self.NS_TEXT],
+          'table-cell': '%s:table-cell' % ns[self.NS_TABLE],
+          'formula': '%s:formula' % ns[self.NS_TABLE],
+          'value-type': '%s:value-type' % ns[self.NS_OFFICE],
+          'string-value': '%s:string-value' % ns[self.NS_OFFICE],
+          'span': '%s:span' % ns[self.NS_TEXT],
+          'number-columns-spanned': '%s:number-columns-spanned' % \
+                                    ns[self.NS_TABLE],
+          'number-columns-repeated': '%s:number-columns-repeated' % \
+                                    ns[self.NS_TABLE],
+          'table-column': '%s:table-column' % ns[self.NS_TABLE],
+        }
+        self.ignorableElements = (self.tags['tracked-changes'],
+                                  self.tags['change'])
         self.impactableElements = (
            Text.OD.elem, Title.OD.elem, Table.OD.elem, Row.OD.elem,
            Cell.OD.elem, Section.OD.elem)
@@ -213,19 +235,18 @@ class PodParser(OdfParser):
         tableNs = ns[e.NS_TABLE]
         if elem in e.ignorableElements:
             e.state = e.IGNORING
-        elif elem == ('%s:annotation' % officeNs):
+        elif elem == e.tags['annotation']:
             # Be it in an ODT or ODS template, an annotation is considered to
             # contain a POD statement.
             e.state = e.READING_STATEMENT
-        elif (elem == ('%s:change-start' % textNs)) or \
-             (elem == ('%s:conditional-text' % textNs)):
+        elif elem in (e.tags['change-start'], e.tags['conditional-text']):
             # In an ODT template, any text in track-changes or any conditional
             # field is considered to contain a POD expression.
             e.state = e.READING_EXPRESSION
             e.exprHasStyle = False
-        elif (elem == ('%s:table-cell' % tableNs)) and \
-             attrs.has_key('%s:formula' % tableNs) and \
-             (attrs['%s:value-type' % officeNs] == 'string'):
+        elif (elem == e.tags['table-cell']) and \
+             attrs.has_key(e.tags['formula']) and \
+             (attrs[e.tags['value-type']] == 'string'):
             # In an ODS template, any cell containing a formula of type "string"
             # is considered to contain a POD expression. But here it is a
             # special case: we need to dump the cell; the expression is not
@@ -237,9 +258,9 @@ class PodParser(OdfParser):
                 e.addSubBuffer()
             e.currentBuffer.addElement(e.currentElem.name)
             e.currentBuffer.dumpStartElement(elem, attrs,
-                ignoreAttrs=('%s:formula'%tableNs, '%s:string-value'%officeNs))
+                ignoreAttrs=(e.tags['formula'], e.tags['string-value']))
             # We already have the POD expression: remember it on the env.
-            e.currentOdsExpression = attrs['%s:string-value' % officeNs]
+            e.currentOdsExpression = attrs[e.tags['string-value']]
         else:
             if e.state == e.IGNORING:
                 pass
@@ -252,8 +273,7 @@ class PodParser(OdfParser):
             elif e.state == e.READING_STATEMENT:
                 pass
             elif e.state == e.READING_EXPRESSION:
-                if (elem == ('%s:span' % textNs)) and \
-                   not e.currentContent.strip():
+                if (elem == (e.tags['span'])) and not e.currentContent.strip():
                     e.currentBuffer.dumpStartElement(elem, attrs)
                     e.exprHasStyle = True
         e.manageInserts()
@@ -265,7 +285,7 @@ class PodParser(OdfParser):
         textNs = ns[e.NS_TEXT]
         if elem in e.ignorableElements:
             e.state = e.READING_CONTENT
-        elif elem == ('%s:annotation' % officeNs):
+        elif elem == e.tags['annotation']:
             # Manage statement
             oldCb = e.currentBuffer
             actionElemIndex = oldCb.createAction(e.currentStatement)
@@ -316,14 +336,14 @@ class PodParser(OdfParser):
                         e.currentStatement.append(statementLine)
                     e.currentContent = ''
             elif e.state == e.READING_EXPRESSION:
-                if (elem == ('%s:change-end' % textNs)) or \
-                   (elem == ('%s:conditional-text' % textNs)):
+                if (elem == e.tags['change-end']) or \
+                   (elem == e.tags['conditional-text']):
                     expression = e.currentContent.strip()
                     e.currentContent = ''
                     # Manage expression
                     e.currentBuffer.addExpression(expression)
                     if e.exprHasStyle:
-                        e.currentBuffer.dumpEndElement('%s:span' % textNs)
+                        e.currentBuffer.dumpEndElement(e.tags['span'])
                     e.state = e.READING_CONTENT
 
     def characters(self, content):