appypod-rattail/pod/buffers.py

640 lines
28 KiB
Python

# ------------------------------------------------------------------------------
# Appy is a framework for building applications in the Python language.
# Copyright (C) 2007 Gaetan Delannay
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------
import re
from xml.sax.saxutils import quoteattr
from appy.shared.xml_parser import xmlPrologue, escapeXml
from appy.pod import PodError
from appy.pod.elements import *
from appy.pod.actions import IfAction, ElseAction, ForAction, VariableAction, \
NullAction
# ------------------------------------------------------------------------------
class ParsingError(Exception):
    '''Raised when a statement (a pod statement group found in a note, or a
       px "for" expression) cannot be parsed.'''
# ParsingError-related constants -----------------------------------------------
# Building blocks, reused within several of the messages below -----------------
# Describes the {element} part of a statement. The list of allowed elements is
# injected once, here, at import time.
ELEMENT = ('identifies the part of the document that will be impacted '
           'by the command. It must be one of %s.'
           % str(PodElement.POD_ELEMS))
# Describes the expected form of the expression of a "for" command
FOR_EXPRESSION = (
    'must be of the form: {name} in {expression}. {name} must be '
    'a Python variable name. It is the name of the iteration '
    'variable. {expression} is a Python expression that, when '
    'evaluated, produces a Python sequence (tuple, string, list, '
    'etc).')
# Describes the general form of a pod statement
POD_STATEMENT = (
    'A Pod statement has the '
    'form: do {element} [{command} {expression}]. {element} ' +
    ELEMENT + ' Optional {command} can be "if" '
    '(conditional inclusion of the element) or "for" (multiple '
    'inclusion of the element). For an "if" command, {expression} '
    'is any Python expression. For a "for" command, {expression} ' +
    FOR_EXPRESSION)
# Describes the general form of a "from" clause
FROM_CLAUSE = (
    'A "from" clause has the form: from {expression}, where '
    '{expression} is a Python expression that, when evaluated, '
    'produces a valid chunk of odt content that will be inserted '
    'instead of the element that is the target of the note.')
# Error messages. Those containing "%s" must be completed via the "%" operator
# before being used. ----------------------------------------------------------
BAD_STATEMENT_GROUP = (
    'Syntax error while parsing a note whose content is '
    '"%s". In a note, you may specify at most 2 lines: a '
    'pod statement and a "from" clause. ' + POD_STATEMENT +
    ' ' + FROM_CLAUSE)
BAD_STATEMENT = 'Syntax error for statement "%s". ' + POD_STATEMENT
BAD_ELEMENT = 'Bad element "%s". An element ' + ELEMENT
BAD_MINUS = (
    "The '-' operator can't be used with element '%s'. It can only be "
    "specified for elements among %s.")
ELEMENT_NOT_FOUND = (
    'Action specified element "%s" but available elements '
    'in this part of the document are %s.')
BAD_FROM_CLAUSE = 'Syntax error in "from" clause "%s". ' + FROM_CLAUSE
DUPLICATE_NAMED_IF = 'An "if" statement with the same name already exists.'
ELSE_WITHOUT_IF = (
    'No previous "if" statement could be found for this "else" '
    'statement.')
ELSE_WITHOUT_NAMED_IF = 'I could not find an "if" statement named "%s".'
BAD_FOR_EXPRESSION = ('Bad "for" expression "%s". A "for" expression ' +
                      FOR_EXPRESSION)
BAD_VAR_EXPRESSION = (
    'Bad variable definition "%s". A variable definition '
    'must have the form {name} = {expression}. {name} must be a Python-'
    'compliant variable name. {expression} is a Python expression. When '
    'encountering such a statement, pod will define, in the specified part '
    'of the document, a variable {name} whose value will be the evaluated '
    '{expression}.')
EVAL_EXPR_ERROR = 'Error while evaluating expression "%s". %s'
NULL_ACTION_ERROR = (
    'There was a problem with this action. Possible causes: '
    '(1) you specified no action (ie "do text") while not '
    'specifying any from clause; (2) you specified the from '
    'clause on the same line as the action, which is not '
    'allowed (ie "do text from ...").')
# ------------------------------------------------------------------------------
class BufferIterator:
    '''Walks, by increasing index, through the entries referenced in dicts
       "subBuffers" and "elements" of a buffer. The indexes are read once,
       when the iterator is created.'''
    def __init__(self, buffer):
        self.buffer = buffer
        # "sorted" produces a real, sorted list of the dict keys whatever the
        # Python version is (dict.keys() is not always a sortable list).
        self.remainingSubBufferIndexes = sorted(self.buffer.subBuffers)
        self.remainingElemIndexes = sorted(self.buffer.elements)
    def hasNext(self):
        '''Returns True if at least one entry was not returned yet.'''
        return bool(self.remainingSubBufferIndexes or \
                    self.remainingElemIndexes)
    def next(self):
        '''Returns a tuple (index, entry) for the not-yet-returned entry
           having the lowest index. If a sub-buffer and an element share the
           same index, the sub-buffer is returned first. Raises StopIteration
           if there is no entry left.'''
        subIndexes = self.remainingSubBufferIndexes
        elemIndexes = self.remainingElemIndexes
        if not subIndexes and not elemIndexes:
            # Previously, reaching this point produced an obscure
            # UnboundLocalError.
            raise StopIteration()
        # Does the next entry come from the sub-buffers ?
        if subIndexes and \
           (not elemIndexes or (subIndexes[0] <= elemIndexes[0])):
            res = subIndexes[0]
            self.remainingSubBufferIndexes = subIndexes[1:]
            return res, self.buffer.subBuffers[res]
        # It comes from the elements
        res = elemIndexes[0]
        self.remainingElemIndexes = elemIndexes[1:]
        return res, self.buffer.elements[res]
# ------------------------------------------------------------------------------
class Buffer:
    '''Abstract class representing any buffer used during rendering.
       Sub-classes must override m_write and m_getLength and, for dumping
       elements whose attributes contain expressions or an attributes hook,
       provide m_addExpression and m_addAttributes.'''
    # Raw string: the pattern is unchanged, but does not rely anymore on
    # unrecognized string escapes.
    elementRex = re.compile(r'([\w-]+:[\w-]+)\s*(.*?)>', re.S)
    def __init__(self, env, parent):
        self.parent = parent
        self.subBuffers = {} # ~{i_bufferIndex: Buffer}~
        self.env = env
    def addSubBuffer(self, subBuffer=None):
        '''References p_subBuffer (or a new MemoryBuffer if p_subBuffer is
           not given) as a sub-buffer of this buffer, at the index
           corresponding to the current buffer length. Returns the
           sub-buffer.'''
        if not subBuffer:
            subBuffer = MemoryBuffer(self.env, self)
        self.subBuffers[self.getLength()] = subBuffer
        subBuffer.parent = self
        return subBuffer
    def removeLastSubBuffer(self):
        '''Unreferences the sub-buffer having the highest index. Raises
           IndexError if there is no sub-buffer.'''
        lastIndex = sorted(self.subBuffers)[-1]
        del self.subBuffers[lastIndex]
    def write(self, something): pass # To be overridden
    def getLength(self): pass # To be overridden
    def dumpStartElement(self, elem, attrs=None, ignoreAttrs=(),
                         insertAttributesHook=False):
        '''Inserts into this buffer the start tag p_elem, with its p_attrs,
           excepted those listed in p_ignoreAttrs. If p_insertAttributesHook
           is True (works only for MemoryBuffers), we will insert an Attributes
           instance at the end of the list of dumped attributes, in order to be
           able, when evaluating the buffer, to dump additional attributes, not
           known at this dump time. Returns this Attributes instance (as
           returned by m_addAttributes), or None if no hook was requested.'''
        # Avoid sharing a mutable default value among calls
        if attrs is None: attrs = {}
        self.write('<%s' % elem)
        for name, value in attrs.items():
            if ignoreAttrs and (name in ignoreAttrs): continue
            # If the value begins with ':', it is a Python expression. Else,
            # it is a static value.
            if value.startswith(':'):
                self.write(' %s="' % name)
                self.addExpression(value[1:])
                self.write('"')
            else:
                self.write(' %s=%s' % (name, quoteattr(value)))
        res = None
        if insertAttributesHook:
            res = self.addAttributes()
        self.write('>')
        return res
    def dumpEndElement(self, elem):
        '''Inserts into this buffer the end tag p_elem.'''
        self.write('</%s>' % elem)
    def dumpElement(self, elem, content=None, attrs=None):
        '''For dumping a whole element at once.'''
        self.dumpStartElement(elem, attrs)
        if content:
            self.dumpContent(content)
        self.dumpEndElement(elem)
    def dumpContent(self, content):
        '''Dumps string p_content into the buffer.'''
        self.write(escapeXml(content))
# ------------------------------------------------------------------------------
class FileBuffer(Buffer):
    '''Buffer whose content is directly written into a file. It has no parent
       buffer and is its own root buffer.'''
    def __init__(self, env, result):
        '''Opens, for writing, the file whose path is p_result, and writes the
           XML prologue into it.'''
        Buffer.__init__(self, env, None)
        self.result = result
        # "open" is preferred over the "file" constructor, that does not exist
        # in every Python version.
        self.content = open(result, 'w')
        self.content.write(xmlPrologue)
    # getLength is used to manage insertions into sub-buffers. But in the case
    # of a FileBuffer, we will only have 1 sub-buffer at a time, and we don't
    # care about where it will be inserted into the FileBuffer.
    def getLength(self): return 0
    def write(self, something):
        '''Writes p_something, encoded in utf-8, into the file.'''
        try:
            self.content.write(something.encode('utf-8'))
        except UnicodeDecodeError:
            # Presumably (Python 2), p_something is a byte string containing
            # non-ASCII chars, that can't be implicitly decoded before being
            # encoded: it is written as is.
            self.content.write(something)
    def addExpression(self, expression, tiedHook=None):
        '''Evaluates p_expression and dumps the result into the file. If the
           evaluation fails, an error is dumped instead. p_tiedHook is not
           used here.'''
        # At 2013-02-06, this method was not called within the whole test suite.
        try:
            self.dumpContent(Expression(expression).evaluate(self.env.context))
        except Exception as e:
            PodError.dump(self, EVAL_EXPR_ERROR % (expression, e), dumpTb=False)
    def addAttributes(self):
        # Into a FileBuffer, it is not possible to insert Attributes. Every
        # Attributes instance is tied to an Expression; because dumping
        # expressions directly into FileBuffer instances seems to be a rather
        # theoretical case (see comment inside the previous method), it does
        # not seem to be a real problem.
        pass
    def pushSubBuffer(self, subBuffer): pass
    def getRootBuffer(self): return self
# ------------------------------------------------------------------------------
class MemoryBuffer(Buffer):
    '''Buffer whose content is kept in memory, in unicode string
       "self.content". Dict "self.elements" references, by their position
       within this content, the elements, expressions and attributes hooks
       added to the buffer. "self.action", if not None, is the action tied to
       this buffer.'''
    # Regular expressions are defined as raw strings: patterns are unchanged
    actionRex = re.compile(r'(?:(\w+)\s*\:\s*)?do\s+(\w+)(-)?'
                           r'(?:\s+(for|if|else|with)\s*(.*))?')
    forRex = re.compile(r'\s*([\w\-_]+)\s+in\s+(.*)')
    varRex = re.compile(r'\s*([\w\-_]+)\s*=\s*(.*)')
    def __init__(self, env, parent):
        Buffer.__init__(self, env, parent)
        self.content = u''
        self.elements = {} # ~{i_index: elem, Expression or Attributes}~
        self.action = None
    def addSubBuffer(self, subBuffer=None):
        '''Adds a sub-buffer (see Buffer.addSubBuffer) and reserves one char
           for it within the content.'''
        sb = Buffer.addSubBuffer(self, subBuffer)
        self.content += ' ' # To avoid having several subbuffers referenced at
                            # the same place within this buffer.
        return sb
    def getRootBuffer(self):
        '''Returns the root buffer. For POD it is always a FileBuffer. For PX,
           it is a MemoryBuffer.'''
        if self.parent: return self.parent.getRootBuffer()
        return self
    def getLength(self): return len(self.content)
    def write(self, thing): self.content += thing
    def getIndex(self, podElemName):
        '''Returns the highest index, in self.elements, of an element whose
           lowercased class name is p_podElemName, or -1 if there is none.'''
        res = -1
        for index, podElem in self.elements.items():
            if podElem.__class__.__name__.lower() == podElemName:
                if index > res:
                    res = index
        return res
    def getMainElement(self):
        '''Returns the element referenced at index 0, or None if there is
           none.'''
        return self.elements.get(0)
    def isMainElement(self, elem):
        '''Is p_elem the main element within this buffer? Returns True if
           yes, None else.'''
        mainElem = self.getMainElement()
        if not mainElem: return
        if hasattr(mainElem, 'OD'): mainElem = mainElem.OD.elem
        if elem != mainElem: return
        # elem is the same as the main elem. But is it really the main elem, or
        # the same elem, found deeper in the buffer?
        for index, iElem in self.elements.items():
            foundElem = None
            if hasattr(iElem, 'OD'):
                if iElem.OD:
                    foundElem = iElem.OD.elem
            else:
                foundElem = iElem
            if (foundElem == mainElem) and (index != 0):
                return
        return True
    def unreferenceElement(self, elem):
        '''Removes, from self.elements, the last occurrence of p_elem. Raises
           KeyError if p_elem is not referenced in this buffer.'''
        # Find last occurrence of this element
        elemIndex = -1
        for index, iElem in self.elements.items():
            foundElem = None
            if hasattr(iElem, 'OD'):
                # A POD element
                if iElem.OD:
                    foundElem = iElem.OD.elem
            else:
                # A PX elem
                foundElem = iElem
            if (foundElem == elem) and (index > elemIndex):
                elemIndex = index
        del self.elements[elemIndex]
    def pushSubBuffer(self, subBuffer):
        '''Sets p_subBuffer at the very end of the buffer.'''
        subIndex = None
        for index, aSubBuffer in self.subBuffers.items():
            if aSubBuffer == subBuffer:
                subIndex = index
                break
        # NOTE(review): the nesting below was reconstructed from a source whose
        # indentation was lost. Please confirm that the sub-buffer must be
        # re-inserted only if it was referenced.
        if subIndex is not None:
            # Indeed, it is possible that this buffer is not referenced
            # in the parent (if it is a temp buffer generated from a cut)
            del self.subBuffers[subIndex]
            self.subBuffers[self.getLength()] = subBuffer
            self.content += u' '
    def transferAllContent(self):
        '''Transfer all content to parent.'''
        if isinstance(self.parent, FileBuffer):
            # First unreference all elements
            for index in self.getElementIndexes(expressions=False):
                del self.elements[index]
            self.evaluate()
        else:
            # Transfer content in itself
            oldParentLength = self.parent.getLength()
            self.parent.write(self.content)
            # Transfer elements
            for index, podElem in self.elements.items():
                self.parent.elements[oldParentLength + index] = podElem
            # Transfer subBuffers
            for index, buf in self.subBuffers.items():
                self.parent.subBuffers[oldParentLength + index] = buf
        # Empty the buffer
        MemoryBuffer.__init__(self, self.env, self.parent)
        # Change buffer position wrt parent
        self.parent.pushSubBuffer(self)
    def addElement(self, elem, elemType='pod'):
        '''References p_elem at the current end of the buffer. If p_elemType
           is "pod", p_elem is first converted via PodElement.create.'''
        if elemType == 'pod':
            elem = PodElement.create(elem)
        self.elements[self.getLength()] = elem
        if isinstance(elem, Cell) or isinstance(elem, Table):
            elem.tableInfo = self.env.getTable()
            if isinstance(elem, Cell):
                # Remember where this cell is in the table
                elem.colIndex = elem.tableInfo.curColIndex
        if elem == 'x':
            # See the comment on the similar statement in m_addExpression
            # below: one char is reserved within the content.
            self.content += u' '
    def addExpression(self, expression, tiedHook=None):
        '''Creates an Expression from p_expression and references it at the
           current end of the buffer. If p_tiedHook is given, the expression
           is stored in its attribute "tiedExpression".'''
        # Create the POD expression
        expr = Expression(expression)
        if tiedHook: tiedHook.tiedExpression = expr
        self.elements[self.getLength()] = expr
        # To be sure that an expr and an elem can't be found at the same index
        # in the buffer.
        self.content += u' '
    def addAttributes(self):
        '''Creates an Attributes instance, references it at the current end
           of the buffer and returns it.'''
        # Create the Attributes instance
        attrs = Attributes(self.env)
        self.elements[self.getLength()] = attrs
        self.content += u' '
        return attrs
    def createAction(self, statementGroup):
        '''Tries to create an action based on p_statementGroup. If the statement
           is not correct, r_ is -1. Else, r_ is the index of the element within
           the buffer that is the object of the action. In case of error, the
           problem is dumped into this buffer via PodError.dump.'''
        res = -1
        try:
            # Check the whole statement group
            if not statementGroup or (len(statementGroup) > 2):
                raise ParsingError(BAD_STATEMENT_GROUP % str(statementGroup))
            # Check the statement
            statement = statementGroup[0]
            aRes = self.actionRex.match(statement)
            if not aRes:
                raise ParsingError(BAD_STATEMENT % statement)
            statementName, podElem, minus, actionType, subExpr = aRes.groups()
            if podElem not in PodElement.POD_ELEMS:
                raise ParsingError(BAD_ELEMENT % podElem)
            if minus and (podElem not in PodElement.MINUS_ELEMS):
                raise ParsingError(
                    BAD_MINUS % (podElem, PodElement.MINUS_ELEMS))
            indexPodElem = self.getIndex(podElem)
            if indexPodElem == -1:
                raise ParsingError(
                    ELEMENT_NOT_FOUND % (podElem, str([
                        e.__class__.__name__.lower() \
                        for e in self.elements.values()])))
            podElem = self.elements[indexPodElem]
            # Check the 'from' clause
            fromClause = None
            source = 'buffer'
            if len(statementGroup) > 1:
                fromClause = statementGroup[1]
                source = 'from'
                if not fromClause.startswith('from '):
                    raise ParsingError(BAD_FROM_CLAUSE % fromClause)
                fromClause = fromClause[5:]
            # Create the action
            if actionType == 'if':
                self.action = IfAction(statementName, self, subExpr, podElem,
                                       minus, source, fromClause)
                self.env.ifActions.append(self.action)
                if self.action.name:
                    # We must register this action as a named action
                    if self.action.name in self.env.namedIfActions:
                        raise ParsingError(DUPLICATE_NAMED_IF)
                    self.env.namedIfActions[self.action.name] = self.action
            elif actionType == 'else':
                if not self.env.ifActions:
                    raise ParsingError(ELSE_WITHOUT_IF)
                # Does the "else" action reference a named "if" action?
                ifReference = subExpr.strip()
                if ifReference:
                    if ifReference not in self.env.namedIfActions:
                        raise ParsingError(ELSE_WITHOUT_NAMED_IF % ifReference)
                    linkedIfAction = self.env.namedIfActions[ifReference]
                    # This "else" action "consumes" the "if" action: this way,
                    # it is not possible to define two "else" actions related to
                    # the same "if".
                    del self.env.namedIfActions[ifReference]
                    self.env.ifActions.remove(linkedIfAction)
                else:
                    linkedIfAction = self.env.ifActions.pop()
                self.action = ElseAction(statementName, self, None, podElem,
                                         minus, source, fromClause,
                                         linkedIfAction)
            elif actionType == 'for':
                forRes = MemoryBuffer.forRex.match(subExpr.strip())
                if not forRes:
                    raise ParsingError(BAD_FOR_EXPRESSION % subExpr)
                iterVar, subExpr = forRes.groups()
                self.action = ForAction(statementName, self, subExpr, podElem,
                                        minus, iterVar, source, fromClause)
            elif actionType == 'with':
                varRes = MemoryBuffer.varRex.match(subExpr.strip())
                if not varRes:
                    raise ParsingError(BAD_VAR_EXPRESSION % subExpr)
                varName, subExpr = varRes.groups()
                self.action = VariableAction(statementName, self, subExpr,
                    podElem, minus, varName, source, fromClause)
            else: # null action
                if not fromClause:
                    raise ParsingError(NULL_ACTION_ERROR)
                self.action = NullAction(statementName, self, None, podElem,
                                         None, source, fromClause)
            res = indexPodElem
        except ParsingError as ppe:
            PodError.dump(self, ppe, removeFirstLine=True)
        return res
    def createPxAction(self, elem, actionType, statement):
        '''Creates, for PX, a "for" or "if" action (depending on
           p_actionType) on p_elem, based on p_statement. Always returns 0.
           Raises ParsingError if a "for" statement is malformed.'''
        res = 0
        if actionType == 'for':
            forRes = MemoryBuffer.forRex.match(statement.strip())
            if not forRes:
                raise ParsingError(BAD_FOR_EXPRESSION % statement)
            iterVar, subExpr = forRes.groups()
            self.action = ForAction('for', self, subExpr, elem, False, iterVar,
                                    'buffer', None)
        elif actionType == 'if':
            self.action = IfAction('if', self, statement, elem, False,
                                   'buffer', None)
        return res
    def cut(self, index, keepFirstPart):
        '''Cuts this buffer into 2 parts. Depending on p_keepFirstPart, the 1st
           (from 0 to index-1) or the second (from index to the end) part of the
           buffer is returned as a MemoryBuffer instance without parent; the
           other part is self.'''
        res = MemoryBuffer(self.env, None)
        # Manage buffer meta-info (elements, expressions, subbuffers)
        iterator = BufferIterator(self)
        subBuffersToDelete = []
        elementsToDelete = []
        mustShift = False
        while iterator.hasNext():
            itemIndex, item = iterator.next()
            if keepFirstPart:
                # Items before p_index stay, unchanged, in self
                if itemIndex < index: continue
                newIndex = itemIndex - index
            else:
                if itemIndex >= index:
                    # This item stays in self but must be re-indexed
                    mustShift = True
                    continue
                newIndex = itemIndex
            # Move the item to the new buffer
            if isinstance(item, MemoryBuffer):
                res.subBuffers[newIndex] = item
                subBuffersToDelete.append(itemIndex)
            else:
                res.elements[newIndex] = item
                elementsToDelete.append(itemIndex)
        for elemIndex in elementsToDelete:
            del self.elements[elemIndex]
        for subIndex in subBuffersToDelete:
            del self.subBuffers[subIndex]
        if mustShift:
            elements = {}
            for elemIndex, elem in self.elements.items():
                elements[elemIndex-index] = elem
            self.elements = elements
            subBuffers = {}
            for subIndex, buf in self.subBuffers.items():
                subBuffers[subIndex-index] = buf
            self.subBuffers = subBuffers
        # Manage content
        if keepFirstPart:
            res.write(self.content[index:])
            self.content = self.content[:index]
        else:
            res.write(self.content[:index])
            self.content = self.content[index:]
        return res
    def getElementIndexes(self, expressions=True):
        '''Returns the list of indexes, in self.elements, of all Expression
           instances (if p_expressions is True) or of all entries that are
           not Expression instances (if p_expressions is False).'''
        res = []
        wantExpressions = bool(expressions)
        for index, elem in self.elements.items():
            if isinstance(elem, Expression) == wantExpressions:
                res.append(index)
        return res
    def transferActionIndependentContent(self, actionElemIndex):
        '''Transfers, to the parent buffer, the content that precedes the
           element at p_actionElemIndex. If other non-expression elements
           follow this element, the content starting at the first one is
           moved into a new sub-buffer. Returns this sub-buffer if created,
           self else.'''
        # Manage content to transfer to parent buffer
        if actionElemIndex != 0:
            actionIndependentBuffer = self.cut(actionElemIndex,
                                               keepFirstPart=False)
            actionIndependentBuffer.parent = self.parent
            actionIndependentBuffer.transferAllContent()
            self.parent.pushSubBuffer(self)
        # Manage content to transfer to a child buffer
        actionElemIndex = self.getIndex(
            self.action.elem.__class__.__name__.lower())
        # We recompute actionElemIndex because after cut it may have changed
        elemIndexes = self.getElementIndexes(expressions=False)
        elemIndexes.sort()
        position = elemIndexes.index(actionElemIndex)
        if position != (len(elemIndexes)-1):
            # I must create a sub-buffer with the impactable elements after
            # the action-related element
            childBuffer = self.cut(elemIndexes[position+1], keepFirstPart=True)
            self.addSubBuffer(childBuffer)
            res = childBuffer
        else:
            res = self
        return res
    def getStartIndex(self, removeMainElems):
        '''When I must dump the buffer, sometimes (if p_removeMainElems is
           True), I must dump only a subset of it. This method returns the start
           index of the buffer part I must dump.'''
        if not removeMainElems: return 0
        # Find the start position of the deepest element to remove
        deepestElem = self.action.elem.DEEPEST_TO_REMOVE
        pos = self.content.find('<%s' % deepestElem.elem)
        pos = pos + len(deepestElem.elem)
        # Now we must find the position of the end of this start tag,
        # skipping potential attributes.
        inAttrValue = False # Are we parsing an attribute value ?
        while True:
            pos += 1
            nextChar = self.content[pos]
            if (nextChar == '>') and not inAttrValue:
                # Yes we have it
                break
            elif nextChar == '"':
                inAttrValue = not inAttrValue
        return pos + 1
    def getStopIndex(self, removeMainElems):
        '''This method returns the stop index of the buffer part I must dump.'''
        if not removeMainElems: return self.getLength()
        ns = self.env.namespaces
        deepestElem = self.action.elem.DEEPEST_TO_REMOVE
        return self.content.rfind('</%s>' % deepestElem.getFullName(ns))
    reTagContent = re.compile(r'<(?P<p>[\w-]+):(?P<f>[\w-]+)(.*?)>.*</(?P=p):'
                              r'(?P=f)>', re.S)
    def evaluate(self, subElements=True, removeMainElems=False):
        '''Evaluates this buffer and writes the result into the root buffer.
           If p_subElements is False, only the root tag of the content is
           dumped, without its content. Else, expressions, attributes hooks
           and sub-buffers are evaluated in order; if p_removeMainElems is
           True, only the part delimited by m_getStartIndex and m_getStopIndex
           is dumped.'''
        result = self.getRootBuffer()
        if not subElements:
            # Dump the root tag in this buffer, but not its content.
            res = self.reTagContent.match(self.content.strip())
            if not res: result.write(self.content)
            else:
                g = res.group
                result.write('<%s:%s%s></%s:%s>' % (g(1),g(2),g(3),g(1),g(2)))
        else:
            iterator = BufferIterator(self)
            currentIndex = self.getStartIndex(removeMainElems)
            while iterator.hasNext():
                index, evalEntry = iterator.next()
                result.write(self.content[currentIndex:index])
                # Skip the char reserved in the content for this entry
                currentIndex = index + 1
                if isinstance(evalEntry, Expression):
                    try:
                        result.dumpContent(evalEntry.evaluate(self.env.context))
                    except Exception as e:
                        if self.caller() == 'pod':
                            PodError.dump(result, EVAL_EXPR_ERROR % (
                                evalEntry.expr, e), dumpTb=False)
                        else: # px
                            raise Exception(EVAL_EXPR_ERROR %(evalEntry.expr,e))
                elif isinstance(evalEntry, Attributes):
                    result.write(evalEntry.evaluate(self.env.context))
                else: # It is a subBuffer
                    if evalEntry.action:
                        evalEntry.action.execute()
                    else:
                        result.write(evalEntry.content)
            stopIndex = self.getStopIndex(removeMainElems)
            if currentIndex < (stopIndex-1):
                result.write(self.content[currentIndex:stopIndex])
    def clean(self):
        '''Cleans the buffer content.'''
        self.content = u''
    def caller(self):
        '''Returns "pod" if the caller is appy.pod, "px" if it is appy.px.'''
        if self.env.__class__.__name__ == 'PxEnvironment': return 'px'
        return 'pod'
# ------------------------------------------------------------------------------