Optimized XML marshall/unmarshall process for transferring large binary files.
This commit is contained in:
parent
2d82dc4e0b
commit
599396a838
|
@ -26,6 +26,14 @@ class UnmarshalledObject:
|
||||||
res = res.strip() + '>'
|
res = res.strip() + '>'
|
||||||
return res.encode('utf-8')
|
return res.encode('utf-8')
|
||||||
|
|
||||||
|
class UnmarshalledFile:
|
||||||
|
'''Used for producing file objects from a marshalled Python object.'''
|
||||||
|
def __init__(self):
|
||||||
|
self.name = '' # The name of the file on disk
|
||||||
|
self.mimeType = None # The MIME type of the file
|
||||||
|
self.content = '' # The binary content of the file
|
||||||
|
self.size = 0 # The length of the file in bytes.
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
class Dummy: pass
|
class Dummy: pass
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
import xml.sax, base64
|
import xml.sax
|
||||||
from xml.sax.handler import ContentHandler, ErrorHandler
|
from xml.sax.handler import ContentHandler, ErrorHandler
|
||||||
from xml.sax.xmlreader import InputSource
|
from xml.sax.xmlreader import InputSource
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
@ -133,8 +133,7 @@ class XmlParser(ContentHandler, ErrorHandler):
|
||||||
return self.res
|
return self.res
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
from appy.shared import UnmarshalledObject, Dummy
|
from appy.shared import UnmarshalledObject, UnmarshalledFile
|
||||||
from appy.gen.plone25.wrappers import FileWrapper
|
|
||||||
try:
|
try:
|
||||||
from DateTime import DateTime
|
from DateTime import DateTime
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -169,14 +168,10 @@ class XmlUnmarshaller(XmlParser):
|
||||||
# to store the next parsed element. A container can be a list, a tuple,
|
# to store the next parsed element. A container can be a list, a tuple,
|
||||||
# an object (the root object of the whole web or a sub-object).
|
# an object (the root object of the whole web or a sub-object).
|
||||||
self.env.currentBasicType = None # Will hold the name of the currently
|
self.env.currentBasicType = None # Will hold the name of the currently
|
||||||
# parsed basic type (unicode, float, ...)
|
# parsed basic type (unicode, float...)
|
||||||
self.env.currentContent = '' # We store here the content of tags.
|
self.env.currentContent = '' # We store here the content of tags.
|
||||||
self.env.currentFileName = '' # If current tag contains a file, we
|
|
||||||
# store here the file name.
|
|
||||||
self.env.currentMimeType = '' # If current tag contains a file, we
|
|
||||||
# store here the file name.
|
|
||||||
|
|
||||||
containerTags = ('tuple', 'list', 'object')
|
containerTags = ('tuple', 'list', 'object', 'file')
|
||||||
numericTypes = ('bool', 'int', 'float', 'long')
|
numericTypes = ('bool', 'int', 'float', 'long')
|
||||||
def startElement(self, elem, attrs):
|
def startElement(self, elem, attrs):
|
||||||
e = XmlParser.startElement(self, elem, attrs)
|
e = XmlParser.startElement(self, elem, attrs)
|
||||||
|
@ -189,18 +184,19 @@ class XmlUnmarshaller(XmlParser):
|
||||||
if elemType == 'object': newObject = UnmarshalledObject()
|
if elemType == 'object': newObject = UnmarshalledObject()
|
||||||
elif elemType == 'tuple': newObject = [] # Tuples become lists
|
elif elemType == 'tuple': newObject = [] # Tuples become lists
|
||||||
elif elemType == 'list': newObject = []
|
elif elemType == 'list': newObject = []
|
||||||
|
elif elemType == 'file':
|
||||||
|
newObject = UnmarshalledFile()
|
||||||
|
if attrs.has_key('name'):
|
||||||
|
newObject.name = attrs['name']
|
||||||
|
if attrs.has_key('mimeType'):
|
||||||
|
newObject.mimeType = attrs['mimeType']
|
||||||
else: newObject = UnmarshalledObject()
|
else: newObject = UnmarshalledObject()
|
||||||
# Store the value on the last container, or on the root object.
|
# Store the value on the last container, or on the root object.
|
||||||
self.storeValue(elem, newObject)
|
self.storeValue(elem, newObject)
|
||||||
# Push the new object on the container stack
|
# Push the new object on the container stack
|
||||||
e.containerStack.append(newObject)
|
e.containerStack.append(newObject)
|
||||||
else:
|
else:
|
||||||
# We are parsing a basic type
|
|
||||||
e.currentBasicType = elemType
|
e.currentBasicType = elemType
|
||||||
if elemType == 'file':
|
|
||||||
if attrs.has_key('name'): e.currentFileName = attrs['name']
|
|
||||||
if attrs.has_key('mimeType'):
|
|
||||||
e.currentMimeType = attrs['mimeType']
|
|
||||||
|
|
||||||
def storeValue(self, name, value):
|
def storeValue(self, name, value):
|
||||||
'''Stores the newly parsed p_value (contained in tag p_name) on the
|
'''Stores the newly parsed p_value (contained in tag p_name) on the
|
||||||
|
@ -214,8 +210,10 @@ class XmlUnmarshaller(XmlParser):
|
||||||
self.res.__class__ = self.klass
|
self.res.__class__ = self.klass
|
||||||
else:
|
else:
|
||||||
currentContainer = e.containerStack[-1]
|
currentContainer = e.containerStack[-1]
|
||||||
if type(currentContainer) == list:
|
if isinstance(currentContainer, list):
|
||||||
currentContainer.append(value)
|
currentContainer.append(value)
|
||||||
|
elif isinstance(currentContainer, UnmarshalledFile):
|
||||||
|
currentContainer.content += value
|
||||||
else:
|
else:
|
||||||
# Current container is an object
|
# Current container is an object
|
||||||
setattr(currentContainer, name, value)
|
setattr(currentContainer, name, value)
|
||||||
|
@ -236,13 +234,8 @@ class XmlUnmarshaller(XmlParser):
|
||||||
value = None
|
value = None
|
||||||
elif e.currentBasicType == 'DateTime':
|
elif e.currentBasicType == 'DateTime':
|
||||||
value = DateTime(e.currentContent.strip())
|
value = DateTime(e.currentContent.strip())
|
||||||
elif e.currentBasicType == 'file':
|
elif e.currentBasicType == 'base64':
|
||||||
value = Dummy()
|
value = e.currentContent.decode('base64')
|
||||||
value.name = e.currentFileName
|
|
||||||
value.content = base64.b64decode(e.currentContent.strip())
|
|
||||||
value.mimeType = e.currentMimeType
|
|
||||||
value.size = len(value.content)
|
|
||||||
value.__class__ = FileWrapper
|
|
||||||
else:
|
else:
|
||||||
value = e.currentContent.strip()
|
value = e.currentContent.strip()
|
||||||
# Store the value on the last container
|
# Store the value on the last container
|
||||||
|
@ -271,24 +264,59 @@ class XmlMarshaller:
|
||||||
fieldsToExclude = []
|
fieldsToExclude = []
|
||||||
atFiles = ('image', 'file') # Types of archetypes fields that contain files.
|
atFiles = ('image', 'file') # Types of archetypes fields that contain files.
|
||||||
|
|
||||||
def dumpValue(self, res, value, fieldType='basic'):
|
def dumpString(self, res, s):
|
||||||
'''Dumps the XML version of p_value to p_res.'''
|
'''Dumps a string into the result.'''
|
||||||
if fieldType == 'file':
|
|
||||||
# p_value contains the (possibly binary) content of a file. We will
|
|
||||||
# encode it in Base64.
|
|
||||||
if hasattr(value, 'data'):
|
|
||||||
v = value.data # Simple wrap for images
|
|
||||||
if hasattr(v, 'data'): v = v.data # Double wrap for files
|
|
||||||
else:
|
|
||||||
v = value
|
|
||||||
res.write(base64.b64encode(v))
|
|
||||||
elif isinstance(value, basestring):
|
|
||||||
# Replace special chars by XML entities
|
# Replace special chars by XML entities
|
||||||
for c in value:
|
for c in s:
|
||||||
if self.xmlEntities.has_key(c):
|
if self.xmlEntities.has_key(c):
|
||||||
res.write(self.xmlEntities[c])
|
res.write(self.xmlEntities[c])
|
||||||
else:
|
else:
|
||||||
res.write(c)
|
res.write(c)
|
||||||
|
|
||||||
|
def dumpFile(self, res, v):
|
||||||
|
'''Dumps a file into the result.'''
|
||||||
|
# p_v is the file (an object exposing ".data") or its raw, possibly binary, content. We will
|
||||||
|
# encode it in Base64, in one or several parts.
|
||||||
|
res.write('<part type="base64" number="1">')
|
||||||
|
if hasattr(v, 'data'):
|
||||||
|
# The file is an Archetypes file.
|
||||||
|
valueType = v.data.__class__.__name__
|
||||||
|
if valueType == 'Pdata':
|
||||||
|
# There will be several parts.
|
||||||
|
res.write(v.data.data.encode('base64'))
|
||||||
|
# Write subsequent parts
|
||||||
|
nextPart = v.data.next
|
||||||
|
nextPartNumber = 2
|
||||||
|
while nextPart:
|
||||||
|
res.write('</part>') # Close the previous part
|
||||||
|
res.write('<part type="base64" number="%d">'%nextPartNumber)
|
||||||
|
res.write(nextPart.data.encode('base64'))
|
||||||
|
nextPart = nextPart.next
|
||||||
|
nextPartNumber += 1
|
||||||
|
else:
|
||||||
|
res.write(v.data.encode('base64'))
|
||||||
|
else:
|
||||||
|
res.write(v.encode('base64'))
|
||||||
|
res.write('</part>')
|
||||||
|
|
||||||
|
def dumpValue(self, res, value, fieldType):
|
||||||
|
'''Dumps the XML version of p_value to p_res.'''
|
||||||
|
if fieldType == 'file':
|
||||||
|
self.dumpFile(res, value)
|
||||||
|
elif fieldType == 'ref':
|
||||||
|
if value:
|
||||||
|
if type(value) in self.sequenceTypes:
|
||||||
|
for elem in value:
|
||||||
|
self.dumpField(res, 'url', elem.absolute_url_path())
|
||||||
|
else:
|
||||||
|
self.dumpField(res, 'url', value.absolute_url_path())
|
||||||
|
elif type(value) in self.sequenceTypes:
|
||||||
|
# The previous condition must be checked before this one because
|
||||||
|
# referred objects may be stored in lists or tuples, too.
|
||||||
|
for elem in value:
|
||||||
|
self.dumpField(res, 'e', elem)
|
||||||
|
elif isinstance(value, basestring):
|
||||||
|
self.dumpString(res, value)
|
||||||
elif isinstance(value, bool):
|
elif isinstance(value, bool):
|
||||||
res.write(self.trueFalse[value])
|
res.write(self.trueFalse[value])
|
||||||
else:
|
else:
|
||||||
|
@ -298,7 +326,7 @@ class XmlMarshaller:
|
||||||
'''Dumps in p_res, the value of the p_field for p_instance.'''
|
'''Dumps in p_res, the value of the p_field for p_instance.'''
|
||||||
res.write('<'); res.write(fieldName);
|
res.write('<'); res.write(fieldName);
|
||||||
# Dump the type of the field as an XML attribute
|
# Dump the type of the field as an XML attribute
|
||||||
fType = None # No type will mean "string".
|
fType = None # No type will mean "unicode".
|
||||||
if fieldType == 'file': fType ='file'
|
if fieldType == 'file': fType ='file'
|
||||||
elif fieldType == 'ref': fType = 'list'
|
elif fieldType == 'ref': fType = 'list'
|
||||||
elif isinstance(fieldValue, bool): fType = 'bool'
|
elif isinstance(fieldValue, bool): fType = 'bool'
|
||||||
|
@ -309,28 +337,19 @@ class XmlMarshaller:
|
||||||
elif isinstance(fieldValue, list): fType = 'list'
|
elif isinstance(fieldValue, list): fType = 'list'
|
||||||
elif fieldValue.__class__.__name__ == 'DateTime': fType = 'DateTime'
|
elif fieldValue.__class__.__name__ == 'DateTime': fType = 'DateTime'
|
||||||
if fType: res.write(' type="%s"' % fType)
|
if fType: res.write(' type="%s"' % fType)
|
||||||
|
# Dump other attributes if needed
|
||||||
if type(fieldValue) in self.sequenceTypes:
|
if type(fieldValue) in self.sequenceTypes:
|
||||||
res.write(' count="%d"' % len(fieldValue))
|
res.write(' count="%d"' % len(fieldValue))
|
||||||
if fieldType == 'file':
|
if fieldType == 'file':
|
||||||
if hasattr(fieldValue, 'content_type'):
|
if hasattr(fieldValue, 'content_type'):
|
||||||
res.write(' mimeType="%s"' % fieldValue.content_type)
|
res.write(' mimeType="%s"' % fieldValue.content_type)
|
||||||
if hasattr(fieldValue, 'filename'):
|
if hasattr(fieldValue, 'filename'):
|
||||||
res.write(' name="%s"' % fieldValue.filename)
|
res.write(' name="')
|
||||||
|
self.dumpString(res, fieldValue.filename)
|
||||||
|
res.write('"')
|
||||||
res.write('>')
|
res.write('>')
|
||||||
# Dump the child elements if any
|
# Dump the field value
|
||||||
if fieldType == 'ref':
|
self.dumpValue(res, fieldValue, fieldType)
|
||||||
if fieldValue:
|
|
||||||
for elem in fieldValue:
|
|
||||||
self.dumpField(res, 'url', elem.absolute_url_path())
|
|
||||||
else:
|
|
||||||
self.dumpField(res, 'url', '')
|
|
||||||
elif type(fieldValue) in self.sequenceTypes:
|
|
||||||
# The previous condition must be checked before this one because
|
|
||||||
# Referred objects are stored in lists or tuples, too.
|
|
||||||
for elem in fieldValue:
|
|
||||||
self.dumpField(res, 'e', elem)
|
|
||||||
else:
|
|
||||||
res.write(self.dumpValue(res, fieldValue, fieldType))
|
|
||||||
res.write('</'); res.write(fieldName); res.write('>')
|
res.write('</'); res.write(fieldName); res.write('>')
|
||||||
|
|
||||||
def marshall(self, instance, objectType='popo'):
|
def marshall(self, instance, objectType='popo'):
|
||||||
|
|
Loading…
Reference in a new issue