Improvements in the XML marshaller.

2010-03-31 15:49:54 +02:00 · 2010-03-31 15:49:54 +02:00 · 9cff9df299
commit 9cff9df299
parent 3f08cb989f
5 changed files with 85 additions and 45 deletions
--- a/gen/plone25/wrappers/init.py
+++ b/gen/plone25/wrappers/init.py
@ -340,16 +340,19 @@ class AbstractWrapper:
    def export(self, at='string'):
        '''Creates an "exportable", XML version of this object. If p_at is
-           "string", this method returns the XML version. Else, (a) if not p_at,
+           "string", this method returns the XML version, without the XML
-           the XML will be exported on disk, in the OS temp folder, with an
+           prologue. Else, (a) if not p_at, the XML will be exported on disk,
-           ugly name; (b) else, it will be exported at path p_at.'''
+           in the OS temp folder, with an ugly name; (b) else, it will be
           exported at path p_at.'''
        # Determine where to put the result
        toDisk = (at != 'string')
        if toDisk and not at:
            at = getOsTempFolder() + '/' + self.o.UID() + '.xml'
        # Create the XML version of the object
-        xml = XmlMarshaller(cdata=True, dumpUnicode=True).marshall(
+        marshaller = XmlMarshaller(cdata=True, dumpUnicode=True,
-            self.o, objectType='appy')
+                                   dumpXmlPrologue=toDisk,
                                   rootTag=self.klass.__name__)
        xml = marshaller.marshall(self.o, objectType='appy')
        # Produce the desired result
        if toDisk:
            f = file(at, 'w')
--- a/gen/utils.py
+++ b/gen/utils.py
@ -229,4 +229,21 @@ class Keywords:
            op = ' %s ' % self.operator
            return op.join(self.keywords)+'*'
        return ''
 # ------------------------------------------------------------------------------
 class FakeBrain:
    '''Stand-in for a brain as returned by a ZCatalog query. An instance
       represents a result that was produced by a search performed in a
       distant portal_catalog. Attributes "path", "url" and "Title" are not
       defined here: the methods below read them, so they must be set on the
       instance by whoever creates it.'''
    # Metadata attributes; all of them are None until explicitly assigned.
    review_state = None
    modified = None
    created = None
    Creator = None
    def has_key(self, key):
        '''Returns True if this fake brain has an attribute named p_key.'''
        return hasattr(self, key)
    def getPath(self):
        '''Returns the value stored in attribute "path".'''
        return self.path
    def getURL(self, relative=0):
        '''Returns the value stored in attribute "url". p_relative is accepted
           but is not taken into account.'''
        return self.url
    def getRID(self):
        '''Returns the value stored in attribute "url"; it is used as record
           identifier.'''
        return self.url
    def pretty_title_or_id(self):
        '''Returns the value stored in attribute "Title".'''
        return self.Title
    def getObject(self, REQUEST=None):
        '''There is no real object behind a fake brain: returns the brain
           itself. p_REQUEST is not used.'''
        return self
    def _unrestrictedGetObject(self):
        '''Same as m_getObject: returns the brain itself.'''
        return self
 # ------------------------------------------------------------------------------
--- a/shared/init.py
+++ b/shared/init.py
@ -18,6 +18,7 @@ mimeTypesExts = {
             'image/jpeg'                             : 'jpg',
             'image/gif'                              : 'gif'
             }
 # XML declaration (version 1.0, utf-8 encoding), defined once at module level.
 xmlPrologue = '<?xml version="1.0" encoding="utf-8"?>'
 # ------------------------------------------------------------------------------
 class UnmarshalledObject:
--- a/shared/utils.py
+++ b/shared/utils.py
@ -94,4 +94,8 @@ def normalizeString(s, usage='fileName'):
                res += char
        s = res
    return unicodedata.normalize('NFKD', s).encode("ascii","ignore")
 # ------------------------------------------------------------------------------
 # Maps one-letter type codes to the corresponding Python (2.x) built-in types.
 typeLetters = {'b': bool, 'i': int, 'j': long, 'f':float, 's':str, 'u':unicode,
               'l': list, 'd': dict}
 # ------------------------------------------------------------------------------
--- a/shared/xml_parser.py
+++ b/shared/xml_parser.py
@ -20,7 +20,7 @@
 import xml.sax, difflib, types
 from xml.sax.handler import ContentHandler, ErrorHandler
 from xml.sax.xmlreader import InputSource
-from appy.shared import UnicodeBuffer
+from appy.shared import UnicodeBuffer, xmlPrologue
 from appy.shared.errors import AppyError
 # Error-related constants ------------------------------------------------------
@ -157,24 +157,30 @@ class XmlUnmarshaller(XmlParser):
       If "object" is specified, it means that the tag contains sub-tags, each
       one corresponding to the value of an attribute for this object.
       if "tuple" is specified, it will be converted to a list.'''
-    def __init__(self, klass=None, tagTypes={}, conversionFunctions={}):
+    def __init__(self, classes={}, tagTypes={}, conversionFunctions={}):
        XmlParser.__init__(self)
-        self.klass = klass # If a klass is given here, instead of creating
+        # self.classes below is a dict whose keys are tag names and values are
-        # a root UnmarshalledObject instance, we will create an instance of this
+        # Python classes. During the unmarshalling process, when an object is
-        # class (only if the root object is an object; this does not apply if
+        # encountered, instead of creating an instance of UnmarshalledObject,
-        # it is a list or tuple; yes, technically the root tag can be a list or
+        # we will create an instance of the class specified in self.classes.
-        # tuple even if it is silly because only one root tag can exist). But be
+        # Root tag is named "xmlPythonData" by default by the XmlMarshaller.
-        # careful: we will not call the constructor of this class. We will
+        # This will not work if the object in the specified tag is not a
-        # simply create an instance of UnmarshalledObject and dynamically change
+        # UnmarshalledObject instance (ie it is a list or tuple or simple
-        # the class of the created instance to this class.
+        # value). Note that we will not call the constructor of the specified
-        self.tagTypes = tagTypes
+        # class. We will simply create an instance of UnmarshalledObject and
        # dynamically change the class of the created instance to this class.
        if not isinstance(classes, dict) and classes:
            # The user may only need to define a class for the root tag
            self.classes = {'xmlPythonData': classes}
        else:
            self.classes = classes
        # We expect that the parsed XML file will follow some conventions
        # (ie, a tag that corresponds to a list has attribute type="list" or a
        # tag that corresponds to an object has attribute type="object".). If
        # it is not the case of p_xmlContent, you can provide the missing type
        # information in p_tagTypes. Here is an example of p_tagTypes:
        # {"information": "list", "days": "list", "person": "object"}.
-        self.conversionFunctions = conversionFunctions
+        self.tagTypes = tagTypes
        # The parser assumes that data is represented in some standard way. If
        # it is not the case, you may provide, in this dict, custom functions
        # allowing to convert values of basic types (long, float, DateTime...).
@ -187,6 +193,7 @@ class XmlUnmarshaller(XmlParser):
        # and create a specific conversionFunction for it. This way, you can
        # for example convert strings that have specific values (in this case,
        # knowing that the value is a 'string' is not sufficient).        
        self.conversionFunctions = conversionFunctions
    def convertAttrs(self, attrs):
        '''Converts XML attrs to a dict.'''
@ -236,14 +243,15 @@ class XmlUnmarshaller(XmlParser):
    def storeValue(self, name, value):
        '''Stores the newly parsed p_value (contained in tag p_name) on the
-           current container in environment p_e.'''
+           current container in environment self.env.'''
        e = self.env
        # Change the class of the value if relevant
        if (name in self.classes) and isinstance(value, UnmarshalledObject):
            value.__class__ = self.classes[name]
        # Where must I store this value?
        if not e.containerStack:
            # I store the object at the root of the web.
            self.res = value
            if self.klass and isinstance(value, UnmarshalledObject):
                self.res.__class__ = self.klass
        else:
            currentContainer = e.containerStack[-1]
            if isinstance(currentContainer, list):
@ -252,7 +260,8 @@ class XmlUnmarshaller(XmlParser):
                currentContainer.content += value
            else:
                # Current container is an object
-                if hasattr(currentContainer, name):
+                if hasattr(currentContainer, name) and \
                   getattr(currentContainer, name):
                    # We have already encountered a sub-object with this name.
                    # Having several sub-objects with the same name, we will
                    # create a list.
@ -326,19 +335,19 @@ class XmlMarshaller:
    '''This class allows to produce a XML version of a Python object, which
       respects some conventions as described in the doc of the corresponding
       Unmarshaller (see above).'''
    xmlPrologue = '<?xml version="1.0" encoding="utf-8"?>'
    xmlEntities = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;',
                   "'": '&apos;'}
    trueFalse = {True: 'True', False: 'False'}
    sequenceTypes = (tuple, list)
    rootElementName = 'xmlPythonData'
    fieldsToMarshall = 'all'
    fieldsToExclude = []
    atFiles = ('image', 'file') # Types of archetypes fields that contain files.
-    def __init__(self, cdata=False, dumpUnicode=False, conversionFunctions={}):
+    def __init__(self, cdata=False, dumpUnicode=False, conversionFunctions={},
-        '''If p_cdata is True, all string values will be dumped as XML CDATA.'''
+                 dumpXmlPrologue=True, rootTag='xmlPythonData'):
        # If p_cdata is True, all string values will be dumped as XML CDATA.
        self.cdata = cdata
        # If p_dumpUnicode is True, the result will be unicode.
        self.dumpUnicode = dumpUnicode
        # The following dict stores specific conversion (=Python to XML)
        # functions. A specific conversion function is useful when you are not
@ -350,6 +359,10 @@ class XmlMarshaller:
        # being dumped, while the second one is the Python object or value to
        # dump.
        self.conversionFunctions = conversionFunctions
        # If dumpXmlPrologue is True, the XML prologue will be dumped.
        self.dumpXmlPrologue = dumpXmlPrologue
        # The name of the root tag
        self.rootElementName = rootTag
    def dumpString(self, res, s):
        '''Dumps a string into the result.'''
@ -391,17 +404,17 @@ class XmlMarshaller:
            res.write(v.encode('base64'))
        res.write('</part>')
-    def dumpValue(self, res, value, fieldType):
+    def dumpValue(self, res, value, fieldType, isRef=False):
        '''Dumps the XML version of p_value to p_res.'''
        # Use a custom function if one is defined for this type of value.
-        fType = value.__class__.__name__
+        className = value.__class__.__name__
-        if fType in self.conversionFunctions:
+        if className in self.conversionFunctions:
-            self.conversionFunctions[fType](res, value)
+            self.conversionFunctions[className](res, value)
            return
        # Use a standard conversion else.
        if fieldType == 'file':
            self.dumpFile(res, value)
-        elif fieldType == 'ref':
+        elif isRef:
            if value:
                if type(value) in self.sequenceTypes:
                    for elem in value:
@ -417,7 +430,7 @@ class XmlMarshaller:
            self.dumpString(res, value)
        elif isinstance(value, bool):
            res.write(self.trueFalse[value])
-        elif self.isAnObject(value):
+        elif fieldType == 'object':
            if hasattr(value, 'absolute_url'):
                res.write(value.absolute_url())
            else:
@ -434,20 +447,21 @@ class XmlMarshaller:
        res.write('<'); res.write(fieldName);
        # Dump the type of the field as an XML attribute
        fType = None # No type will mean "unicode".
-        if fieldType == 'file': fType ='file'
+        if   fieldType == 'file':                         fType = 'file'
-        elif fieldType == 'ref': fType = 'list'
+        elif fieldType == 'ref':                          fType = 'list'
-        elif isinstance(fieldValue, bool):  fType = 'bool'
+        elif isinstance(fieldValue, bool):                fType = 'bool'
-        elif isinstance(fieldValue, int):   fType = 'int'
+        elif isinstance(fieldValue, int):                 fType = 'int'
-        elif isinstance(fieldValue, float): fType = 'float'
+        elif isinstance(fieldValue, float):               fType = 'float'
-        elif isinstance(fieldValue, long):  fType = 'long'
+        elif isinstance(fieldValue, long):                fType = 'long'
-        elif isinstance(fieldValue, tuple): fType = 'tuple'
+        elif isinstance(fieldValue, tuple):               fType = 'tuple'
-        elif isinstance(fieldValue, list):  fType = 'list'
+        elif isinstance(fieldValue, list):                fType = 'list'
        elif fieldValue.__class__.__name__ == 'DateTime': fType = 'DateTime'
        elif self.isAnObject(fieldValue):                 fType = 'object'
        if fType: res.write(' type="%s"' % fType)
        # Dump other attributes if needed
        if type(fieldValue) in self.sequenceTypes:
            res.write(' count="%d"' % len(fieldValue))
-        if fieldType == 'file':
+        if fType == 'file':
            if hasattr(fieldValue, 'content_type'):
                res.write(' mimeType="%s"' % fieldValue.content_type)
            if hasattr(fieldValue, 'filename'):
@ -456,7 +470,7 @@ class XmlMarshaller:
                res.write('"')
        res.write('>')
        # Dump the field value
-        self.dumpValue(res, fieldValue, fieldType)
+        self.dumpValue(res, fieldValue, fType, isRef=(fieldType=='ref'))
        res.write('</'); res.write(fieldName); res.write('>')
    def isAnObject(self, instance):
@ -487,8 +501,9 @@ class XmlMarshaller:
            self.conversionFunctions.update(conversionFunctions)
        # Create the buffer where the XML result will be dumped.
        res = UnicodeBuffer()
-        # Dump the XML prologue
+        # Dump the XML prologue if required
-        res.write(self.xmlPrologue)
+        if self.dumpXmlPrologue:
            res.write(xmlPrologue)
        if self.isAnObject(instance):
            # Determine object ID
            if objectType in ('archetype', 'appy'):
@ -572,7 +587,7 @@ class XmlHandler(ContentHandler):
       (like dates) from a file that need to be compared to another file.'''
    def __init__(self, xmlTagsToIgnore, xmlAttrsToIgnore):
        ContentHandler.__init__(self)
-        self.res = u'<?xml version="1.0" encoding="UTF-8"?>'
+        self.res = unicode(xmlPrologue)
        self.namespaces = {} # ~{s_namespaceUri:s_namespaceName}~
        self.indentLevel = -1
        self.tabWidth = 3