diff --git a/doc/version.txt b/doc/version.txt index bbbb948..1b980ab 100755 --- a/doc/version.txt +++ b/doc/version.txt @@ -1,3 +1,6 @@ +0.4.0 (2009-08-12) +- Alpha version. + 0.3.1 (2009-04-10) - gen: added the concept of "phase" for structuring a root content type. diff --git a/shared/__init__.py b/shared/__init__.py index 6f1fcf4..4d507f0 100755 --- a/shared/__init__.py +++ b/shared/__init__.py @@ -2,9 +2,30 @@ import appy import os.path +# ------------------------------------------------------------------------------ appyPath = os.path.realpath(os.path.dirname(appy.__file__)) mimeTypes = {'odt': 'application/vnd.oasis.opendocument.text', 'doc': 'application/msword', 'rtf': 'text/rtf', 'pdf': 'application/pdf'} + +# ------------------------------------------------------------------------------ +class UnmarshalledObject: + '''Used for producing objects from a marshalled Python object (in some files + like a CSV file or an XML file).''' + def __repr__(self): + res = u' ' % attrName + res = res.strip() + '>' + return res.encode('utf-8') + +# ------------------------------------------------------------------------------ +class Dummy: pass # ------------------------------------------------------------------------------ diff --git a/shared/csv_parser.py b/shared/csv_parser.py new file mode 100644 index 0000000..ff37558 --- /dev/null +++ b/shared/csv_parser.py @@ -0,0 +1,201 @@ +# ------------------------------------------------------------------------------ +# Appy is a framework for building applications in the Python language. +# Copyright (C) 2007 Gaetan Delannay + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA. + +# ------------------------------------------------------------------------------ +from appy.shared import UnmarshalledObject + +# ------------------------------------------------------------------------------ +WRONG_LINE = 'Line number %d in file %s does not have the right number of ' \ + 'fields.' + +class CsvParser: + '''This class reads a CSV file and creates a list of Python objects from it. + The first line of the CSV file must declare the format of the following + lines, which are 'data' lines. For example, if the first line of the file + is + + id,roles*,password + + Then subsequent lines in the CSV need to conform to this syntax. Field + separator will be the comma. Result of method 'parse' will be a list of + Python objects, each one having attributes id, roles and password. + Attributes declared with a star (like 'roles') are lists. An empty value + will produce an empty list in the resulting object; several values need + to be separated with the '+' sign. Here are some examples of valid 'data' + lines for the first line above: + + gdy,, + gdy,MeetingManager,abc + gdy,MeetingManager+MeetingMember,abc + + In the first (and subsequent) line(s), you may choose among the following + separators: , : ; | + ''' + separators = [',', ':', ';', '|'] + typeLetters = {'i': int, 'f': float, 's': str, 'b': bool} + def __init__(self, fileName, references={}, klass=None): + self.fileName = fileName + self.res = [] # The resulting list of Python objects. + self.sep = None + self.attributes = None # The list of attributes corresponding to + # CSV columns. + self.attributesFlags = None # Here we now if every attribute is a list + # (True) of not (False). + self.attributesTypes = None # Here we now the type of the attribute (if + # the attribute is a list it denotes the type of every item in the + # list): string, integer, float, boolean. + self.references = references + self.klass = klass # If a klass is given here, instead of creating + # UnmarshalledObject instances we will create instances of this class. + # But be careful: we will not call the constructor of this class. We + # will simply create instances of UnmarshalledObject and dynamically + # change the class of created instances to this class. + + def identifySeparator(self, line): + '''What is the separator used in this file?''' + maxLength = 0 + res = None + for sep in self.separators: + newLength = len(line.split(sep)) + if newLength > maxLength: + maxLength = newLength + res = sep + self.sep = res + + def identifyAttributes(self, line): + self.attributes = line.split(self.sep) + self.attributesFlags = [False] * len(self.attributes) + self.attributesTypes = [str] * len(self.attributes) + i = -1 + for attr in self.attributes: + i += 1 + # Is this attribute mono- or multi-valued? + if attr.endswith('*'): + self.attributesFlags[i] = True + attrNoFlag = attr.strip('*') + attrInfo = attrNoFlag.split('-') + # What is the type of value(s) for this attribute ? + if (len(attrInfo) == 2) and (attrInfo[1] in self.typeLetters): + self.attributesTypes[i] = self.typeLetters[attrInfo[1]] + # Remove trailing stars + self.attributes = [a.strip('*').split('-')[0] for a in self.attributes] + + def resolveReference(self, attrName, refId): + '''Finds, in self.reference, the object having p_refId.''' + refObjects, refAttrName = self.references[attrName] + res = None + for refObject in refObjects: + if getattr(refObject, refAttrName) == refId: + res = refObject + break + return res + + def convertValue(self, value, basicType): + '''Converts the atomic p_value which is a string into some other atomic + Python type specified in p_basicType (int, float, ...).''' + if (basicType != str) and (basicType != unicode): + try: + exec 'res = %s' % str(value) + except SyntaxError, se: + res = None + else: + try: + exec 'res = """%s"""' % str(value) + except SyntaxError, se: + try: + exec "res = '''%s'''" % str(value) + except SyntaxError, se: + res = None + return res + + def parse(self): + '''Parses the CSV file named self.fileName and creates a list of + corresponding Python objects (UnmarshalledObject instances). Among + object fields, some may be references. If it is the case, you may + specify in p_references a dict of referred objects. The parser will + then replace string values of some fields (which are supposed to be + ids of referred objects) with corresponding objects in p_references. + + How does this work? p_references must be a dictionary: + - keys correspond to field names of the current object; + - values are 2-tuples: + * 1st value is the list of available referred objects; + * 2nd value is the name of the attribute on those objects that + stores their ID. + ''' + # The first pass parses the file and creates the Python object + f = file(self.fileName) + firstLine = True + lineNb = 0 + for line in f: + lineNb += 1 + line = line.strip() + if not line: continue + if firstLine: + # The first line declares the structure of the following 'data' + # lines. + self.identifySeparator(line) + self.identifyAttributes(line) + firstLine = False + else: + # Add an object corresponding to this line. + lineObject = UnmarshalledObject() + if self.klass: + lineObject.__class__ = self.klass + i = -1 + # Do we get the right number of field values on this line ? + attrValues = line.split(self.sep) + if len(attrValues) != len(self.attributes): + raise WRONG_LINE % (lineNb, self.fileName) + for attrValue in line.split(self.sep): + i += 1 + theValue = attrValue + vType = self.attributesTypes[i] + if self.attributesFlags[i]: + # The attribute is multi-valued + if not attrValue: + theValue = [] + elif '+' in theValue: + theValue = [self.convertValue(v, vType) \ + for v in attrValue.split('+')] + else: + theValue = [self.convertValue(theValue, vType)] + else: + # The attribute is mono-valued + theValue = self.convertValue(theValue, vType) + setattr(lineObject, self.attributes[i], theValue) + self.res.append(lineObject) + f.close() + # The second pass resolves the p_references if any + for attrName, refInfo in self.references.iteritems(): + if attrName in self.attributes: + # Replace ID with real object from p_references + for obj in self.res: + attrValue = getattr(obj, attrName) + if isinstance(attrValue, list) or \ + isinstance(attrValue, tuple): + # Multiple values to resolve + newValue = [] + for v in attrValue: + newValue.append(self.resolveReference(attrName,v)) + else: + # Only one value to resolve + newValue = self.resolveReference(attrName, attrValue) + setattr(obj, attrName, newValue) + return self.res +# ------------------------------------------------------------------------------ diff --git a/shared/utils.py b/shared/utils.py index 21020b0..8413cba 100755 --- a/shared/utils.py +++ b/shared/utils.py @@ -57,193 +57,4 @@ def getOsTempFolder(): else: raise "Sorry, I can't find a temp folder on your machine." return res - # ------------------------------------------------------------------------------ -WRONG_LINE = 'Line number %d in file %s does not have the right number of ' \ - 'fields.' -class CsvObject: - '''Used for producing objects from CSV parsing.''' - def __repr__(self): - res = '' - return res - -class CsvParser: - '''This class reads a CSV file and creates a list of Python objects from it. - The first line of the CSV file must declare the format of the following - lines, which are 'data' lines. For example, if the first line of the file - is - - id,roles*,password - - Then subsequent lines in the CSV need to conform to this syntax. Field - separator will be the comma. Result of method 'parse' will be a list of - Python objects, each one having attributes id, roles and password. - Attributes declared with a star (like 'roles') are lists. An empty value - will produce an empty list in the resulting object; several values need - to be separated with the '+' sign. Here are some examples of valid 'data' - lines for the first line above: - - gdy,, - gdy,MeetingManager,abc - gdy,MeetingManager+MeetingMember,abc - - In the first (and subsequent) line(s), you may choose among the following - separators: , : ; | - ''' - separators = [',', ':', ';', '|'] - typeLetters = {'i': int, 'f': float, 's': str, 'b': bool} - def __init__(self, fileName, references={}, klass=None): - self.fileName = fileName - self.res = [] # The resulting list of Python objects. - self.sep = None - self.attributes = None # The list of attributes corresponding to - # CSV columns. - self.attributesFlags = None # Here we now if every attribute is a list - # (True) of not (False). - self.attributesTypes = None # Here we now the type of the attribute (if - # the attribute is a list it denotes the type of every item in the - # list): string, integer, float, boolean. - self.references = references - self.klass = klass # If a klass is given here, instead of creating - # CsvObject instances we will create instances of this class. But be - # careful: we will not call the constructor of this class. We will - # simply create instances of CsvObject and dynamically change the class - # of created instances to this class. - - def identifySeparator(self, line): - '''What is the separator used in this file?''' - maxLength = 0 - res = None - for sep in self.separators: - newLength = len(line.split(sep)) - if newLength > maxLength: - maxLength = newLength - res = sep - self.sep = res - - def identifyAttributes(self, line): - self.attributes = line.split(self.sep) - self.attributesFlags = [False] * len(self.attributes) - self.attributesTypes = [str] * len(self.attributes) - i = -1 - for attr in self.attributes: - i += 1 - # Is this attribute mono- or multi-valued? - if attr.endswith('*'): - self.attributesFlags[i] = True - attrNoFlag = attr.strip('*') - attrInfo = attrNoFlag.split('-') - # What is the type of value(s) for this attribute ? - if (len(attrInfo) == 2) and (attrInfo[1] in self.typeLetters): - self.attributesTypes[i] = self.typeLetters[attrInfo[1]] - # Remove trailing stars - self.attributes = [a.strip('*').split('-')[0] for a in self.attributes] - - def resolveReference(self, attrName, refId): - '''Finds, in self.reference, the object having p_refId.''' - refObjects, refAttrName = self.references[attrName] - res = None - for refObject in refObjects: - if getattr(refObject, refAttrName) == refId: - res = refObject - break - return res - - def convertValue(self, value, basicType): - '''Converts the atomic p_value which is a string into some other atomic - Python type specified in p_basicType (int, float, ...).''' - if (basicType != str) and (basicType != unicode): - try: - exec 'res = %s' % str(value) - except SyntaxError, se: - res = None - else: - try: - exec 'res = """%s"""' % str(value) - except SyntaxError, se: - try: - exec "res = '''%s'''" % str(value) - except SyntaxError, se: - res = None - return res - - def parse(self): - '''Parses the CSV file named self.fileName and creates a list of - corresponding Python objects (CsvObject instances). Among object - fields, some may be references. If it is the case, you may specify - in p_references a dict of referred objects. The parser will then - replace string values of some fields (which are supposed to be ids - of referred objects) with corresponding objects in p_references. - - How does this work? p_references must be a dictionary: - - keys correspond to field names of the current object; - - values are 2-tuples: - * 1st value is the list of available referred objects; - * 2nd value is the name of the attribute on those objects that - stores their ID. - ''' - # The first pass parses the file and creates the Python object - f = file(self.fileName) - firstLine = True - lineNb = 0 - for line in f: - lineNb += 1 - line = line.strip() - if not line: continue - if firstLine: - # The first line declares the structure of the following 'data' - # lines. - self.identifySeparator(line) - self.identifyAttributes(line) - firstLine = False - else: - # Add an object corresponding to this line. - lineObject = CsvObject() - if self.klass: - lineObject.__class__ = self.klass - i = -1 - # Do we get the right number of field values on this line ? - attrValues = line.split(self.sep) - if len(attrValues) != len(self.attributes): - raise WRONG_LINE % (lineNb, self.fileName) - for attrValue in line.split(self.sep): - i += 1 - theValue = attrValue - vType = self.attributesTypes[i] - if self.attributesFlags[i]: - # The attribute is multi-valued - if not attrValue: - theValue = [] - elif '+' in theValue: - theValue = [self.convertValue(v, vType) \ - for v in attrValue.split('+')] - else: - theValue = [self.convertValue(theValue, vType)] - else: - # The attribute is mono-valued - theValue = self.convertValue(theValue, vType) - setattr(lineObject, self.attributes[i], theValue) - self.res.append(lineObject) - f.close() - # The second pass resolves the p_references if any - for attrName, refInfo in self.references.iteritems(): - if attrName in self.attributes: - # Replace ID with real object from p_references - for obj in self.res: - attrValue = getattr(obj, attrName) - if isinstance(attrValue, list) or \ - isinstance(attrValue, tuple): - # Multiple values to resolve - newValue = [] - for v in attrValue: - newValue.append(self.resolveReference(attrName,v)) - else: - # Only one value to resolve - newValue = self.resolveReference(attrName, attrValue) - setattr(obj, attrName, newValue) - return self.res -# ------------------------------------------------------------------------------ - diff --git a/shared/xml_parser.py b/shared/xml_parser.py index 7d930e4..0e49ecb 100755 --- a/shared/xml_parser.py +++ b/shared/xml_parser.py @@ -17,9 +17,10 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA. # ------------------------------------------------------------------------------ -import xml.sax +import xml.sax, base64 from xml.sax.handler import ContentHandler, ErrorHandler from xml.sax.xmlreader import InputSource +from StringIO import StringIO # ------------------------------------------------------------------------------ class XmlElement: @@ -130,4 +131,268 @@ class XmlParser(ContentHandler, ErrorHandler): inputSource.setByteStream(xmlContent) self.parser.parse(inputSource) return self.res + +# ------------------------------------------------------------------------------ +from appy.shared import UnmarshalledObject, Dummy +from appy.gen.plone25.wrappers import FileWrapper +try: + from DateTime import DateTime +except ImportError: + DateTime = 'unicode' + +class XmlUnmarshaller(XmlParser): + '''This class allows to parse a XML file and recreate the corresponding web + of Python objects. This parser assumes that the XML file respects this + convention: any tag may define in attribute "type" storing the type of + its content, which may be: + + bool * int * float * long * DateTime * tuple * list * object + + If "object" is specified, it means that the tag contains sub-tags, each + one corresponding to the value of an attribute for this object. + if "tuple" is specified, it will be converted to a list.''' + def __init__(self, klass=None): + XmlParser.__init__(self) + self.klass = klass # If a klass is given here, instead of creating + # a root UnmarshalledObject instance, we will create an instance of this + # class (only if the root object is an object; this does not apply if + # it is a list or tuple; yes, technically the root tag can be a list or + # tuple even if it is silly because only one root tag can exist). But be + # careful: we will not call the constructor of this class. We will + # simply create an instance of UnmarshalledObject and dynamically change + # the class of the created instance to this class. + + def startDocument(self): + self.res = None # The resulting web of Python objects + # (UnmarshalledObject instances). + self.env.containerStack = [] # The stack of current "containers" where + # to store the next parsed element. A container can be a list, a tuple, + # an object (the root object of the whole web or a sub-object). + self.env.currentBasicType = None # Will hold the name of the currently + # parsed basic type (unicode, float, ...) + self.env.currentContent = '' # We store here the content of tags. + self.env.currentFileName = '' # If current tag contains a file, we + # store here the file name. + self.env.currentMimeType = '' # If current tag contains a file, we + # store here the file name. + + containerTags = ('tuple', 'list', 'object') + numericTypes = ('bool', 'int', 'float', 'long') + def startElement(self, elem, attrs): + e = XmlParser.startElement(self, elem, attrs) + # Determine the type of the element. + elemType = 'unicode' # Default value + if attrs.has_key('type'): + elemType = attrs['type'] + if elemType in self.containerTags: + # I must create a new container object. + if elemType == 'object': newObject = UnmarshalledObject() + elif elemType == 'tuple': newObject = [] # Tuples become lists + elif elemType == 'list': newObject = [] + else: newObject = UnmarshalledObject() + # Store the value on the last container, or on the root object. + self.storeValue(elem, newObject) + # Push the new object on the container stack + e.containerStack.append(newObject) + else: + # We are parsing a basic type + e.currentBasicType = elemType + if elemType == 'file': + if attrs.has_key('name'): e.currentFileName = attrs['name'] + if attrs.has_key('mimeType'): + e.currentMimeType = attrs['mimeType'] + + def storeValue(self, name, value): + '''Stores the newly parsed p_value (contained in tag p_name) on the + current container in environment p_e.''' + e = self.env + # Where must I store this value? + if not e.containerStack: + # I store the object at the root of the web. + self.res = value + if self.klass and isinstance(value, UnmarshalledObject): + self.res.__class__ = self.klass + else: + currentContainer = e.containerStack[-1] + if type(currentContainer) == list: + currentContainer.append(value) + else: + # Current container is an object + setattr(currentContainer, name, value) + + def characters(self, content): + e = XmlParser.characters(self, content) + if e.currentBasicType: + e.currentContent += content + + def endElement(self, elem): + e = XmlParser.endElement(self, elem) + if e.currentBasicType: + # Get and convert the value of this field + if e.currentBasicType in self.numericTypes: + try: + exec 'value = %s' % e.currentContent.strip() + except SyntaxError: + value = None + elif e.currentBasicType == 'DateTime': + value = DateTime(e.currentContent.strip()) + elif e.currentBasicType == 'file': + value = Dummy() + value.name = e.currentFileName + value.content = base64.b64decode(e.currentContent.strip()) + value.mimeType = e.currentMimeType + value.size = len(value.content) + value.__class__ = FileWrapper + else: + value = e.currentContent.strip() + # Store the value on the last container + self.storeValue(elem, value) + # Clean the environment + e.currentBasicType = None + e.currentContent = '' + else: + e.containerStack.pop() + + # Alias 'unmarshall' -> 'parse' + unmarshall = XmlParser.parse + +# ------------------------------------------------------------------------------ +class XmlMarshaller: + '''This class allows to produce a XML version of a Python object, which + respects some conventions as described in the doc of the corresponding + Unmarshaller (see above).''' + xmlPrologue = '' + xmlEntities = {'<': '<', '>': '>', '&': '&', '"': '"', + "'": '''} + trueFalse = {True: 'True', False: 'False'} + sequenceTypes = (tuple, list) + rootElementName = 'xmlPythonData' + fieldsToMarshall = 'all' + fieldsToExclude = [] + atFiles = ('image', 'file') # Types of archetypes fields that contain files. + + def dumpValue(self, res, value, fieldType='basic'): + '''Dumps the XML version of p_value to p_res.''' + if fieldType == 'file': + # p_value contains the (possibly binary) content of a file. We will + # encode it in Base64. + if hasattr(value, 'data'): + v = value.data # Simple wrap for images + if hasattr(v, 'data'): v = v.data # Double wrap for files + else: + v = value + res.write(base64.b64encode(v)) + elif isinstance(value, basestring): + # Replace special chars by XML entities + for c in value: + if self.xmlEntities.has_key(c): + res.write(self.xmlEntities[c]) + else: + res.write(c) + elif isinstance(value, bool): + res.write(self.trueFalse[value]) + else: + res.write(value) + + def dumpField(self, res, fieldName, fieldValue, fieldType='basic'): + '''Dumps in p_res, the value of the p_field for p_instance.''' + res.write('<'); res.write(fieldName); + # Dump the type of the field as an XML attribute + fType = None # No type will mean "string". + if fieldType == 'file': fType ='file' + elif fieldType == 'ref': fType = 'list' + elif isinstance(fieldValue, bool): fType = 'bool' + elif isinstance(fieldValue, int): fType = 'int' + elif isinstance(fieldValue, float): fType = 'float' + elif isinstance(fieldValue, long): fType = 'long' + elif isinstance(fieldValue, tuple): fType = 'tuple' + elif isinstance(fieldValue, list): fType = 'list' + elif fieldValue.__class__.__name__ == 'DateTime': fType = 'DateTime' + if fType: res.write(' type="%s"' % fType) + if type(fieldValue) in self.sequenceTypes: + res.write(' count="%d"' % len(fieldValue)) + if fieldType == 'file': + if hasattr(fieldValue, 'content_type'): + res.write(' mimeType="%s"' % fieldValue.content_type) + if hasattr(fieldValue, 'filename'): + res.write(' name="%s"' % fieldValue.filename) + res.write('>') + # Dump the child elements if any + if fieldType == 'ref': + if fieldValue: + for elem in fieldValue: + self.dumpField(res, 'url', elem.absolute_url_path()) + else: + self.dumpField(res, 'url', '') + elif type(fieldValue) in self.sequenceTypes: + # The previous condition must be checked before this one because + # Referred objects are stored in lists or tuples, too. + for elem in fieldValue: + self.dumpField(res, 'e', elem) + else: + res.write(self.dumpValue(res, fieldValue, fieldType)) + res.write('') + + def marshall(self, instance, objectType='popo'): + '''Returns in a StringIO the XML version of p_instance. If p_instance + corresponds to a Plain Old Python Object, specify 'popo' for + p_objectType. If p_instance corresponds to an Archetypes object + (Zope/Plone), specify 'archetype' for p_objectType.''' + res = StringIO() + # Dump the XML prologue and root element + res.write(self.xmlPrologue) + res.write('<'); res.write(self.rootElementName) + res.write(' type="object">') + # Dump the value of the fields that must be dumped + if objectType == 'popo': + for fieldName, fieldValue in instance.__dict__.iteritems(): + mustDump = False + if fieldName in self.fieldsToExclude: + mustDump = False + elif self.fieldsToMarshall == 'all': + mustDump = True + else: + if (type(self.fieldsToMarshall) in self.sequenceTypes) and \ + (fieldName in self.fieldsToMarshall): + mustDump = True + if mustDump: + self.dumpField(res, fieldName, fieldValue) + elif objectType == 'archetype': + fields = instance.schema.fields() + for field in instance.schema.fields(): + # Dump only needed fields + mustDump = False + if field.getName() in self.fieldsToExclude: + mustDump = False + elif (self.fieldsToMarshall == 'all') and \ + (field.schemata != 'metadata'): + mustDump = True + elif self.fieldsToMarshall == 'all_with_metadata': + mustDump = True + else: + if (type(self.fieldsToMarshall) in self.sequenceTypes) and \ + (field.getName() in self.fieldsToMarshall): + mustDump = True + if mustDump: + fieldType = 'basic' + if field.type in self.atFiles: + fieldType = 'file' + elif field.type == 'reference': + fieldType = 'ref' + self.dumpField(res, field.getName(), field.get(instance), + fieldType=fieldType) + self.marshallSpecificElements(instance, res) + # Return the result + res.write('') + data = res.getvalue() + res.close() + return data + + def marshallSpecificElements(self, instance, res): + '''You can use this marshaller as a base class for creating your own. + In this case, this method will be called by the marshall method + for allowing your concrete marshaller to insert more things in the + result. p_res is the StringIO buffer where the result of the + marshalling process is currently dumped; p_instance is the instance + currently marshalled.''' # ------------------------------------------------------------------------------