Gaetan Delannay 2009-08-17 17:06:17 +02:00
parent bdb220716c
commit 2d82dc4e0b
5 changed files with 491 additions and 190 deletions

View file

@ -1,3 +1,6 @@
0.4.0 (2009-08-12)
- Alpha version.
0.3.1 (2009-04-10)
- gen: added the concept of "phase" for structuring a root content type.

View file

@ -2,9 +2,30 @@
import appy
import os.path
# ------------------------------------------------------------------------------
# Absolute path, with symbolic links resolved, of the folder that contains the
# appy package.
appyPath = os.path.realpath(os.path.dirname(appy.__file__))
# MIME type of every supported document format, keyed by file extension.
mimeTypes = dict(
    odt='application/vnd.oasis.opendocument.text',
    doc='application/msword',
    rtf='text/rtf',
    pdf='application/pdf'
)
# ------------------------------------------------------------------------------
class UnmarshalledObject:
    '''Plain attribute holder for the objects that are rebuilt from a
       marshalled Python object (stored in some file like a CSV file or an XML
       file).'''
    def __repr__(self):
        '''Returns an UTF-8 encoded string of the form
           "<PythonObject name = value ...>" that lists every attribute set on
           this instance.'''
        pieces = []
        for name, value in self.__dict__.items():
            shown = value
            if hasattr(shown, '__repr__'):
                shown = shown.__repr__()
            try:
                piece = u'%s = %s ' % (name, shown)
            except UnicodeDecodeError:
                # The value cannot be merged into a unicode string: show a
                # placeholder instead of failing.
                piece = u'%s = <encoding problem> ' % name
            pieces.append(piece)
        text = (u'<PythonObject ' + u''.join(pieces)).strip() + '>'
        return text.encode('utf-8')
# ------------------------------------------------------------------------------
# Empty class: its instances are blank holders on which any attribute can be
# set.
class Dummy:
    pass
# ------------------------------------------------------------------------------

201
shared/csv_parser.py Normal file
View file

@ -0,0 +1,201 @@
# ------------------------------------------------------------------------------
# Appy is a framework for building applications in the Python language.
# Copyright (C) 2007 Gaetan Delannay
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------
from appy.shared import UnmarshalledObject
# ------------------------------------------------------------------------------
# Error message for a data line whose number of fields differs from the number
# of declared attributes. Expects a (line number, file name) tuple.
WRONG_LINE = ('Line number %d in file %s does not have the right number of '
              'fields.')
class CsvParser:
    '''This class reads a CSV file and creates a list of Python objects from it.
       The first line of the CSV file must declare the format of the following
       lines, which are 'data' lines. For example, if the first line of the file
       is

           id,roles*,password

       then subsequent lines in the CSV need to conform to this syntax. Field
       separator will be the comma. Result of method 'parse' will be a list of
       Python objects, each one having attributes id, roles and password.
       Attributes declared with a star (like 'roles') are lists. An empty value
       will produce an empty list in the resulting object; several values need
       to be separated with the '+' sign. Here are some examples of valid 'data'
       lines for the first line above:

           gdy,,
           gdy,MeetingManager,abc
           gdy,MeetingManager+MeetingMember,abc

       An attribute name may be followed by a dash and a type letter among
       i (int), f (float), s (str) and b (bool), placed before the optional
       star (ie "age-i" or "scores-f*"). Values of an attribute whose type is
       not a string are evaluated as Python expressions (see m_convertValue);
       an attribute declared without type letter holds strings.

       In the first (and subsequent) line(s), you may choose among the following
       separators: , : ; |
    '''
    separators = [',', ':', ';', '|']
    typeLetters = {'i': int, 'f': float, 's': str, 'b': bool}

    def __init__(self, fileName, references=None, klass=None):
        '''p_fileName is the path of the CSV file to parse. p_references is
           the optional dict described in m_parse. See below for p_klass.'''
        # A fresh dict is created per parser: a dict used as default value
        # would be shared by all the parsers created without p_references.
        if references is None:
            references = {}
        self.fileName = fileName
        self.res = [] # The resulting list of Python objects.
        self.sep = None
        self.attributes = None # The list of attributes corresponding to
        # CSV columns.
        self.attributesFlags = None # Here we know if every attribute is a list
        # (True) or not (False).
        self.attributesTypes = None # Here we know the type of the attribute (if
        # the attribute is a list it denotes the type of every item in the
        # list): string, integer, float, boolean.
        self.references = references
        self.klass = klass # If a klass is given here, instead of creating
        # UnmarshalledObject instances we will create instances of this class.
        # But be careful: we will not call the constructor of this class. We
        # will simply create instances of UnmarshalledObject and dynamically
        # change the class of created instances to this class.

    def identifySeparator(self, line):
        '''What is the separator used in this file? We keep, in self.sep, the
           one that splits p_line into the highest number of fields; the
           first one listed in self.separators wins a tie.'''
        maxLength = 0
        res = None
        for sep in self.separators:
            newLength = len(line.split(sep))
            if newLength > maxLength:
                maxLength = newLength
                res = sep
        self.sep = res

    def identifyAttributes(self, line):
        '''Reads, from the first p_line of the file, the name of every
           attribute, if it is multi-valued (trailing star) and its type
           (suffix "-<letter>", str by default). self.sep must be known.'''
        self.attributes = []
        self.attributesFlags = []
        self.attributesTypes = []
        for declaration in line.split(self.sep):
            # Is this attribute mono- or multi-valued?
            self.attributesFlags.append(declaration.endswith('*'))
            attrInfo = declaration.strip('*').split('-')
            # What is the type of value(s) for this attribute ?
            attrType = str
            if (len(attrInfo) == 2) and (attrInfo[1] in self.typeLetters):
                attrType = self.typeLetters[attrInfo[1]]
            self.attributesTypes.append(attrType)
            # The name is what precedes the first dash.
            self.attributes.append(attrInfo[0])

    def resolveReference(self, attrName, refId):
        '''Finds, in self.references, the object having p_refId. Returns None
           if no such object exists.'''
        refObjects, refAttrName = self.references[attrName]
        for refObject in refObjects:
            if getattr(refObject, refAttrName) == refId:
                return refObject
        return None

    def convertValue(self, value, basicType):
        '''Converts the atomic p_value which is a string into some other atomic
           Python type specified in p_basicType (int, float, ...). Returns
           None if p_value is not syntactically correct.

           A string value is read as the content of a triple-quoted Python
           string literal (so backslash escapes are interpreted); any other
           value is evaluated as a Python expression.'''
        # NOTE(security): the content of the file is evaluated as Python code.
        # "eval" only accepts expressions (the former "exec" statement also
        # ran any injected statement), but an expression can still call
        # anything: parse trusted files only.
        if (basicType == str) or (basicType == unicode):
            # Triple double quotes fail, for instance, when the value ends
            # with a double quote: triple single quotes are then tried.
            for quotes in ('"""', "'''"):
                try:
                    return eval(quotes + str(value) + quotes)
                except SyntaxError:
                    pass
            return None
        try:
            return eval(str(value))
        except SyntaxError:
            return None

    def _createObject(self, line, lineNb):
        '''Creates the object corresponding to data p_line (number
           p_lineNb in the file).'''
        # Do we get the right number of field values on this line ?
        attrValues = line.split(self.sep)
        if len(attrValues) != len(self.attributes):
            raise ValueError(WRONG_LINE % (lineNb, self.fileName))
        lineObject = UnmarshalledObject()
        if self.klass:
            lineObject.__class__ = self.klass
        for i, attrValue in enumerate(attrValues):
            vType = self.attributesTypes[i]
            if not self.attributesFlags[i]:
                # The attribute is mono-valued
                theValue = self.convertValue(attrValue, vType)
            elif not attrValue:
                # The attribute is multi-valued, without any value.
                theValue = []
            else:
                # The attribute is multi-valued. When there is no '+', split
                # produces a list with a single value.
                theValue = [self.convertValue(v, vType) \
                            for v in attrValue.split('+')]
            setattr(lineObject, self.attributes[i], theValue)
        return lineObject

    def _resolveReferences(self):
        '''On every object in self.res, replaces the IDs stored in the
           attributes mentioned in self.references with the referred
           objects.'''
        for attrName in self.references:
            # self.attributes is still None if the file had no first line.
            if not self.attributes or (attrName not in self.attributes):
                continue
            for obj in self.res:
                attrValue = getattr(obj, attrName)
                if isinstance(attrValue, (list, tuple)):
                    # Multiple values to resolve
                    newValue = [self.resolveReference(attrName, v) \
                                for v in attrValue]
                else:
                    # Only one value to resolve
                    newValue = self.resolveReference(attrName, attrValue)
                setattr(obj, attrName, newValue)

    def parse(self):
        '''Parses the CSV file named self.fileName and creates a list of
           corresponding Python objects (UnmarshalledObject instances). Among
           object fields, some may be references. If it is the case, you may
           specify in p_references a dict of referred objects. The parser will
           then replace string values of some fields (which are supposed to be
           ids of referred objects) with corresponding objects in p_references.

           How does this work? p_references must be a dictionary:
           - keys correspond to field names of the current object;
           - values are 2-tuples:
             * 1st value is the list of available referred objects;
             * 2nd value is the name of the attribute on those objects that
               stores their ID.

           Returns self.res. Raises ValueError if a data line does not have
           as many fields as the number of declared attributes.'''
        # The first pass parses the file and creates the Python objects
        f = open(self.fileName)
        try:
            firstLine = True
            lineNb = 0
            for line in f:
                lineNb += 1
                line = line.strip()
                if not line: continue
                if firstLine:
                    # The first line declares the structure of the following
                    # 'data' lines.
                    self.identifySeparator(line)
                    self.identifyAttributes(line)
                    firstLine = False
                else:
                    # Add an object corresponding to this line.
                    self.res.append(self._createObject(line, lineNb))
        finally:
            # The file is closed even if a line is wrong.
            f.close()
        # The second pass resolves the p_references if any
        self._resolveReferences()
        return self.res
# ------------------------------------------------------------------------------

View file

@ -57,193 +57,4 @@ def getOsTempFolder():
else:
raise "Sorry, I can't find a temp folder on your machine."
return res
# ------------------------------------------------------------------------------
# Error message for a data line whose number of fields differs from the number
# of declared attributes. Expects a (line number, file name) tuple.
WRONG_LINE = ('Line number %d in file %s does not have the right number of '
              'fields.')
class CsvObject:
    '''Plain attribute holder for the objects produced by parsing a CSV
       file.'''
    def __repr__(self):
        '''Returns a string of the form "<CsvObject name=value ...>" that
           lists every attribute set on this instance.'''
        pairs = [name + '=' + str(value) + ' ' \
                 for name, value in self.__dict__.items()]
        return ('<CsvObject ' + ''.join(pairs)).strip() + '>'
class CsvParser:
    '''This class reads a CSV file and creates a list of Python objects from it.
       The first line of the CSV file must declare the format of the following
       lines, which are 'data' lines. For example, if the first line of the file
       is

           id,roles*,password

       then subsequent lines in the CSV need to conform to this syntax. Field
       separator will be the comma. Result of method 'parse' will be a list of
       Python objects, each one having attributes id, roles and password.
       Attributes declared with a star (like 'roles') are lists. An empty value
       will produce an empty list in the resulting object; several values need
       to be separated with the '+' sign. Here are some examples of valid 'data'
       lines for the first line above:

           gdy,,
           gdy,MeetingManager,abc
           gdy,MeetingManager+MeetingMember,abc

       An attribute name may be followed by a dash and a type letter among
       i (int), f (float), s (str) and b (bool), placed before the optional
       star (ie "age-i" or "scores-f*"). Values of an attribute whose type is
       not a string are evaluated as Python expressions (see m_convertValue);
       an attribute declared without type letter holds strings.

       In the first (and subsequent) line(s), you may choose among the following
       separators: , : ; |
    '''
    separators = [',', ':', ';', '|']
    typeLetters = {'i': int, 'f': float, 's': str, 'b': bool}

    def __init__(self, fileName, references=None, klass=None):
        '''p_fileName is the path of the CSV file to parse. p_references is
           the optional dict described in m_parse. See below for p_klass.'''
        # A fresh dict is created per parser: a dict used as default value
        # would be shared by all the parsers created without p_references.
        if references is None:
            references = {}
        self.fileName = fileName
        self.res = [] # The resulting list of Python objects.
        self.sep = None
        self.attributes = None # The list of attributes corresponding to
        # CSV columns.
        self.attributesFlags = None # Here we know if every attribute is a list
        # (True) or not (False).
        self.attributesTypes = None # Here we know the type of the attribute (if
        # the attribute is a list it denotes the type of every item in the
        # list): string, integer, float, boolean.
        self.references = references
        self.klass = klass # If a klass is given here, instead of creating
        # CsvObject instances we will create instances of this class. But be
        # careful: we will not call the constructor of this class. We will
        # simply create instances of CsvObject and dynamically change the class
        # of created instances to this class.

    def identifySeparator(self, line):
        '''What is the separator used in this file? We keep, in self.sep, the
           one that splits p_line into the highest number of fields; the
           first one listed in self.separators wins a tie.'''
        maxLength = 0
        res = None
        for sep in self.separators:
            newLength = len(line.split(sep))
            if newLength > maxLength:
                maxLength = newLength
                res = sep
        self.sep = res

    def identifyAttributes(self, line):
        '''Reads, from the first p_line of the file, the name of every
           attribute, if it is multi-valued (trailing star) and its type
           (suffix "-<letter>", str by default). self.sep must be known.'''
        self.attributes = []
        self.attributesFlags = []
        self.attributesTypes = []
        for declaration in line.split(self.sep):
            # Is this attribute mono- or multi-valued?
            self.attributesFlags.append(declaration.endswith('*'))
            attrInfo = declaration.strip('*').split('-')
            # What is the type of value(s) for this attribute ?
            attrType = str
            if (len(attrInfo) == 2) and (attrInfo[1] in self.typeLetters):
                attrType = self.typeLetters[attrInfo[1]]
            self.attributesTypes.append(attrType)
            # The name is what precedes the first dash.
            self.attributes.append(attrInfo[0])

    def resolveReference(self, attrName, refId):
        '''Finds, in self.references, the object having p_refId. Returns None
           if no such object exists.'''
        refObjects, refAttrName = self.references[attrName]
        for refObject in refObjects:
            if getattr(refObject, refAttrName) == refId:
                return refObject
        return None

    def convertValue(self, value, basicType):
        '''Converts the atomic p_value which is a string into some other atomic
           Python type specified in p_basicType (int, float, ...). Returns
           None if p_value is not syntactically correct.

           A string value is read as the content of a triple-quoted Python
           string literal (so backslash escapes are interpreted); any other
           value is evaluated as a Python expression.'''
        # NOTE(security): the content of the file is evaluated as Python code.
        # "eval" only accepts expressions (the former "exec" statement also
        # ran any injected statement), but an expression can still call
        # anything: parse trusted files only.
        if (basicType == str) or (basicType == unicode):
            # Triple double quotes fail, for instance, when the value ends
            # with a double quote: triple single quotes are then tried.
            for quotes in ('"""', "'''"):
                try:
                    return eval(quotes + str(value) + quotes)
                except SyntaxError:
                    pass
            return None
        try:
            return eval(str(value))
        except SyntaxError:
            return None

    def _createObject(self, line, lineNb):
        '''Creates the object corresponding to data p_line (number
           p_lineNb in the file).'''
        # Do we get the right number of field values on this line ?
        attrValues = line.split(self.sep)
        if len(attrValues) != len(self.attributes):
            raise ValueError(WRONG_LINE % (lineNb, self.fileName))
        lineObject = CsvObject()
        if self.klass:
            lineObject.__class__ = self.klass
        for i, attrValue in enumerate(attrValues):
            vType = self.attributesTypes[i]
            if not self.attributesFlags[i]:
                # The attribute is mono-valued
                theValue = self.convertValue(attrValue, vType)
            elif not attrValue:
                # The attribute is multi-valued, without any value.
                theValue = []
            else:
                # The attribute is multi-valued. When there is no '+', split
                # produces a list with a single value.
                theValue = [self.convertValue(v, vType) \
                            for v in attrValue.split('+')]
            setattr(lineObject, self.attributes[i], theValue)
        return lineObject

    def _resolveReferences(self):
        '''On every object in self.res, replaces the IDs stored in the
           attributes mentioned in self.references with the referred
           objects.'''
        for attrName in self.references:
            # self.attributes is still None if the file had no first line.
            if not self.attributes or (attrName not in self.attributes):
                continue
            for obj in self.res:
                attrValue = getattr(obj, attrName)
                if isinstance(attrValue, (list, tuple)):
                    # Multiple values to resolve
                    newValue = [self.resolveReference(attrName, v) \
                                for v in attrValue]
                else:
                    # Only one value to resolve
                    newValue = self.resolveReference(attrName, attrValue)
                setattr(obj, attrName, newValue)

    def parse(self):
        '''Parses the CSV file named self.fileName and creates a list of
           corresponding Python objects (CsvObject instances). Among object
           fields, some may be references. If it is the case, you may specify
           in p_references a dict of referred objects. The parser will then
           replace string values of some fields (which are supposed to be ids
           of referred objects) with corresponding objects in p_references.

           How does this work? p_references must be a dictionary:
           - keys correspond to field names of the current object;
           - values are 2-tuples:
             * 1st value is the list of available referred objects;
             * 2nd value is the name of the attribute on those objects that
               stores their ID.

           Returns self.res. Raises ValueError if a data line does not have
           as many fields as the number of declared attributes.'''
        # The first pass parses the file and creates the Python objects
        f = open(self.fileName)
        try:
            firstLine = True
            lineNb = 0
            for line in f:
                lineNb += 1
                line = line.strip()
                if not line: continue
                if firstLine:
                    # The first line declares the structure of the following
                    # 'data' lines.
                    self.identifySeparator(line)
                    self.identifyAttributes(line)
                    firstLine = False
                else:
                    # Add an object corresponding to this line.
                    self.res.append(self._createObject(line, lineNb))
        finally:
            # The file is closed even if a line is wrong.
            f.close()
        # The second pass resolves the p_references if any
        self._resolveReferences()
        return self.res
# ------------------------------------------------------------------------------

View file

@ -17,9 +17,10 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------
import xml.sax
import xml.sax, base64
from xml.sax.handler import ContentHandler, ErrorHandler
from xml.sax.xmlreader import InputSource
from StringIO import StringIO
# ------------------------------------------------------------------------------
class XmlElement:
@ -130,4 +131,268 @@ class XmlParser(ContentHandler, ErrorHandler):
inputSource.setByteStream(xmlContent)
self.parser.parse(inputSource)
return self.res
# ------------------------------------------------------------------------------
from appy.shared import UnmarshalledObject, Dummy
from appy.gen.plone25.wrappers import FileWrapper
try:
    from DateTime import DateTime
except ImportError:
    # The DateTime class is not available. The fallback must be callable
    # because XmlUnmarshaller converts date values with DateTime(text): we use
    # the unicode type (written type(u'')), which keeps such values as text.
    # The former fallback was the string 'unicode': calling it raised a
    # TypeError.
    DateTime = type(u'')
class XmlUnmarshaller(XmlParser):
    '''This class allows to parse a XML file and recreate the corresponding web
       of Python objects. This parser assumes that the XML file respects this
       convention: any tag may define an attribute "type" storing the type of
       its content, which may be:

       bool * int * float * long * DateTime * file * tuple * list * object

       If "object" is specified, it means that the tag contains sub-tags, each
       one corresponding to the value of an attribute for this object.
       If "tuple" is specified, it will be converted to a list.
       If "file" is specified, the tag contains the Base64-encoded content of
       a file, whose name and MIME type may be given in attributes "name" and
       "mimeType".
       A tag without attribute "type" contains a string.'''
    containerTags = ('tuple', 'list', 'object')
    numericTypes = ('bool', 'int', 'float', 'long')

    def __init__(self, klass=None):
        XmlParser.__init__(self)
        self.klass = klass # If a klass is given here, instead of creating
        # a root UnmarshalledObject instance, we will create an instance of this
        # class (only if the root object is an object; this does not apply if
        # it is a list or tuple; yes, technically the root tag can be a list or
        # tuple even if it is silly because only one root tag can exist). But be
        # careful: we will not call the constructor of this class. We will
        # simply create an instance of UnmarshalledObject and dynamically change
        # the class of the created instance to this class.

    def startDocument(self):
        '''Initializes the result and the parsing environment.'''
        self.res = None # The resulting web of Python objects
        # (UnmarshalledObject instances).
        self.env.containerStack = [] # The stack of current "containers" where
        # to store the next parsed element. A container can be a list, a tuple,
        # an object (the root object of the whole web or a sub-object).
        self.env.currentBasicType = None # Will hold the name of the currently
        # parsed basic type (unicode, float, ...)
        self.env.currentContent = '' # We store here the content of tags.
        self.env.currentFileName = '' # If current tag contains a file, we
        # store here the file name.
        self.env.currentMimeType = '' # If current tag contains a file, we
        # store here the MIME type of the file.

    def startElement(self, elem, attrs):
        '''Opens a new container if tag p_elem is a tuple, list or object;
           else, remembers the basic type of the value that p_elem
           contains.'''
        e = XmlParser.startElement(self, elem, attrs)
        # Determine the type of the element ("unicode" is the default value).
        elemType = attrs.get('type', 'unicode')
        if elemType in self.containerTags:
            # I must create a new container object.
            if elemType == 'object':
                newObject = UnmarshalledObject()
            else:
                newObject = [] # Lists, and tuples that become lists.
            # Store the value on the last container, or on the root object.
            self.storeValue(elem, newObject)
            # Push the new object on the container stack
            e.containerStack.append(newObject)
        else:
            # We are parsing a basic type
            e.currentBasicType = elemType
            if elemType == 'file':
                # A missing attribute produces an empty string: the value
                # read on a previous file tag must not be reused.
                e.currentFileName = attrs.get('name', '')
                e.currentMimeType = attrs.get('mimeType', '')

    def storeValue(self, name, value):
        '''Stores the newly parsed p_value (contained in tag p_name) on the
           current container of the parsing environment, or as result if
           there is no container.'''
        e = self.env
        # Where must I store this value?
        if not e.containerStack:
            # I store the object at the root of the web.
            self.res = value
            if self.klass and isinstance(value, UnmarshalledObject):
                self.res.__class__ = self.klass
            return
        currentContainer = e.containerStack[-1]
        if isinstance(currentContainer, list):
            currentContainer.append(value)
        else:
            # Current container is an object
            setattr(currentContainer, name, value)

    def characters(self, content):
        '''Accumulates p_content if we are parsing a basic value.'''
        e = XmlParser.characters(self, content)
        if e.currentBasicType:
            e.currentContent += content

    def endElement(self, elem):
        '''Converts and stores the value contained in tag p_elem if it is a
           basic value; else, closes the current container.'''
        e = XmlParser.endElement(self, elem)
        if not e.currentBasicType:
            # This is the end of a container.
            e.containerStack.pop()
            return
        # Get and convert the value of this field
        content = e.currentContent.strip()
        if e.currentBasicType in self.numericTypes:
            # NOTE(security): the content of the tag is evaluated as a Python
            # expression ("eval" replaces an "exec" statement that also ran
            # any injected statement): unmarshall trusted XML only.
            try:
                value = eval(content)
            except SyntaxError:
                value = None
        elif e.currentBasicType == 'DateTime':
            value = DateTime(content)
        elif e.currentBasicType == 'file':
            value = Dummy()
            value.name = e.currentFileName
            value.content = base64.b64decode(content)
            value.mimeType = e.currentMimeType
            value.size = len(value.content)
            # No constructor is called: the class of the object is changed.
            value.__class__ = FileWrapper
        else:
            value = content
        # Store the value on the last container
        self.storeValue(elem, value)
        # Clean the environment, file-related info included
        e.currentBasicType = None
        e.currentContent = ''
        e.currentFileName = ''
        e.currentMimeType = ''

    # Alias 'unmarshall' -> 'parse'
    unmarshall = XmlParser.parse
# ------------------------------------------------------------------------------
class XmlMarshaller:
    '''This class allows to produce a XML version of a Python object, which
       respects some conventions as described in the doc of the corresponding
       Unmarshaller (see above).'''
    xmlPrologue = '<?xml version="1.0" encoding="utf-8"?>'
    xmlEntities = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;',
                   "'": '&apos;'}
    trueFalse = {True: 'True', False: 'False'}
    sequenceTypes = (tuple, list)
    rootElementName = 'xmlPythonData'
    # Fields to dump: 'all', 'all_with_metadata' (Archetypes objects only) or
    # a list or tuple of field names.
    fieldsToMarshall = 'all'
    # Names of the fields that must never be dumped.
    fieldsToExclude = []
    atFiles = ('image', 'file') # Types of archetypes fields that contain files.

    def _escape(self, text):
        '''Returns p_text where special chars are replaced by XML entities.'''
        return ''.join([self.xmlEntities.get(c, c) for c in text])

    def _mustDump(self, fieldName, dumpAll):
        '''Must the field named p_fieldName be dumped? p_dumpAll is True if
           self.fieldsToMarshall selects every (non excluded) field.'''
        if fieldName in self.fieldsToExclude: return False
        if dumpAll: return True
        return (type(self.fieldsToMarshall) in self.sequenceTypes) and \
               (fieldName in self.fieldsToMarshall)

    def dumpValue(self, res, value, fieldType='basic'):
        '''Dumps the XML version of p_value to p_res. Nothing is dumped for
           None.'''
        if fieldType == 'file':
            # p_value contains the (possibly binary) content of a file. We will
            # encode it in Base64.
            if hasattr(value, 'data'):
                v = value.data # Simple wrap for images
                if hasattr(v, 'data'): v = v.data # Double wrap for files
            else:
                v = value
            res.write(base64.b64encode(v))
        elif value is None:
            pass
        elif isinstance(value, bool):
            res.write(self.trueFalse[value])
        elif isinstance(value, basestring):
            # Replace special chars by XML entities
            res.write(self._escape(value))
        else:
            # The value is converted here: StringIO.write ignores any false
            # value, so 0 or 0.0 were not dumped at all.
            res.write(str(value))

    def dumpField(self, res, fieldName, fieldValue, fieldType='basic'):
        '''Dumps in p_res the tag named p_fieldName containing p_fieldValue.
           p_fieldType is "basic", "file" (p_fieldValue is the content of a
           file) or "ref" (p_fieldValue contains referred objects, whose URL
           paths are dumped).'''
        res.write('<'); res.write(fieldName)
        # Dump the type of the field as an XML attribute
        fType = None # No type will mean "string".
        if fieldType == 'file': fType = 'file'
        elif fieldType == 'ref': fType = 'list'
        # bool must be checked before int: a bool is also an int.
        elif isinstance(fieldValue, bool): fType = 'bool'
        elif isinstance(fieldValue, int): fType = 'int'
        elif isinstance(fieldValue, float): fType = 'float'
        elif isinstance(fieldValue, long): fType = 'long'
        elif isinstance(fieldValue, tuple): fType = 'tuple'
        elif isinstance(fieldValue, list): fType = 'list'
        elif fieldValue.__class__.__name__ == 'DateTime': fType = 'DateTime'
        if fType: res.write(' type="%s"' % fType)
        if type(fieldValue) in self.sequenceTypes:
            res.write(' count="%d"' % len(fieldValue))
        if fieldType == 'file':
            # Attribute values are escaped: a quote or an ampersand in a file
            # name would else produce a malformed document.
            if hasattr(fieldValue, 'content_type'):
                res.write(' mimeType="%s"' % \
                          self._escape('%s' % fieldValue.content_type))
            if hasattr(fieldValue, 'filename'):
                res.write(' name="%s"' % \
                          self._escape('%s' % fieldValue.filename))
        res.write('>')
        # Dump the child elements if any
        if fieldType == 'ref':
            if fieldValue:
                for elem in fieldValue:
                    self.dumpField(res, 'url', elem.absolute_url_path())
            else:
                self.dumpField(res, 'url', '')
        elif type(fieldValue) in self.sequenceTypes:
            # The previous condition must be checked before this one because
            # Referred objects are stored in lists or tuples, too.
            for elem in fieldValue:
                self.dumpField(res, 'e', elem)
        else:
            # dumpValue writes to p_res by itself and returns nothing.
            self.dumpValue(res, fieldValue, fieldType)
        res.write('</'); res.write(fieldName); res.write('>')

    def marshall(self, instance, objectType='popo'):
        '''Returns, as a string, the XML version of p_instance. If p_instance
           corresponds to a Plain Old Python Object, specify 'popo' for
           p_objectType. If p_instance corresponds to an Archetypes object
           (Zope/Plone), specify 'archetype' for p_objectType.'''
        res = StringIO()
        # Dump the XML prologue and root element
        res.write(self.xmlPrologue)
        res.write('<'); res.write(self.rootElementName)
        res.write(' type="object">')
        # Dump the value of the fields that must be dumped
        if objectType == 'popo':
            dumpAll = self.fieldsToMarshall == 'all'
            for fieldName, fieldValue in instance.__dict__.items():
                if self._mustDump(fieldName, dumpAll):
                    self.dumpField(res, fieldName, fieldValue)
        elif objectType == 'archetype':
            for field in instance.schema.fields():
                # Dump only needed fields: with 'all', metadata fields are
                # not dumped.
                dumpAll = ((self.fieldsToMarshall == 'all') and \
                           (field.schemata != 'metadata')) or \
                          (self.fieldsToMarshall == 'all_with_metadata')
                if not self._mustDump(field.getName(), dumpAll): continue
                fieldType = 'basic'
                if field.type in self.atFiles:
                    fieldType = 'file'
                elif field.type == 'reference':
                    fieldType = 'ref'
                self.dumpField(res, field.getName(), field.get(instance),
                               fieldType=fieldType)
        self.marshallSpecificElements(instance, res)
        # Return the result
        res.write('</'); res.write(self.rootElementName); res.write('>')
        data = res.getvalue()
        res.close()
        return data

    def marshallSpecificElements(self, instance, res):
        '''You can use this marshaller as a base class for creating your own.
           In this case, this method will be called by the marshall method
           for allowing your concrete marshaller to insert more things in the
           result. p_res is the StringIO buffer where the result of the
           marshalling process is currently dumped; p_instance is the instance
           currently marshalled.'''
# ------------------------------------------------------------------------------