appypod-rattail/shared/utils.py

634 lines
25 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
2009-06-29 07:06:01 -05:00
# ------------------------------------------------------------------------------
# Appy is a framework for building applications in the Python language.
# Copyright (C) 2007 Gaetan Delannay
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------
2012-12-18 15:49:26 -06:00
import os, os.path, re, time, sys, traceback, unicodedata, shutil, mimetypes
# Types considered as sequences by this module (used by getStringDict for
# deciding how a dict value must be rendered).
sequenceTypes = (list, tuple)
2009-06-29 07:06:01 -05:00
# ------------------------------------------------------------------------------
class FolderDeleter:
    '''Groups static methods for removing folders from the file system.'''

    @staticmethod
    def delete(dirName):
        '''Recursively deletes p_dirName, together with any file and
           sub-folder it contains. Symbolic links found in the hierarchy are
           unlinked; their targets are left untouched. An OSError is raised
           if some element cannot be removed.'''
        dirName = os.path.abspath(dirName)
        # Walk bottom-up: the content of a folder must be removed before the
        # folder itself.
        for root, dirs, files in os.walk(dirName, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                path = os.path.join(root, name)
                # os.walk lists symbolic links to folders among "dirs", but
                # os.rmdir fails on a link: it must be unlinked instead.
                if os.path.islink(path):
                    os.remove(path)
                else:
                    os.rmdir(path)
        os.rmdir(dirName)

    @staticmethod
    def deleteEmpty(dirName):
        '''Deletes p_dirName and its parent dirs if they are empty. The method
           stops, without raising any error, at the first folder that is not
           empty, does not exist or cannot be listed or removed.'''
        while True:
            try:
                if os.listdir(dirName):
                    # This folder has content: we must stop here
                    break
                os.rmdir(dirName)
            except OSError:
                break
            # Continue with the parent folder
            dirName = os.path.dirname(dirName)
2009-06-29 07:06:01 -05:00
# ------------------------------------------------------------------------------
# Extensions of the files that cleanFolder removes by default
extsToClean = ('.pyc', '.pyo', '.fsz', '.deltafsz', '.dat', '.log')
def cleanFolder(folder, exts=extsToClean, folders=(), verbose=False):
    '''Removes, in p_folder and its sub-folders, any file whose extension is
       in p_exts (or ends with "~") and any folder whose name is in
       p_folders. If p_verbose is True, every removal is printed on stdout.'''
    if verbose: print('Cleaning folder %s...' % folder)
    # Remove files with an extension listed in p_exts
    if exts:
        for root, dirs, files in os.walk(folder):
            for fileName in files:
                ext = os.path.splitext(fileName)[1]
                if (ext in exts) or ext.endswith('~'):
                    fileToRemove = os.path.join(root, fileName)
                    if verbose: print('Removing file %s...' % fileToRemove)
                    os.remove(fileToRemove)
    # Remove folders whose names are in p_folders
    if folders:
        for root, dirs, files in os.walk(folder):
            remaining = []
            for folderName in dirs:
                if folderName in folders:
                    toDelete = os.path.join(root, folderName)
                    if verbose: print('Removing folder %s...' % toDelete)
                    FolderDeleter.delete(toDelete)
                else:
                    remaining.append(folderName)
            # Updating "dirs" in place prevents os.walk from trying to enter
            # the folders that have just been deleted.
            dirs[:] = remaining
# ------------------------------------------------------------------------------
def resolvePath(path):
    '''p_path is a file path that can contain occurrences of "." and "..".
       This function resolves them and produces a minimal path. Leading ".."
       elements of a relative path cannot be resolved and are kept as is; on
       an absolute path, ".." elements going above the root are ignored.'''
    res = []
    for elem in path.split(os.sep):
        if elem == '.':
            continue
        if elem != '..':
            res.append(elem)
        elif res == ['']:
            # We are at the root of an absolute path: we cannot go higher
            continue
        elif res and (res[-1] != '..'):
            res.pop()
        else:
            # Nothing to go up from: keep the ".." in the result
            res.append(elem)
    return os.sep.join(res)
# ------------------------------------------------------------------------------
def copyFolder(source, dest, cleanDest=False):
    '''Recursively copies the files and sub-folders of folder p_source into
       folder p_dest, which is created (with any missing intermediary
       folder) if it does not exist yet. If p_cleanDest is True, a
       pre-existing p_dest is first deleted completely. Else, the content of
       p_source is merged with the one of p_dest: a file from p_source
       overwrites the file having the same name in p_dest.'''
    dest = os.path.abspath(dest)
    # Start from scratch if required
    if cleanDest and os.path.exists(dest):
        FolderDeleter.delete(dest)
    # Ensure the destination folder exists
    if not os.path.exists(dest):
        os.makedirs(dest)
    # Copy every element found in p_source
    for name in os.listdir(source):
        fromPath = os.path.join(source, name)
        toPath = os.path.join(dest, name)
        if os.path.isdir(fromPath):
            # A sub-folder: copy it recursively
            copyFolder(fromPath, toPath)
        elif os.path.isfile(fromPath):
            # A single file
            shutil.copy(fromPath, toPath)
# ------------------------------------------------------------------------------
def encodeData(data, encoding=None):
    '''Returns string p_data encoded with p_encoding, or p_data itself,
       untouched, if no p_encoding is specified.'''
    if encoding:
        return data.encode(encoding)
    return data
# ------------------------------------------------------------------------------
def copyData(data, target, targetMethod, type='string', encoding=None,
             chunkSize=1024):
    '''Transmits p_data to p_target by calling, on p_target, the method named
       p_targetMethod: for example "write" if p_target is a file object, or
       "send" if it is a HTTP connection.

       p_type describes p_data:
       * "string" (or empty): p_data is a string, transmitted at once;
       * "file": p_data is a file-like object, read and transmitted by
         chunks of p_chunkSize bytes;
       * "zope": p_data is an instance of OFS.Image.File, whose content is
         stored in one or several chunks.

       If an p_encoding is specified, it is applied on any piece of data
       before transmitting it. Note that if p_target is a Python file, it
       must be opened in a way that is compatible with the content of p_data,
       ie file('myFile.doc','wb') if content is binary.'''
    send = getattr(target, targetMethod)
    if (not type) or (type == 'string'):
        send(encodeData(data, encoding))
    elif type == 'file':
        chunk = data.read(chunkSize)
        while chunk:
            send(encodeData(chunk, encoding))
            chunk = data.read(chunkSize)
    elif type == 'zope':
        content = data.data
        if isinstance(content, basestring):
            # The whole content is stored in a single string
            send(encodeData(content, encoding))
        else:
            # The content is a chain of chunks, linked via attribute "next"
            part = content
            while part is not None:
                send(encodeData(part.data, encoding))
                part = part.next
# ------------------------------------------------------------------------------
def splitList(l, sub):
    '''Re-groups the elements of list p_l into consecutive sub-lists of
       p_sub elements each (the last one may be shorter) and returns the
       list of those sub-lists.
       For example, if l = [1,2,3,4,5] and sub = 3, the method returns
       [ [1,2,3], [4,5] ].'''
    res = []
    for index, elem in enumerate(l):
        if (index % sub) != 0:
            # The current sub-list is not complete yet
            res[-1].append(elem)
        else:
            # Start a new sub-list
            res.append([elem])
    return res
2009-06-29 07:06:01 -05:00
# ------------------------------------------------------------------------------
class Traceback:
    '''Dumps the last traceback into a string.'''

    @staticmethod
    def get():
        '''Returns, as a string, the traceback of the exception currently
           being handled (as given by sys.exc_info), followed by the type
           and value of this exception.'''
        excType, excValue, tb = sys.exc_info()
        parts = [' %s' % tbLine for tbLine in traceback.format_tb(tb)]
        parts.append(' %s: %s' % (str(excType), str(excValue)))
        return ''.join(parts)
# ------------------------------------------------------------------------------
def getOsTempFolder():
    '''Returns the path to the temp folder of the OS: "/tmp" if this folder
       exists, else the value of environment variable TMP or, if not set,
       TEMP. An OSError is raised if no temp folder can be found.'''
    tmp = '/tmp'
    if os.path.isdir(tmp):
        return tmp
    for name in ('TMP', 'TEMP'):
        if name in os.environ:
            return os.environ[name]
    # A real exception must be raised here: raising a string is itself an
    # error (TypeError).
    raise OSError("Sorry, I can't find a temp folder on your machine.")
2011-02-28 12:30:17 -06:00
def getTempFileName(prefix='', extension=''):
    '''Builds and returns the absolute path to a file in the OS temp folder.
       The file is not created: this is up to the caller. The file name is
       made of p_prefix, followed by an underscore and the current time. If
       an p_extension is provided, it is appended to the name: it can be
       given with or without its leading dot (ie, ".pdf" or "pdf").'''
    res = '%s/%s_%f' % (getOsTempFolder(), prefix, time.time())
    if extension:
        # Add the dot if the caller did not provide it
        if not extension.startswith('.'):
            res += '.'
        res += extension
    return res
# ------------------------------------------------------------------------------
def executeCommand(cmd):
    '''Executes command p_cmd and returns the content of its stderr, as text
       whose line endings are normalized to "\\n". If p_cmd is a string, it
       is interpreted by the shell; if it is a list or tuple, the program is
       launched directly with these args.'''
    # subprocess replaces the deprecated os.popen3. It is imported here in
    # order to keep this function self-contained.
    import subprocess
    # NOTE(review): a string p_cmd is handed to the shell. It must never be
    # built from untrusted input.
    proc = subprocess.Popen(cmd, shell=not isinstance(cmd, (list, tuple)),
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, universal_newlines=True)
    # "communicate" reads stdout and stderr together: the child cannot block
    # on a full stdout pipe while we are waiting for the end of stderr.
    out, err = proc.communicate()
    return err
# ------------------------------------------------------------------------------
# Chars that are removed from (or blanked within) normalized strings
charsIgnore = u'.,:;*+=~?%^\'"<>{}[]|\t\\°'
fileNameIgnore = charsIgnore + u' $£€/'
extractIgnore = charsIgnore + '()'
# Regular expressions matching the chars kept for usages "alpha" and
# "alphanum": normalizeString finds them via their name, "<usage>Rex".
alphaRex = re.compile('[a-zA-Z]')
alphanumRex = re.compile('[a-zA-Z0-9]')
def normalizeString(s, usage='fileName'):
    '''Returns a version of string p_s whose special chars (like accents) have
       been replaced with normal chars. Moreover, if p_usage is:
       * fileName:      it removes any char that can't be part of a file name;
       * alphanum:      it removes any non-alphanumeric char;
       * alpha:         it removes any non-letter char;
       * extractedText: it replaces any unwanted char with a blank;
       * noAccents (or any other value): nothing more is done.

       p_s can be a byte string (supposed to be encoded in utf-8 or, if it
       can't be decoded as such, in latin-1), a unicode string, or any other
       object, that is then converted to a string. The result contains ascii
       chars only. A ValueError is raised if p_usage starts with "alpha" but
       corresponds to no known regular expression.'''
    strNeeded = isinstance(s, bytes)
    # We work in unicode. Convert p_s to unicode if not unicode.
    if strNeeded:
        try:
            s = s.decode('utf-8')
        except UnicodeDecodeError:
            # Another encoding may be in use
            s = s.decode('latin-1')
    elif not isinstance(s, type(u'')):
        s = type(u'')(s)
    # For extracted text, replace any unwanted char with a blank
    if usage == 'extractedText':
        s = u''.join([u' ' if (c in extractIgnore) else c for c in s])
    # Standardize special chars like accents
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
    # Get back a native string. With Python 2, "s" is already one.
    if not isinstance(s, str): s = s.decode('ascii')
    # Remove any other char, depending on p_usage
    if usage == 'fileName':
        # Remove any char that can't be found within a file name under Windows
        # or that could lead to problems with LibreOffice.
        res = ''.join([c for c in s if c not in fileNameIgnore])
    elif usage.startswith('alpha'):
        # Find the regular expression by its name, without executing any
        # code built from p_usage.
        rex = globals().get('%sRex' % usage)
        if rex is None:
            raise ValueError('Unknown usage "%s".' % usage)
        res = ''.join([c for c in s if rex.match(c)])
    else:
        res = s
    # Re-code the result as a byte string if a byte string was given
    if strNeeded: res = res.encode('utf-8')
    return res
2010-03-31 08:49:54 -05:00
def normalizeText(s):
    '''Returns a version of p_s suitable for indexing purposes: p_s is
       normalized as "extracted text" (see normalizeString), stripped and
       lowerized.'''
    res = normalizeString(s, usage='extractedText')
    return res.strip().lower()
def keepDigits(s):
'''Returns string p_s whose non-number chars have been removed.'''
if s is None: return s
res = ''
for c in s:
if c.isdigit(): res += c
return res
def getStringDict(d):
    '''Returns, as a string, the literal corresponding to dict p_d. Any key
       is rendered between single quotes. A value being a list or tuple is
       rendered as is; any other value is supposed to be a string: it is
       rendered between single quotes, its own single quotes being
       escaped.'''
    entries = []
    for key, value in d.items():
        if type(value) in sequenceTypes:
            entry = "'%s':%s" % (key, value)
        else:
            entry = "'%s':'%s'" % (key, value.replace("'", "\\'"))
        entries.append(entry)
    return '{%s}' % ','.join(entries)
def stretchText(s, pattern, char=' '):
    '''Inserts occurrences of p_char within p_s according to p_pattern, which
       lists the sizes of the successive groups of chars to separate.
       Example: stretchText("475123456", (3,2,2,2)) returns '475 12 34 56'.
       Chars of p_s lying beyond the groups defined in p_pattern are ignored.
       If p_s is too short for filling every group, the last groups are
       truncated or absent.'''
    groups = []
    i = 0
    for nb in pattern:
        group = s[i:i+nb]
        # Ignore empty groups: they would produce useless separators
        if group: groups.append(group)
        i += nb
    # Joining the groups ensures that no separator is added after the last
    # one.
    return char.join(groups)
# ------------------------------------------------------------------------------
def formatNumber(n, sep=',', precision=2, tsep=' '):
    '''Returns a string representation of number p_n, which can be a float
       or integer. p_sep is the decimal separator to use. p_precision is the
       number of digits to keep in the decimal part for producing a nice
       rounded string representation (if None, str(p_n) is used as is).
       p_tsep is the "thousands" separator. The decimal part is dumped only
       if it is not zero. If p_n is None, an empty string is returned.'''
    if n is None: return ''
    # Manage precision
    if precision is None:
        res = str(n)
    else:
        res = ('%%.%df' % precision) % n
    # Split the integer and decimal parts
    intPart, dot, decPart = res.partition('.')
    # Set the sign apart: it must not be counted among the digits to group
    sign = ''
    if intPart[:1] in ('-', '+'):
        sign = intPart[0]
        intPart = intPart[1:]
    # Insert p_tsep between groups of 3 digits, starting from the right. This
    # is done only if the integer part is made of digits only: values like
    # "inf", "nan" or "1e-05" must not be altered.
    if intPart.isdigit() and (len(intPart) > 3):
        groups = []
        while intPart:
            groups.insert(0, intPart[-3:])
            intPart = intPart[:-3]
        intPart = tsep.join(groups)
    res = sign + intPart
    # Add the decimal part if not 0
    if dot:
        try:
            hasDecimals = int(decPart) != 0
        except ValueError:
            # This exception may occur when the float value has an "exp"
            # part, like in this example: 4.345e-05
            hasDecimals = True
        if hasDecimals: res += sep + decPart
    return res
# ------------------------------------------------------------------------------
def lower(s):
    '''French-accents-aware variant of string.lower. p_s can be a unicode
       string or a byte string, that must then be encoded in utf-8. The
       result is of the same type as p_s.'''
    # Byte strings are converted to unicode for being lowerized, in order to
    # get accented chars lowerized, too.
    isBytes = isinstance(s, bytes)
    if isBytes: s = s.decode('utf-8')
    res = s.lower()
    if isBytes: res = res.encode('utf-8')
    return res
def upper(s):
    '''French-accents-aware variant of string.upper. p_s can be a unicode
       string or a byte string, that must then be encoded in utf-8. The
       result is of the same type as p_s.'''
    # Byte strings are converted to unicode for being upperized, in order to
    # get accented chars upperized, too.
    isBytes = isinstance(s, bytes)
    if isBytes: s = s.decode('utf-8')
    res = s.upper()
    if isBytes: res = res.encode('utf-8')
    return res
2010-03-31 08:49:54 -05:00
# ------------------------------------------------------------------------------
# Maps one-letter codes to the corresponding Python types
typeLetters = {'b': bool, 'i': int, 'j': long, 'f':float, 's':str, 'u':unicode,
               'l': list, 'd': dict}
# Extensions of the files taken into account by the code analysis (classes
# CodeAnalysis and LinesCounter), grouped by kind of file.
caExts = {'py': ('.py', '.vpy', '.cpy'), 'xml': ('.pt', '.cpt', '.xml')}
2013-08-21 15:25:27 -05:00
# ------------------------------------------------------------------------------
class CodeAnalysis:
    '''This class holds information about some code analysis (line counts) that
       spans some folder hierarchy.'''
    def __init__(self, name):
        self.name = name # Let's give a name for the analysis
        self.numberOfFiles = 0 # The total number of analysed files
        self.emptyLines = 0 # The number of empty lines within those files
        self.commentLines = 0 # The number of comment lines
        # A code line is defined as anything that is not an empty or comment
        # line.
        self.codeLines = 0

    def numberOfLines(self):
        '''Computes the total number of lines within analysed files.'''
        return self.emptyLines + self.commentLines + self.codeLines

    def analyseXmlFile(self, theFile):
        '''Analyses the XML file whose lines are produced by iterating over
           p_theFile, and updates the line counters accordingly.'''
        inDoc = False
        for line in theFile:
            stripped = line.strip()
            # Manage a comment
            if not inDoc and (('<!--' in line) or ('<tal:comment ' in line)):
                inDoc = True
            if inDoc:
                self.commentLines += 1
                if ('-->' in line) or ('</tal:comment>' in line):
                    inDoc = False
                continue
            # Manage an empty line
            if not stripped:
                self.emptyLines += 1
            else:
                self.codeLines += 1

    # The delimiters that may start or end a Python docstring
    docSeps = ('"""', "'''")

    def isPythonDoc(self, line, start, isStart=False):
        '''Returns True if we find, in p_line, the start of a docstring (if
           p_start is True) or the end of a docstring (if p_start is False).
           p_isStart indicates if p_line is the start of the docstring.'''
        if start:
            return line.startswith(self.docSeps)
        if line in self.docSeps:
            # The line contains the separator only. If this line is the one
            # that starts the docstring, it cannot be its end as well.
            return not isStart
        return line.endswith(self.docSeps)

    def analysePythonFile(self, theFile):
        '''Analyses the Python file whose lines are produced by iterating
           over p_theFile, and updates the line counters accordingly.'''
        # Are we in a docstring ?
        inDoc = False
        for line in theFile:
            stripped = line.strip()
            # Manage a line that is within a docstring
            inDocStart = False
            if not inDoc and self.isPythonDoc(stripped, start=True):
                inDoc = True
                inDocStart = True
            if inDoc:
                self.commentLines += 1
                if self.isPythonDoc(stripped, start=False, isStart=inDocStart):
                    inDoc = False
                continue
            # Manage an empty line
            if not stripped:
                self.emptyLines += 1
                continue
            # Manage a comment line. The stripped line is used: else, indented
            # comments would be counted as code.
            if stripped.startswith('#'):
                self.commentLines += 1
                continue
            # If we are here, we have a code line.
            self.codeLines += 1

    def analyseFile(self, fileName):
        '''Analyses file named p_fileName. The file is counted among the
           analysed files, but its lines are counted only if its extension
           is one of those listed in caExts.'''
        self.numberOfFiles += 1
        theFile = open(fileName)
        try:
            ext = os.path.splitext(fileName)[1]
            if ext in caExts['py']: self.analysePythonFile(theFile)
            elif ext in caExts['xml']: self.analyseXmlFile(theFile)
        finally:
            # Close the file, even if the analysis failed
            theFile.close()

    def printReport(self):
        '''Prints the analysis report on stdout, only if there is at least
           one analysed line.'''
        lines = self.numberOfLines()
        if not lines: return
        commentRate = (self.commentLines / float(lines)) * 100.0
        blankRate = (self.emptyLines / float(lines)) * 100.0
        print('%s: %d files, %d lines (%.0f%% comments, %.0f%% blank)' % \
              (self.name, self.numberOfFiles, lines, commentRate, blankRate))
# ------------------------------------------------------------------------------
class LinesCounter:
    '''Counts and classifies the lines of code within a folder hierarchy.'''
    # Any path containing one of these fragments is excluded from the analysis
    defaultExcludes = ('%s.svn' % os.sep, '%s.bzr' % os.sep, '%stmp' % os.sep,
                       '%stemp' % os.sep)

    def __init__(self, folderOrModule, excludes=None):
        '''p_folderOrModule is the folder to analyse: it can be the path to
           this folder or a Python module, whose folder is then analysed.
           p_excludes may list path fragments to exclude from the analysis,
           in addition to the default ones.'''
        if isinstance(folderOrModule, basestring):
            # It is the path of some folder
            self.folder = folderOrModule
        else:
            # It is a Python module
            self.folder = os.path.dirname(folderOrModule.__file__)
        # These dict will hold information about analysed files.
        self.python = { False: CodeAnalysis('Python'),
                        True:  CodeAnalysis('Python (test)')}
        self.xml = { False: CodeAnalysis('XML'),
                     True:  CodeAnalysis('XML (test)')}
        # Are we currently analysing real or test code?
        self.inTest = False
        # Which paths to exclude from the analysis?
        self.excludes = list(self.defaultExcludes)
        if excludes: self.excludes += excludes

    def printReport(self):
        '''Displays on stdout a small analysis report about self.folder.'''
        total = 0
        for type in ('python', 'xml'):
            for zone in (False, True):
                analyser = getattr(self, type)[zone]
                if analyser.numberOfFiles:
                    analyser.printReport()
                    total += analyser.numberOfLines()
        print('Total (including commented and blank): *** %s ***' % total)

    def isExcluded(self, path):
        '''Must p_path be excluded from the analysis? It is the case if
           p_path contains one of the fragments listed in self.excludes.'''
        # NOTE(review): this is a substring test: '/temp' also matches a
        # folder like '/templates' - confirm this is intended.
        for excl in self.excludes:
            if excl in path: return True
        return False

    def run(self):
        '''Let's start the analysis of self.folder.'''
        # The test markers will allow us to know if we are analysing test code
        # or real code within a given part of self.folder code hierarchy.
        testMarker1 = '%stest%s' % (os.sep, os.sep)
        testMarker2 = '%stest' % os.sep
        testMarker3 = '%stests%s' % (os.sep, os.sep)
        testMarker4 = '%stests' % os.sep
        j = os.path.join
        for root, folders, files in os.walk(self.folder):
            if self.isExcluded(root):
                # Any sub-folder of an excluded folder is excluded as well,
                # its path containing the same fragment: do not walk there.
                folders[:] = []
                continue
            # Are we in real code or in test code ?
            self.inTest = root.endswith(testMarker2) or \
                          (testMarker1 in root) or \
                          root.endswith(testMarker4) or (testMarker3 in root)
            # Scan the files in this folder
            for fileName in files:
                ext = os.path.splitext(fileName)[1]
                if ext in caExts['py']:
                    self.python[self.inTest].analyseFile(j(root, fileName))
                elif ext in caExts['xml']:
                    self.xml[self.inTest].analyseFile(j(root, fileName))
        self.printReport()
# ------------------------------------------------------------------------------
CONVERSION_ERROR = 'An error occurred. %s'
class FileWrapper:
    '''When you get, from an appy object, the value of a File attribute, you
       get an instance of this class.'''
    def __init__(self, zopeFile):
        '''This constructor is only used by Appy to create a nice File instance
           from a Zope corresponding instance (p_zopeFile). If you need to
           create a new file and assign it to a File attribute, use the
           attribute setter, do not create yourself an instance of this
           class.'''
        # Attributes are stored directly in the instance dict, in order to
        # bypass method __setattr__ defined hereafter.
        d = self.__dict__
        d['_zopeFile'] = zopeFile # Not for you!
        d['name'] = zopeFile.filename
        d['content'] = zopeFile.data
        d['mimeType'] = zopeFile.content_type
        d['size'] = zopeFile.size # In bytes

    def __setattr__(self, name, v):
        '''Sets attribute p_name to value p_v, on this wrapper and on the
           wrapped Zope file. Only attributes "name", "content" and
           "mimeType" can be set: an AttributeError is raised for any other
           attribute.'''
        d = self.__dict__
        if name == 'name':
            self._zopeFile.filename = v
            d['name'] = v
        elif name == 'content':
            self._zopeFile.update_data(v, self.mimeType, len(v))
            d['content'] = v
            d['size'] = len(v)
        elif name == 'mimeType':
            self._zopeFile.content_type = v
            # The value is written in the instance dict: executing
            # "self.mimeType = v" here would call this method again, endlessly.
            d['mimeType'] = v
        else:
            raise AttributeError(
                'Impossible to set attribute %s. "Settable" attributes ' \
                'are "name", "content" and "mimeType".' % name)

    def dump(self, filePath=None, format=None, tool=None):
        '''Writes the file on disk. If p_filePath is specified, it is the
           path name where the file will be dumped; folders mentioned in it
           must exist. If not, the file will be dumped in the OS temp folder.
           The absolute path name of the dumped file is returned.
           If an error occurs, the method returns None. If p_format is
           specified, LibreOffice will be called for converting the dumped file
           to the desired format. In this case, p_tool, a Appy tool, must be
           provided. Indeed, any Appy tool contains parameters for contacting
           LibreOffice in server mode.'''
        if not filePath:
            filePath = '%s/file%f.%s' % (getOsTempFolder(), time.time(),
                                         normalizeString(self.name))
        # The content may be binary: the file is opened in binary mode
        f = open(filePath, 'wb')
        try:
            if self.content.__class__.__name__ == 'Pdata':
                # The file content is splitted in several chunks.
                part = self.content
                while part is not None:
                    f.write(part.data)
                    part = part.next
            else:
                # Only one chunk
                f.write(self.content)
        finally:
            # Close the file, even if writing failed
            f.close()
        if format:
            if not tool: return
            # Convert the dumped file using LibreOffice
            errorMessage = tool.convert(filePath, format)
            # Even if we have an "error" message, it could be a simple warning.
            # So we will continue here and, as a subsequent check for knowing if
            # an error occurred or not, we will test the existence of the
            # converted file (see below).
            os.remove(filePath)
            # Return the name of the converted file.
            baseName, ext = os.path.splitext(filePath)
            if (ext == '.%s' % format):
                filePath = '%s.res.%s' % (baseName, format)
            else:
                filePath = '%s.%s' % (baseName, format)
            if not os.path.exists(filePath):
                tool.log(CONVERSION_ERROR % errorMessage, type='error')
                return
        return filePath

    def copy(self):
        '''Returns a copy of this file.'''
        return FileWrapper(self._zopeFile._getCopy(self._zopeFile))
2012-12-18 15:49:26 -06:00
# ------------------------------------------------------------------------------
def getMimeType(fileName):
    '''Tries to guess the mime type of the file named p_fileName. Returns an
       empty string if it can't be guessed. If an encoding is found as well,
       it is appended to the mime type, as a "charset".'''
    mimeType, encoding = mimetypes.guess_type(fileName)
    # Module "mimetypes" may not know .po files
    if not mimeType and fileName.endswith('.po'):
        mimeType = 'text/plain'
        encoding = 'utf-8'
    if not mimeType: return ''
    # NOTE(review): for a guessed type, "encoding" is the one returned by
    # mimetypes.guess_type (ie "gzip"), not a charset - confirm the intent.
    if encoding: return '%s;;charset=%s' % (mimeType, encoding)
    return mimeType
# ------------------------------------------------------------------------------