From db8ad18c5fac28519d2e21cb50302054245fec03 Mon Sep 17 00:00:00 2001
From: Gaetan Delannay
Date: Tue, 12 Jan 2010 21:15:14 +0100
Subject: [PATCH] Added backup/restore scripts (wrappers around repozo).

The backup script has the possibility to execute a tool method on an Appy
application.
---
 bin/backup.py                    | 350 +++++++++++++++++++++++++++++++
 bin/job.py                       |  51 +++++
 bin/restore.py                   |  94 +++++++++
 gen/plone25/mixins/ToolMixin.py  |  14 +-
 gen/plone25/wrappers/__init__.py |   8 +-
 gen/utils.py                     |  12 +-
 6 files changed, 519 insertions(+), 10 deletions(-)
 create mode 100644 bin/backup.py
 create mode 100644 bin/job.py
 create mode 100644 bin/restore.py

diff --git a/bin/backup.py b/bin/backup.py
new file mode 100644
index 0000000..46c8d03
--- /dev/null
+++ b/bin/backup.py
@@ -0,0 +1,350 @@
+# ------------------------------------------------------------------------------
+import sys, time, os, os.path, smtplib, socket, popen2, shutil
+from optparse import OptionParser
+import ZODB.FileStorage
+import ZODB.serialize
+from DateTime import DateTime
+from StringIO import StringIO
+folderName = os.path.dirname(__file__)
+
+# ------------------------------------------------------------------------------
+class BackupError(Exception): pass
+ERROR_CODE = 1
+
+# ------------------------------------------------------------------------------
+class ZodbBackuper:
+    '''This backuper will run every night (after 00.00). Every night excepted
+       Sunday, it will perform an incremental backup. Every Sunday, the script
+       will pack the ZODB, perform a full backup, and, if successful, remove all
+       previous (full and incremental) backups.'''
+    fullBackupExts = ('.fs', '.fsz')
+    toRemoveExts = ('.doc', '.pdf', '.rtf', '.odt')
+    def __init__(self, storageLocation, backupFolder, options):
+        self.storageLocation = storageLocation
+        self.backupFolder = backupFolder
+        self.options = options
+        # Unwrap some options directly on self.
+        self.repozo = options.repozo or './repozo.py'
+        self.zopectl = options.zopectl or './zopectl'
+        self.logFile = file(options.logFile, 'a')
+        self.logMem = StringIO() # We keep a log of the last script execution,
+        # so we can send this info by email.
+        self.emails = options.emails
+        self.tempFolder = options.tempFolder
+        self.logsBackupFolder = options.logsBackupFolder
+        self.zopeUser = options.zopeUser
+        self.keepSeconds = int(options.keepSeconds)
+
+    def log(self, msg):
+        '''Writes p_msg and a newline to the log file and to the memory log.'''
+        for logPlace in (self.logFile, self.logMem):
+            logPlace.write(msg)
+            logPlace.write('\n')
+
+    def executeCommand(self, cmd):
+        '''Executes command p_cmd.'''
+        w = self.log
+        w('Executing "%s"...' % cmd)
+        outstream, instream = popen2.popen4(cmd)
+        outTxt = outstream.readlines()
+        instream.close()
+        outstream.close()
+        for line in outTxt:
+            w(line[:-1])
+        w('Done.')
+
+    def packZodb(self):
+        '''Packs the ZODB, keeping self.keepSeconds seconds of history.'''
+        storage = ZODB.FileStorage.FileStorage(self.storageLocation)
+        storage.pack(time.time()-self.keepSeconds, ZODB.serialize.referencesf)
+        storage.close() # Do not keep the storage open while repozo runs.
+        for fileSuffix in ('', '.index'):
+            fileName = self.storageLocation + fileSuffix
+            os.system('chown %s %s' % (self.zopeUser, fileName))
+
+    folderCreateError = 'Could not create backup folder. Backup of log ' \
+                        'files will not take place. %s'
+    def backupLogs(self):
+        '''Moves the Zope "*.log" files into self.logsBackupFolder, suffixing
+           every file name with the current date and time.'''
+        w = self.log
+        if not os.path.exists(self.logsBackupFolder):
+            # Try to create the folder where to store backups of the log files
+            try:
+                w('Try to create backup folder for logs "%s"...' % \
+                  self.logsBackupFolder)
+                os.mkdir(self.logsBackupFolder)
+            except (IOError, OSError), err:
+                w(self.folderCreateError % str(err))
+        if os.path.exists(self.logsBackupFolder):
+            # Ok, we can make the backup of the log files.
+            # Get the folder where logs lie
+            d = os.path.dirname
+            j = os.path.join
+            logsFolder = j(d(d(self.storageLocation)), 'log')
+            for logFileName in os.listdir(logsFolder):
+                if logFileName.endswith('.log'):
+                    backupTime = DateTime().strftime('%Y_%m_%d_%H_%M')
+                    parts = os.path.splitext(logFileName)
+                    copyFileName = '%s.%s%s' % (parts[0], backupTime, parts[1])
+                    absCopyFileName = j(self.logsBackupFolder, copyFileName)
+                    absLogFileName = j(logsFolder, logFileName)
+                    w('Moving "%s" to "%s"...' % (absLogFileName,
+                                                  absCopyFileName))
+                    shutil.copyfile(absLogFileName, absCopyFileName)
+                    os.remove(absLogFileName)
+                    # I do a "copy" + a "remove" instead of a "rename" because
+                    # a "rename" fails if the source and dest files are on
+                    # different physical devices.
+
+    def getDate(self, dateString):
+        '''Returns a DateTime instance from p_dateString, which has the form
+           YYYY-MM-DD-HH-MM-SS.'''
+        return DateTime('%s/%s/%s %s:%s:%s' % tuple(dateString.split('-')))
+
+    def removeOldBackups(self):
+        '''This method removes all files (full & incremental backups) that are
+           older than the last full backup.'''
+        w = self.log
+        # Determine date of the most recent full backup
+        oldestFullBackupDate = eighties = DateTime('1980/01/01')
+        for backupFile in os.listdir(self.backupFolder):
+            fileDate, ext = os.path.splitext(backupFile)
+            if ext in self.fullBackupExts:
+                # I have found a full backup
+                fileDate = self.getDate(fileDate)
+                if fileDate > oldestFullBackupDate:
+                    oldestFullBackupDate = fileDate
+        # Remove all backup files older than oldestFullBackupDate
+        if oldestFullBackupDate != eighties:
+            w('Last full backup date: %s' % str(oldestFullBackupDate))
+            for backupFile in os.listdir(self.backupFolder):
+                fileDate, ext = os.path.splitext(backupFile)
+                if self.getDate(fileDate) < oldestFullBackupDate:
+                    fullFileName = '%s/%s' % (self.backupFolder, backupFile)
+                    w('Removing old backup file %s...' % fullFileName)
+                    os.remove(fullFileName)
+
+    def sendEmails(self):
+        '''Send content of self.logMem to self.emails.'''
+        w = self.log
+        subject = 'Backup notification.'
+        msg = 'From: %s\nSubject: %s\n\n%s' % (self.options.fromAddress,
+            subject, self.logMem.getvalue())
+        try:
+            w('> Sending mail notifications to %s...' % self.emails)
+            server, port = self.options.smtpServer.split(':')
+            smtpServer = smtplib.SMTP(server, port=int(port))
+            res = smtpServer.sendmail(self.options.fromAddress,
+                                      self.emails.split(','), msg)
+            smtpServer.quit()
+            if res:
+                w('Could not send mail to some recipients. %s' % str(res))
+            w('Done.')
+        except socket.error, se:
+            w('Could not connect to SMTP server %s (%s).' % \
+              (self.options.smtpServer, str(se)))
+
+    def removeTempFiles(self):
+        '''For EGW, OO produces temp files that EGW tries do delete at the time
+           they are produced. But in some cases EGW can't do it (ie Zope runs
+           with a given user and OO runs with root and produces files that can't
+           be deleted by the user running Zope). This is why in this script we
+           remove the temp files that could not be removed by Zope.'''
+        w = self.log
+        w('Removing temp files in "%s"...' % self.tempFolder)
+        # One counter per extension: the previous "exec"-based counters only
+        # existed for pdf and doc and raised a NameError on .rtf/.odt files.
+        counts = dict.fromkeys(self.toRemoveExts, 0)
+        for fileName in os.listdir(self.tempFolder):
+            ext = os.path.splitext(fileName)[1]
+            if ext in self.toRemoveExts:
+                counts[ext] += 1
+                fullFileName = os.path.join(self.tempFolder, fileName)
+                #w('Removing "%s"...' % fullFileName)
+                try:
+                    os.remove(fullFileName)
+                except OSError, oe:
+                    w('Could not remove "%s" (%s).' % (fullFileName, str(oe)))
+        w('%d PDF document(s) removed.' % counts['.pdf'])
+        w('%d Word document(s) removed.' % counts['.doc'])
+
+    def run(self):
+        '''Stops Zope, packs (on full backup day) and backs up the ZODB, runs
+           the optional command, restarts Zope and reports by log and mail.'''
+        w = self.log
+        startTime = time.time()
+        w('\n****** Backup launched at %s ******' % str(time.asctime()))
+        # Shutdown the Zope instance
+        w('> Shutting down Zope instance...')
+        self.executeCommand('%s stop' % self.zopectl)
+        try:
+            # If we are on the "full backup day", let's pack the ZODB first
+            if time.asctime().startswith(self.options.dayFullBackup):
+                w('> Day is "%s", packing the ZODB...' % \
+                  self.options.dayFullBackup)
+                self.packZodb()
+                w('> Make a backup of log files...')
+                self.backupLogs()
+                w('Done.')
+            # Do the backup with repozo
+            w('> Performing backup...')
+            cmd = '%s %s -BvzQ -r %s -f %s' % (self.options.python,
+                self.repozo, self.backupFolder, self.storageLocation)
+            self.executeCommand(cmd)
+            # Remove previous full backups.
+            self.removeOldBackups()
+            # If a command is specified, run Zope to execute this command
+            if self.options.command:
+                w('> Executing command "%s"...' % self.options.command)
+                jobScript = '%s/job.py' % folderName
+                cmd = '%s run %s %s' % (self.zopectl, jobScript,
+                                        self.options.command)
+                self.executeCommand(cmd)
+        finally:
+            # Whatever happened, start the instance again, in normal mode.
+            w('> Restarting Zope instance...')
+            self.executeCommand('%s start' % self.zopectl)
+        self.removeTempFiles()
+        stopTime = time.time()
+        w('Done in %d minute(s).' % ((stopTime-startTime)/60))
+        if self.emails:
+            self.sendEmails()
+        self.logFile.close()
+        print self.logMem.getvalue()
+        self.logMem.close()
+
+# ------------------------------------------------------------------------------
+class ZodbBackupScript:
+    '''usage: python backup.py storageLocation backupFolder [options]
+       storageLocation is the path to a ZODB database (file storage) (ie
+                       /opt/ZopeInstance/var/Data.fs);
+       backupFolder    is a folder exclusively dedicated for storing backups
+                       of the mentioned storage (ie /data/zodbbackups).'''
+
+    weekDays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
+    def checkArgs(self, options, args):
+        '''Check that the scripts arguments are correct.'''
+        # Do I have the correct number of args?
+        if len(args) != 2:
+            raise BackupError('Wrong number of arguments.')
+        # Check storageLocation
+        if not os.path.exists(args[0]) or not os.path.isfile(args[0]):
+            raise BackupError('"%s" does not exist or is not a file.' % args[0])
+        # Check backupFolder
+        if not os.path.isdir(args[1]):
+            raise BackupError('"%s" does not exist or is not a folder.'%args[1])
+        # Try to create a file in this folder to check if we have write
+        # access in it.
+        fileName = '%s/%s.tmp' % (args[1], str(time.time()))
+        try:
+            f = file(fileName, 'w')
+            f.write('Hello.')
+            f.close()
+            os.remove(fileName)
+        except (IOError, OSError): # file() raises IOError, os.remove OSError
+            raise BackupError('I do not have the right to write in ' \
+                              'folder "%s".' % args[1])
+        # Check temp folder
+        if not os.path.isdir(options.tempFolder):
+            raise BackupError('Temp folder "%s" does not exist or is not ' \
+                              'a folder.' % options.tempFolder)
+        # Check day of week
+        if options.dayFullBackup not in self.weekDays:
+            raise BackupError(
+                'Day of week must be one of %s' % str(self.weekDays))
+        # Check command format
+        if options.command:
+            parts = options.command.split(':')
+            if len(parts) not in (3,4):
+                raise BackupError('Command format must be ' \
+                                  'plonePath:appName:toolMethod' \
+                                  '[:args]')
+
+    def run(self):
+        '''Parses the command line, checks it and launches the backup.'''
+        optParser = OptionParser(usage=ZodbBackupScript.__doc__)
+        optParser.add_option("-p", "--python", dest="python",
+                             help="The path to the Python interpreter running "\
+                                  "Zope",
+                             default='python2.4',metavar="PYTHON",type='string')
+        optParser.add_option("-r", "--repozo", dest="repozo",
+                             help="The path to repozo.py",
+                             default='', metavar="REPOZO", type='string')
+        optParser.add_option("-z", "--zopectl", dest="zopectl",
+                             help="The path to Zope instance's zopectl script",
+                             default='', metavar="ZOPECTL", type='string')
+        optParser.add_option("-l", "--logfile", dest="logFile",
+                             help="Log file where this script will append " \
+                                  "output (defaults to ./backup.log)",
+                             default='./backup.log', metavar="LOGFILE",
+                             type='string')
+        optParser.add_option("-d", "--day-full-backup", dest="dayFullBackup",
+                             help="Day of the week where the full backup " \
+                                  "must be performed (defaults to 'Sun'). " \
+                                  "Must be one of %s" % str(self.weekDays),
+                             default='Sun', metavar="DAYFULLBACKUP",
+                             type='string')
+        optParser.add_option("-e", "--emails", dest="emails",
+                             help="Comma-separated list of emails that will " \
+                                  "receive the log of this script.",
+                             default='', metavar="EMAILS", type='string')
+        optParser.add_option("-f", "--from-address", dest="fromAddress",
+                             help="From address for the sent mails",
+                             default='', metavar="FROMADDRESS", type='string')
+        optParser.add_option("-s", "--smtp-server", dest="smtpServer",
+                             help="SMTP server and port (ie: localhost:25) " \
+                                  "for sending mails", default='localhost:25',
+                             metavar="SMTPSERVER", type='string')
+        optParser.add_option("-t", "--tempFolder", dest="tempFolder",
+                             help="Folder used by OO for producing temp " \
+                                  "files. Defaults to /tmp.",
+                             default='/tmp', metavar="TEMP", type='string')
+        optParser.add_option("-b", "--logsBackupFolder",dest="logsBackupFolder",
+                             help="Folder where backups of log files " \
+                                  "(event.log and Z2.log) will be stored.",
+                             default='./logsbackup', metavar="LOGSBACKUPFOLDER",
+                             type='string')
+        optParser.add_option("-u", "--user", dest="zopeUser",
+                             help="User and group that must own Data.fs. " \
+                                  "Defaults to zope:www-data. If " \
+                                  "this script is launched by root, for " \
+                                  "example, when packing the ZODB this script "\
+                                  "may produce a new Data.fs that the user " \
+                                  "running Zope may not be able to read " \
+                                  "anymore. After packing, this script makes " \
+                                  "a 'chown' on Data.fs.",
+                             default='zope:www-data', metavar="USER",
+                             type='string')
+        optParser.add_option("-k", "--keep-seconds", dest="keepSeconds",
+                             help="Number of seconds to leave in the ZODB " \
+                                  "history when the ZODB is packed.",
+                             default='86400', metavar="KEEPSECONDS",
+                             type='string')
+        optParser.add_option("-c", "--command", dest="command",
+            help="Command to execute while Zope is running. It must have the " \
+                 "following format: plonePath:appName:toolMethod" \
+                 "[:args]. plonePath is the path, " \
+                 "within Zope, to the Plone Site object (if not at the root of " \
+                 "the Zope hierarchy, use '/' as folder separator); " \
+                 "appName is the name of the Appy application; " \
+                 "toolMethod is the name of the method to call on the tool " \
+                 "in this Appy application; args (optional) are the arguments " \
+                 "to give to this method (only strings are supported). Several " \
+                 "arguments must be separated by '*'.", default='',
+            metavar="COMMAND", type='string')
+        (options, args) = optParser.parse_args()
+        try:
+            self.checkArgs(options, args)
+            backuper = ZodbBackuper(args[0], args[1], options)
+            backuper.run()
+        except BackupError, be:
+            sys.stderr.write(str(be))
+            sys.stderr.write('\n')
+            optParser.print_help()
+            sys.exit(ERROR_CODE)
+
+# ------------------------------------------------------------------------------
+if __name__ == '__main__':
+    ZodbBackupScript().run()
+# ------------------------------------------------------------------------------
diff --git a/bin/job.py b/bin/job.py
new file mode 100644
index 0000000..530cf78
--- /dev/null
+++ b/bin/job.py
@@ -0,0 +1,51 @@
+'''job.py must be executed by a "zopectl run" command and, as single arg,
+   must get a string with the following format:
+
+    plonePath:appName:toolMethod[:args].
+
+     plonePath  is the path, within Zope, to the Plone Site object (if
+                not at the root of the Zope hierarchy, use '/' as
+                folder separator);
+
+     appName    is the name of the Appy application;
+
+     toolMethod is the name of the method to call on the tool in this
+                Appy application;
+
+     args       (optional) are the arguments to give to this method (only strings
+                are supported). Several arguments must be separated by '*'.'''
+
+# ------------------------------------------------------------------------------
+import sys
+# Check that job.py is called with the right parameters.
+if len(sys.argv) != 2:
+    print 'job.py was called with wrong args.'
+    print __doc__
+else:
+    command = sys.argv[1]
+    parts = command.split(':')
+    if len(parts) not in (3,4):
+        print 'job.py was called with wrong args.'
+        print __doc__
+    else:
+        # Unwrap parameters
+        if len(parts) == 3:
+            plonePath, appName, toolMethod = parts
+            args = ()
+        else:
+            plonePath, appName, toolMethod, args = parts
+        # Zope was initialized in a minimal way. Complete Zope and Plone
+        # installation.
+        from Testing import makerequest
+        app = makerequest.makerequest(app)
+        # Get the Plone site
+        ploneSite = app # Initialised with the Zope root object.
+        for elem in plonePath.split('/'):
+            ploneSite = getattr(ploneSite, elem)
+        # Get the tool corresponding to the Appy application
+        toolName = 'portal_%s' % appName.lower()
+        tool = getattr(ploneSite, toolName).appy()
+        # Execute the method on the tool (attribute lookup, no "exec")
+        if args: args = args.split('*')
+        getattr(tool, toolMethod)(*args)
+# ------------------------------------------------------------------------------
diff --git a/bin/restore.py b/bin/restore.py
new file mode 100644
index 0000000..63e691f
--- /dev/null
+++ b/bin/restore.py
@@ -0,0 +1,94 @@
+# ------------------------------------------------------------------------------
+import sys, time, os, os.path
+from optparse import OptionParser
+
+# ------------------------------------------------------------------------------
+class RestoreError(Exception): pass
+ERROR_CODE = 1
+
+# ------------------------------------------------------------------------------
+class ZodbRestorer:
+    def __init__(self, storageLocation, backupFolder, options):
+        self.storageLocation = storageLocation
+        self.backupFolder = backupFolder
+        self.repozo = options.repozo or 'repozo.py'
+        self.restoreDate = options.date
+        self.python = options.python
+    def run(self):
+        '''Builds the repozo restore (-R) command line and executes it.'''
+        startTime = time.time()
+        datePart = ''
+        if self.restoreDate:
+            datePart = '-D %s' % self.restoreDate
+        repozoCmd = '%s %s -Rv -r %s %s -o %s' % (self.python,
+            self.repozo, self.backupFolder, datePart, self.storageLocation)
+        print 'Executing %s...' % repozoCmd
+        os.system(repozoCmd)
+        stopTime = time.time()
+        print 'Done in %d minutes.' % ((stopTime-startTime)/60)
+
+# ------------------------------------------------------------------------------
+class ZodbRestoreScript:
+    '''usage: python restore.py storageLocation backupFolder [options]
+       storageLocation is the storage that will be created at the end of the
+                       restore process (ie /tmp/Data.hurrah.fs);
+       backupFolder    is the folder used for storing storage backups
+                       (ie /data/zodbbackups).'''
+
+    def checkArgs(self, options, args):
+        '''Check that the scripts arguments are correct.'''
+        # Do I have the correct number of args?
+        if len(args) != 2:
+            raise RestoreError('Wrong number of arguments.')
+        # Check that storageLocation does not exist.
+        if os.path.exists(args[0]):
+            raise RestoreError('"%s" exists. Please specify the name of a ' \
+                               'new file (in a temp folder for example); you ' \
+                               'will move this at the right place in a second '\
+                               'step.' % args[0])
+        # Check backupFolder
+        if not os.path.isdir(args[1]):
+            raise RestoreError('"%s" does not exist or is not a folder.' % \
+                               args[1])
+        # Try to create storageLocation to check if we have write
+        # access in it.
+        try:
+            f = file(args[0], 'w')
+            f.write('Hello.')
+            f.close()
+            os.remove(args[0])
+        except (IOError, OSError): # file() raises IOError, os.remove OSError
+            raise RestoreError('I do not have the right to write file ' \
+                               '"%s".' % args[0])
+
+    def run(self):
+        optParser = OptionParser(usage=ZodbRestoreScript.__doc__)
+        optParser.add_option("-p", "--python", dest="python",
+                             help="The path to the Python interpreter running "\
+                                  "Zope",
+                             default='python2.4',metavar="PYTHON",type='string')
+        optParser.add_option("-r", "--repozo", dest="repozo",
+                             help="The path to repozo.py",
+                             default='', metavar="REPOZO", type='string')
+        optParser.add_option("-d", "--date", dest="date",
+                             help="Date of the image to restore (format=" \
+                                  "YYYY-MM-DD-HH-MM-SS). It is UTC time, " \
+                                  "not local time. If you don't specify this " \
+                                  "option, it defaults to now. If specified, " \
+                                  "hour, minute, and second parts are optional",
+                             default='', metavar="DATE", type='string')
+        (options, args) = optParser.parse_args()
+        try:
+            self.checkArgs(options, args)
+            backuper = ZodbRestorer(args[0], args[1], options)
+            backuper.run()
+        except RestoreError, be:
+            sys.stderr.write(str(be))
+            sys.stderr.write('\n')
+            optParser.print_help()
+            sys.exit(ERROR_CODE)
+
+# ------------------------------------------------------------------------------
+if __name__ == '__main__':
+    ZodbRestoreScript().run()
+# ------------------------------------------------------------------------------
diff --git a/gen/plone25/mixins/ToolMixin.py b/gen/plone25/mixins/ToolMixin.py
index 27e437d..92a2b16 100644
--- a/gen/plone25/mixins/ToolMixin.py
+++ b/gen/plone25/mixins/ToolMixin.py
@@ -108,7 +108,7 @@ class ToolMixin(AbstractMixin):
     _sortFields = {'title': 'sortable_title'}
     def executeQuery(self, contentType, flavourNumber=1, searchName=None,
                      startNumber=0, search=None, remember=False,
-                     brainsOnly=False, maxResults=None):
+                     brainsOnly=False, maxResults=None, noSecurity=False):
         '''Executes a query on a given p_contentType (or several, separated
            with commas) in Plone's portal_catalog. Portal types are from the
            flavour numbered p_flavourNumber. If p_searchName is specified, it
@@ -134,7 +134,10 @@ class ToolMixin(AbstractMixin):
            specified, the method returns maximum
            self.getNumberOfResultsPerPage(). The method returns all objects if
            p_maxResults equals string "NO_LIMIT". p_maxResults is ignored if
-           p_brainsOnly is True.'''
+           p_brainsOnly is True.
+
+           If p_noSecurity is True, it gets all the objects, even those that the
+           currently logged user can't see.'''
         # Is there one or several content types ?
         if contentType.find(',') != -1:
             # Several content types are specified
@@ -194,11 +197,14 @@ class ToolMixin(AbstractMixin):
                 # (for searchability) and can't be used for sorting.
                 if self._sortFields.has_key(sb): sb = self._sortFields[sb]
                 params['sort_on'] = sb
-        brains = self.portal_catalog.searchResults(**params)
+        # Determine what method to call on the portal catalog
+        if noSecurity: catalogMethod = 'unrestrictedSearchResults'
+        else:          catalogMethod = 'searchResults'
+        brains = getattr(self.portal_catalog, catalogMethod)(**params)
         if brainsOnly: return brains
         if not maxResults: maxResults = self.getNumberOfResultsPerPage()
         elif maxResults == 'NO_LIMIT': maxResults = None
-        res = SomeObjects(brains, maxResults, startNumber)
+        res = SomeObjects(brains, maxResults, startNumber,noSecurity=noSecurity)
         res.brainsToObjects()
         # In some cases (p_remember=True), we need to keep some information
         # about the query results in the current user's session, allowing him
diff --git a/gen/plone25/wrappers/__init__.py b/gen/plone25/wrappers/__init__.py
index 437ef73..e1db04d 100644
--- a/gen/plone25/wrappers/__init__.py
+++ b/gen/plone25/wrappers/__init__.py
@@ -262,12 +262,14 @@ class AbstractWrapper:
            replaced with normal chars.'''
         return unicodedata.normalize('NFKD', s).encode("ascii","ignore")
 
-    def search(self, klass, sortBy='', maxResults=None, **fields):
+    def search(self, klass, sortBy='', maxResults=None,
+               noSecurity=False, **fields):
         '''Searches objects of p_klass. p_sortBy must be the name of an indexed
            field (declared with indexed=True); every param in p_fields must
            take the name of an indexed field and take a possible value of this
            field. You can optionally specify a maximum number of results in
-           p_maxResults.'''
+           p_maxResults. If p_noSecurity is specified, you get all objects,
+           even if the logged user does not have the permission to view it.'''
         # Find the content type corresponding to p_klass
         flavour = self.flavour
         contentType = flavour.o.getPortalType(klass)
@@ -278,7 +280,7 @@ class AbstractWrapper:
         # If I let maxResults=None, only a subset of the results will be
         # returned by method executeResult.
         res = self.tool.o.executeQuery(contentType,flavour.number,search=search,
-                                       maxResults=maxResults)
+                                 maxResults=maxResults, noSecurity=noSecurity)
         return [o.appy() for o in res['objects']]
 
     def count(self, klass, **fields):
diff --git a/gen/utils.py b/gen/utils.py
index 686fd9d..5519a22 100755
--- a/gen/utils.py
+++ b/gen/utils.py
@@ -175,7 +175,8 @@ class AppyRequest:
 class SomeObjects:
     '''Represents a bunch of objects retrieved from a reference or a query in
        portal_catalog.'''
-    def __init__(self, objects=None, batchSize=None, startNumber=0):
+    def __init__(self, objects=None, batchSize=None, startNumber=0,
+                 noSecurity=False):
         self.objects = objects or [] # The objects
         self.totalNumber = len(self.objects) # self.objects may only represent a
         # part of all available objects.
@@ -183,11 +184,16 @@ class SomeObjects:
         # self.objects.
         self.startNumber = startNumber # The index of first object in
         # self.objects in the whole list.
+        self.noSecurity = noSecurity
     def brainsToObjects(self):
         '''self.objects has been populated from brains from the portal_catalog,
            not from True objects. This method turns them (or some of them
-           depending on batchSize and startNumber) into real objects.'''
+           depending on batchSize and startNumber) into real objects.
+           If self.noSecurity is True, it gets the objects even if the logged
+           user does not have the right to get them.'''
         start = self.startNumber
         brains = self.objects[start:start + self.batchSize]
-        self.objects = [b.getObject() for b in brains]
+        if self.noSecurity: getMethod = '_unrestrictedGetObject'
+        else:               getMethod = 'getObject'
+        self.objects = [getattr(b, getMethod)() for b in brains]
 # ------------------------------------------------------------------------------