appy.gen: improved cleaning and formatting of XHTML content; appy.pod: added some default appy-related table styles for producing cells with text in bold/normal, aligned right/left, etc.

This commit is contained in:
Gaetan Delannay 2012-05-14 17:35:34 +02:00
parent d3a2b85a10
commit 028040351c
11 changed files with 195 additions and 54 deletions

View file

@ -106,7 +106,8 @@ class Debianizer:
def __init__(self, app, out, appVersion='0.1.0',
pythonVersions=('2.6',), zopePort=8080,
depends=('openoffice.org', 'imagemagick'), sign=False):
depends=('zope2.12', 'openoffice.org', 'imagemagick'),
sign=False):
# app is the path to the Python package to Debianize.
self.app = app
self.appName = os.path.basename(app)
@ -261,10 +262,6 @@ class Debianizer:
# Create postinst, a script that will:
# - bytecompile Python files after the Debian install
# - change ownership of some files if required
# - [in the case of a app-package] execute:
# apt-get -t squeeze-backports install zope2.12
# (if zope2.12 is defined as a simple dependency in field "Depends:"
# it will fail because it will not be searched in squeeze-backports).
# - [in the case of an app-package] call update-rc.d for starting it at
# boot time.
f = file('postinst', 'w')
@ -276,8 +273,6 @@ class Debianizer:
self.appName)
content += 'if [ -e %s ]\nthen\n%sfi\n' % (bin, cmds)
if self.appName != 'appy':
# Install zope2.12 from squeeze-backports
content += 'apt-get -t squeeze-backports install zope2.12\n'
# Allow user "zope", that runs the Zope instance, to write the
# database and log files.
content += 'chown -R zope:root /var/lib/%s\n' % self.appNameLower

View file

@ -263,35 +263,6 @@ def formatNumber(n, sep=',', precision=2, tsep=' '):
res += sep + splitted[1]
return res
# ------------------------------------------------------------------------------
class XhtmlCleaner:
    '''Regex-based cleaner for chunks of XHTML.

       What m_clean does:
       1. HTML comments are always removed, which reduces the size of the
          content to store.
       2. Unless p_keepStyles is True, every class="..." attribute is removed
          as well, in order to get a more standardized content.

       Note that only the attribute itself is removed: the blank that
       preceded it is left in place (<p class="a"> becomes <p >).
    '''
    # Regular expressions used for cleaning. Raw strings are used so that "\s"
    # reaches the regex engine untouched instead of being treated as a (invalid)
    # string escape sequence.
    classAttr = re.compile(r'class\s*=\s*".*?"')
    # re.S lets a comment span several lines.
    comment = re.compile(r'<!--.*?-->', re.S)

    @classmethod
    def clean(klass, s, keepStyles=False):
        '''Returns the cleaned variant of p_s. If p_keepStyles is False, "class"
           attributes are stripped; HTML comments are stripped in any case.'''
        if not keepStyles:
            # Get rid of style-related information.
            s = klass.classAttr.sub('', s)
        # Get rid of comments.
        return klass.comment.sub('', s)
# ------------------------------------------------------------------------------
def lower(s):
'''French-accents-aware variant of string.lower.'''

View file

@ -18,7 +18,7 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
# ------------------------------------------------------------------------------
import xml.sax, difflib, types
import xml.sax, difflib, types, cgi
from xml.sax.handler import ContentHandler, ErrorHandler, feature_external_ges,\
property_interning_dict
from xml.sax.xmlreader import InputSource
@ -887,4 +887,127 @@ class XmlComparator:
else:
lastLinePrinted = False
return not atLeastOneDiff
# ------------------------------------------------------------------------------
class XhtmlCleaner(XmlParser):
    '''This class has 2 objectives:

       1. The main objective is to format XHTML p_s to be storable in the ZODB
          according to Appy rules: every tag listed in "lineBreakTags" is
          followed by a line break (and preceded by one if the output
          produced so far does not already end with one), so that a
          line-based XHTML differ can work on it.
       2. If p_keepStyles (of m_clean) is False, some style-related information
          is removed (see "tagsToIgnore*" and "attrsToIgnore" below), in order
          to get a standardized content that can be dumped in an elegant and
          systematic manner into a POD template.

       The result is rebuilt from the SAX events below; anything for which no
       handler is defined here is not re-emitted by this class.
    '''
    # Tags that will not be in the result, content included, if keepStyles is
    # False.
    tagsToIgnoreWithContent = ('style', 'colgroup')
    # Tags that will be removed from the result, but whose content will be kept,
    # if keepStyles is False. 'x' is the synthetic root tag added by m_clean.
    tagsToIgnoreKeepContent = ('x', 'font')
    # All tags to ignore
    tagsToIgnore = tagsToIgnoreWithContent + tagsToIgnoreKeepContent
    # Attributes to ignore, if keepStyles is False.
    attrsToIgnore = ('align', 'valign', 'cellpadding', 'cellspacing', 'width',
                     'height', 'bgcolor', 'lang', 'border', 'class')
    # Attrs to add, if not present, to ensure good formatting, be it at the web
    # or ODT levels.
    attrsToAdd = {'table': {'cellspacing':'0', 'cellpadding':'6', 'border':'1'},
                  'tr': {'valign': 'top'}}
    # Tags that require a line break to be inserted after them.
    lineBreakTags = ('p', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'td')

    def clean(self, s, keepStyles=True):
        '''Parses p_s (wrapped into a single root tag <x>) and returns what
           m_parse returns (presumably the cleaned string built in self.res;
           m_parse is inherited and not visible here).'''
        # Must we keep style-related information or not?
        self.env.keepStyles = keepStyles
        self.env.currentContent = ''
        # The stack of currently parsed elements (will contain only ignored
        # ones).
        self.env.currentElems = []
        # 'ignoreTag' is True if we must ignore the currently walked tag.
        self.env.ignoreTag = False
        # 'ignoreContent' is True if, within the currently ignored tag, we must
        # also ignore its content.
        self.env.ignoreContent = False
        return self.parse('<x>%s</x>' % s)

    def startDocument(self):
        # The result will be cleaned XHTML, joined from self.res.
        self.res = []

    def endDocument(self):
        self.res = ''.join(self.res)

    def startElement(self, elem, attrs):
        '''Dumps the start tag p_elem and its p_attrs, unless the tag must be
           ignored.'''
        e = self.env
        # Dump any previously gathered content if any
        if e.currentContent:
            self.res.append(e.currentContent)
            e.currentContent = ''
        # We are within a region that must be completely ignored.
        if e.ignoreTag and e.ignoreContent: return
        if not e.keepStyles and (elem in self.tagsToIgnore):
            e.ignoreTag = True
            e.ignoreContent = elem in self.tagsToIgnoreWithContent
            e.currentElems.append( (elem, e.ignoreContent) )
            return
        # Add a line break before the start tag if required (ie: xhtml differ
        # needs to get paragraphs and other elements on separate lines).
        if (elem in self.lineBreakTags) and self.res and \
           (self.res[-1][-1] != '\n'):
            prefix = '\n'
        else:
            prefix = ''
        res = '%s<%s' % (prefix, elem)
        # Include the found attributes, except those that must be ignored.
        dumped = set()
        for name, value in attrs.items():
            if not e.keepStyles and (name in self.attrsToIgnore): continue
            # The parser hands over attribute values with entities resolved:
            # re-escape them (double quotes included), else a value containing
            # &, < or " would produce malformed XHTML.
            res += ' %s="%s"' % (name, cgi.escape(value, True))
            dumped.add(name)
        # Include additional attributes if required, but only those that were
        # not dumped yet: an attribute may not appear twice on the same tag.
        for name, value in self.attrsToAdd.get(elem, {}).items():
            if name in dumped: continue
            res += ' %s="%s"' % (name, value)
        self.res.append('%s>' % res)

    def endElement(self, elem):
        '''Dumps the end tag p_elem, unless the tag must be ignored.'''
        e = self.env
        if e.ignoreTag and (elem in self.tagsToIgnore):
            # Pop the currently ignored tag
            e.currentElems.pop()
            if e.currentElems:
                # Keep ignoring tags.
                e.ignoreContent = e.currentElems[-1][1]
            else:
                # Stop ignoring elems
                e.ignoreTag = e.ignoreContent = False
                # No other handler will dump the text gathered so far if this
                # was the last tag of the document (ie, text lying directly
                # within the root tag <x>): dump it now, else it is lost.
                if e.currentContent:
                    self.res.append(e.currentContent)
                    e.currentContent = ''
        elif e.ignoreTag and e.ignoreContent:
            # This is the end of a sub-tag within a region that we must ignore.
            pass
        else:
            self.res.append(e.currentContent)
            # Add a line break after the end tag if required (ie: xhtml differ
            # needs to get paragraphs and other elements on separate lines).
            if elem in self.lineBreakTags:
                suffix = '\n'
            else:
                suffix = ''
            self.res.append('</%s>%s' % (elem, suffix))
            e.currentContent = ''

    def characters(self, content):
        '''Gathers p_content, unless we are within an ignored region.'''
        if self.env.ignoreContent: return
        # NOTE(review): the previous code computed a variant of p_content
        # without the blanks that ckeditor may add just after a start tag, but
        # never used it. The content is thus still gathered as is; confirm
        # whether trimming was really intended before enabling it.
        # Re-transform XML special chars to entities.
        self.env.currentContent += cgi.escape(content)
# ------------------------------------------------------------------------------