appy.gen: improved cleaning and formatting of XHTML content; appy.pod: added some default appy-related table styles for producing cells with text in bold/normal, aligned right/left, etc.
This commit is contained in:
parent
d3a2b85a10
commit
028040351c
11 changed files with 195 additions and 54 deletions
|
@ -106,7 +106,8 @@ class Debianizer:
|
|||
|
||||
def __init__(self, app, out, appVersion='0.1.0',
|
||||
pythonVersions=('2.6',), zopePort=8080,
|
||||
depends=('openoffice.org', 'imagemagick'), sign=False):
|
||||
depends=('zope2.12', 'openoffice.org', 'imagemagick'),
|
||||
sign=False):
|
||||
# app is the path to the Python package to Debianize.
|
||||
self.app = app
|
||||
self.appName = os.path.basename(app)
|
||||
|
@ -261,10 +262,6 @@ class Debianizer:
|
|||
# Create postinst, a script that will:
|
||||
# - bytecompile Python files after the Debian install
|
||||
# - change ownership of some files if required
|
||||
# - [in the case of an app-package] execute:
|
||||
# apt-get -t squeeze-backports install zope2.12
|
||||
# (if zope2.12 is defined as a simple dependency in field "Depends:"
|
||||
# it will fail because it will not be searched in squeeze-backports).
|
||||
# - [in the case of an app-package] call update-rc.d for starting it at
|
||||
# boot time.
|
||||
f = file('postinst', 'w')
|
||||
|
@ -276,8 +273,6 @@ class Debianizer:
|
|||
self.appName)
|
||||
content += 'if [ -e %s ]\nthen\n%sfi\n' % (bin, cmds)
|
||||
if self.appName != 'appy':
|
||||
# Install zope2.12 from squeeze-backports
|
||||
content += 'apt-get -t squeeze-backports install zope2.12\n'
|
||||
# Allow user "zope", that runs the Zope instance, to write the
|
||||
# database and log files.
|
||||
content += 'chown -R zope:root /var/lib/%s\n' % self.appNameLower
|
||||
|
|
|
@ -263,35 +263,6 @@ def formatNumber(n, sep=',', precision=2, tsep=' '):
|
|||
res += sep + splitted[1]
|
||||
return res
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
class XhtmlCleaner:
    '''Regex-based cleaner for XHTML chunks.

       The stated objectives of XHTML cleaning in Appy are:

       1. Format XHTML p_s to be storable in the ZODB according to Appy rules:
          a. every <p> or <li> should be on a single line (ending with a
             carriage return), else appy.shared.diff will not be able to
             compute XHTML diffs;
          b. optimize size: HTML comments are removed.

       2. If p_keepStyles (of m_clean) is False, remove style-related
          information, in order to get a standardized content that can be
          dumped in an elegant and systematic manner into a POD template.

       This implementation only covers 1b (comment removal) and, for
       objective 2, the removal of "class" attributes. It does not reflow
       paragraphs (1a).
    '''
    # Regular expressions used for cleaning. Raw strings are used so that the
    # "\s" escapes reach the regex engine untouched.
    # Matches a double-quoted class attribute, e.g. class="myStyle". The
    # whitespace preceding the attribute is not part of the match.
    classAttr = re.compile(r'class\s*=\s*".*?"')
    # Matches an HTML comment, possibly spanning several lines (re.S).
    comment = re.compile(r'<!--.*?-->', re.S)

    @classmethod
    def clean(klass, s, keepStyles=False):
        '''Returns the cleaned variant of p_s.

           HTML comments are always removed. Unless p_keepStyles is True,
           double-quoted "class" attributes are removed as well (the blank
           that preceded the attribute is left in place).'''
        if not keepStyles:
            # Format p_s according to objective 2.
            s = klass.classAttr.sub('', s)
        # Format p_s according to objective 1.
        s = klass.comment.sub('', s)
        return s
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
def lower(s):
|
||||
'''French-accents-aware variant of string.lower.'''
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA.
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
import xml.sax, difflib, types
|
||||
import xml.sax, difflib, types, cgi
|
||||
from xml.sax.handler import ContentHandler, ErrorHandler, feature_external_ges,\
|
||||
property_interning_dict
|
||||
from xml.sax.xmlreader import InputSource
|
||||
|
@ -887,4 +887,127 @@ class XmlComparator:
|
|||
else:
|
||||
lastLinePrinted = False
|
||||
return not atLeastOneDiff
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
class XhtmlCleaner(XmlParser):
    '''SAX-based cleaner for XHTML chunks. This class has 2 objectives:

       1. The main objective is to format XHTML p_s to be storable in the ZODB
          according to Appy rules.
          a. Every element listed in "lineBreakTags" (<p>, <li>, ...) is
             followed by a carriage return and starts on its own line; else,
             appy.shared.diff will not be able to compute XHTML diffs;
          b. Optimize size: HTML comments are removed (no comment handler is
             defined here, so comments are not copied to the result).

       2. If p_keepStyles (of m_clean) is False, some style-related
          information will be removed, in order to get a standardized content
          that can be dumped in an elegant and systematic manner into a POD
          template.

       The parsing state is stored on self.env and is (re)initialised by
       m_clean; the result is built in self.res.
    '''
    # Name of the synthetic root element that m_clean wraps around the chunk
    # to clean, in order to get a well-formed document. It never belongs to the
    # result.
    wrapperTag = 'x'
    # Tags that will not be in the result, content included, if keepStyles is
    # False.
    tagsToIgnoreWithContent = ('style', 'colgroup')
    # Tags that will be removed from the result, but whose content will be kept,
    # if keepStyles is False.
    tagsToIgnoreKeepContent = ('x', 'font')
    # All tags to ignore
    tagsToIgnore = tagsToIgnoreWithContent + tagsToIgnoreKeepContent
    # Attributes to ignore, if keepStyles is False.
    attrsToIgnore = ('align', 'valign', 'cellpadding', 'cellspacing', 'width',
                     'height', 'bgcolor', 'lang', 'border', 'class')
    # Attrs to add, if not present, to ensure good formatting, be it at the web
    # or ODT levels.
    attrsToAdd = {'table': {'cellspacing':'0', 'cellpadding':'6', 'border':'1'},
                  'tr': {'valign': 'top'}}

    # Tags that require a line break to be inserted after them.
    lineBreakTags = ('p', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'td')

    def clean(self, s, keepStyles=True):
        '''Returns the cleaned variant of XHTML chunk p_s. If p_keepStyles is
           False, the tags and attributes listed in the class attributes
           "tagsToIgnore" and "attrsToIgnore" are removed from the result.'''
        # Must we keep style-related information or not?
        self.env.keepStyles = keepStyles
        self.env.currentContent = ''
        # The stack of currently parsed elements (will contain only ignored
        # ones, as tuples (tagName, ignoreContent)).
        self.env.currentElems = []
        # 'ignoreTag' is True if we must ignore the currently walked tag.
        self.env.ignoreTag = False
        # 'ignoreContent' is True if, within the currently ignored tag, we must
        # also ignore its content.
        self.env.ignoreContent = False
        # p_s may contain several top-level elements or bare text: wrap it into
        # a single root element in order to get a parsable document.
        wrapper = self.wrapperTag
        return self.parse('<%s>%s</%s>' % (wrapper, s, wrapper))

    def _isIgnored(self, elem):
        '''Must tag p_elem be left out of the result? It is always the case for
           the synthetic wrapper tag; for the other tags listed in
           "tagsToIgnore", it is only the case if styles must not be kept.'''
        if elem == self.wrapperTag: return True
        return (not self.env.keepStyles) and (elem in self.tagsToIgnore)

    def _syncIgnoreFlags(self):
        '''Recomputes "ignoreTag" and "ignoreContent" from the innermost entry
           of the stack of ignored elements.'''
        e = self.env
        if e.currentElems:
            # Keep ignoring tags.
            e.ignoreTag = True
            e.ignoreContent = e.currentElems[-1][1]
        else:
            # Stop ignoring elems
            e.ignoreTag = e.ignoreContent = False

    def startDocument(self):
        # The result will be cleaned XHTML, joined from self.res.
        self.res = []

    def endDocument(self):
        # Text located directly within the wrapper tag (ie, after the last
        # dumped element, or a chunk made of text only) is still pending in
        # currentContent at this point: dump it, else it would be lost.
        e = self.env
        if e.currentContent:
            self.res.append(e.currentContent)
            e.currentContent = ''
        self.res = ''.join(self.res)

    def startElement(self, elem, attrs):
        e = self.env
        # Dump any previously gathered content if any
        if e.currentContent:
            self.res.append(e.currentContent)
            e.currentContent = ''
        if e.ignoreTag and e.ignoreContent:
            # We are within a region that is dropped with its content. Push
            # this sub-element as well, so that every end tag met in this
            # region pops its own entry and not the one of the dropped tag.
            e.currentElems.append( (elem, True) )
            return
        if self._isIgnored(elem):
            ignoreContent = elem in self.tagsToIgnoreWithContent
            e.currentElems.append( (elem, ignoreContent) )
            self._syncIgnoreFlags()
            return
        # Add a line break before the start tag if required (ie: xhtml differ
        # needs to get paragraphs and other elements on separate lines).
        if (elem in self.lineBreakTags) and self.res and \
           (self.res[-1][-1] != '\n'):
            prefix = '\n'
        else:
            prefix = ''
        res = '%s<%s' % (prefix, elem)
        # Include the found attributes, excepted those that must be ignored.
        dumped = set()
        for name, value in attrs.items():
            if not e.keepStyles and (name in self.attrsToIgnore): continue
            # The SAX parser delivers unescaped values: re-transform special
            # chars (double quotes included) to entities.
            res += ' %s="%s"' % (name, cgi.escape(value, True))
            dumped.add(name)
        # Include additional attributes if required, but never duplicate an
        # attribute that was already dumped.
        if elem in self.attrsToAdd:
            for name, value in self.attrsToAdd[elem].items():
                if name in dumped: continue
                res += ' %s="%s"' % (name, value)
        self.res.append('%s>' % res)

    def endElement(self, elem):
        e = self.env
        if (e.ignoreTag and e.ignoreContent) or self._isIgnored(elem):
            # This is the end of an ignored tag, or of a sub-tag within a
            # region that we must ignore: pop the corresponding entry. Content
            # gathered within a tag whose content is kept stays in
            # currentContent and will be dumped later.
            e.currentElems.pop()
            self._syncIgnoreFlags()
            return
        if e.currentContent:
            self.res.append(e.currentContent)
        # Add a line break after the end tag if required (ie: xhtml differ
        # needs to get paragraphs and other elements on separate lines).
        if elem in self.lineBreakTags:
            suffix = '\n'
        else:
            suffix = ''
        self.res.append('</%s>%s' % (elem, suffix))
        e.currentContent = ''

    def characters(self, content):
        if self.env.ignoreContent: return
        # NOTE(review): the previous code computed a variant of p_content
        # stripped from the blanks that ckeditor may add just after a start
        # tag, but never used it. The content is thus kept as is; confirm
        # whether stripping was intended before re-introducing it.
        # Re-transform XML special chars to entities.
        self.env.currentContent += cgi.escape(content)
|
||||
# ------------------------------------------------------------------------------
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue