More work on appy.shared.diff...
This commit is contained in:
parent
2ec05939fe
commit
cf992843ff
|
@ -87,6 +87,16 @@ class Merger:
|
||||||
# Get the overlapping (new) diff.
|
# Get the overlapping (new) diff.
|
||||||
newDiff, newDiffStart, isPrevious = self.getNextDiff()
|
newDiff, newDiffStart, isPrevious = self.getNextDiff()
|
||||||
if not newDiff:
|
if not newDiff:
|
||||||
|
# No more new diff. So normally, we should find what remains in
|
||||||
|
# oldText at self.lineB[self.i:]
|
||||||
|
if not self.lineB[self.i:].startswith(oldText):
|
||||||
|
# Abnormal additional char. Probably a space? Indeed,
|
||||||
|
# word-level comparisons imply split(' ') which can be
|
||||||
|
# error-prone.
|
||||||
|
res += self.lineB[self.i]
|
||||||
|
self.i += 1
|
||||||
|
if not self.lineB[self.i:].startswith(oldText):
|
||||||
|
raise 'Error!!!!'
|
||||||
res += self.differ.getModifiedChunk(oldText, 'insert', '',
|
res += self.differ.getModifiedChunk(oldText, 'insert', '',
|
||||||
msg=oldDiff.group(2))
|
msg=oldDiff.group(2))
|
||||||
self.i += len(oldText)
|
self.i += len(oldText)
|
||||||
|
@ -118,6 +128,9 @@ class Merger:
|
||||||
def merge(self):
|
def merge(self):
|
||||||
'''Merges self.previousDiffs into self.lineB.'''
|
'''Merges self.previousDiffs into self.lineB.'''
|
||||||
res = ''
|
res = ''
|
||||||
|
print 'MERGE'
|
||||||
|
print 'Line A', self.lineA
|
||||||
|
print 'Line B', self.lineB
|
||||||
diff, diffStart, isPrevious = self.getNextDiff()
|
diff, diffStart, isPrevious = self.getNextDiff()
|
||||||
while diff:
|
while diff:
|
||||||
# Dump the part of lineB between self.i and diffStart
|
# Dump the part of lineB between self.i and diffStart
|
||||||
|
@ -136,6 +149,8 @@ class Merger:
|
||||||
|
|
||||||
overlap = self.manageOverlap(diff)
|
overlap = self.manageOverlap(diff)
|
||||||
res += overlap
|
res += overlap
|
||||||
|
elif diff.group(1) == 'delete':
|
||||||
|
res += diff.group(0)
|
||||||
else:
|
else:
|
||||||
# Dump the diff and update self.i
|
# Dump the diff and update self.i
|
||||||
res += diff.group(0)
|
res += diff.group(0)
|
||||||
|
@ -208,7 +223,7 @@ class HtmlDiff:
|
||||||
# Will the surrounding tag be a div or a span?
|
# Will the surrounding tag be a div or a span?
|
||||||
if sep == '\n': tag = 'div'
|
if sep == '\n': tag = 'div'
|
||||||
else: tag = 'span'
|
else: tag = 'span'
|
||||||
# What message wiill it show in its 'title' attribute?
|
# What message will it show in its 'title' attribute?
|
||||||
if not msg:
|
if not msg:
|
||||||
exec 'msg = self.%sMsg' % type
|
exec 'msg = self.%sMsg' % type
|
||||||
# What CSS class (or, if none, tag-specific style) will be used ?
|
# What CSS class (or, if none, tag-specific style) will be used ?
|
||||||
|
@ -221,9 +236,10 @@ class HtmlDiff:
|
||||||
# the 'name' attribute of the tag indicates the type of the update.
|
# the 'name' attribute of the tag indicates the type of the update.
|
||||||
exec 'tagName = self.%sName' % type
|
exec 'tagName = self.%sName' % type
|
||||||
# The idea is: if there are several lines, every line must be surrounded
|
# The idea is: if there are several lines, every line must be surrounded
|
||||||
# by a tag. this way, we know that a surrounding tag can't span several
|
# by a tag. This way, we know that a surrounding tag can't span several
|
||||||
# lines, which is a prerequisite for managing cumulative diffs.
|
# lines, which is a prerequisite for managing cumulative diffs.
|
||||||
if sep == ' ':
|
if sep == ' ':
|
||||||
|
if not isinstance(seq, basestring):
|
||||||
seq = sep.join(seq)
|
seq = sep.join(seq)
|
||||||
sep = ''
|
sep = ''
|
||||||
if isinstance(seq, basestring):
|
if isinstance(seq, basestring):
|
||||||
|
@ -307,7 +323,7 @@ class HtmlDiff:
|
||||||
# Wrap content into reified tag
|
# Wrap content into reified tag
|
||||||
return startTag + content + endTag
|
return startTag + content + endTag
|
||||||
|
|
||||||
def getSeqDiff(self, seqA, seqB):
|
def getSeqDiff(self, seqA, seqB, sep):
|
||||||
'''p_seqA and p_seqB are lists of strings. Here we will try to identify
|
'''p_seqA and p_seqB are lists of strings. Here we will try to identify
|
||||||
similarities between strings from p_seqA and p_seqB, and return a
|
similarities between strings from p_seqA and p_seqB, and return a
|
||||||
list of differences between p_seqA and p_seqB, where each element
|
list of differences between p_seqA and p_seqB, where each element
|
||||||
|
@ -369,7 +385,17 @@ class HtmlDiff:
|
||||||
# Consider any "unconsumed" line from p_seqB as being inserted.
|
# Consider any "unconsumed" line from p_seqB as being inserted.
|
||||||
if k < len(seqB):
|
if k < len(seqB):
|
||||||
for line in seqB[k:]: res.append( ('insert', line) )
|
for line in seqB[k:]: res.append( ('insert', line) )
|
||||||
return res
|
# Merge similar diffs, except if separator is a carriage return
|
||||||
|
if sep == '\n': return res
|
||||||
|
newRes = []
|
||||||
|
lastType = None
|
||||||
|
for type, data in res:
|
||||||
|
if lastType and (type != 'replace') and (lastType == type):
|
||||||
|
newRes[-1] = (type, newRes[-1][1] + sep + data)
|
||||||
|
else:
|
||||||
|
newRes.append( (type, data) )
|
||||||
|
lastType = type
|
||||||
|
return newRes
|
||||||
|
|
||||||
def split(self, s, sep):
|
def split(self, s, sep):
|
||||||
'''Splits string p_s with p_sep. If p_sep is a space, the split can't
|
'''Splits string p_s with p_sep. If p_sep is a space, the split can't
|
||||||
|
@ -395,7 +421,8 @@ class HtmlDiff:
|
||||||
i -= 1
|
i -= 1
|
||||||
return l
|
return l
|
||||||
|
|
||||||
def getLineReplacement(self, lineA, lineB, previousDiffsA, outerTagA):
|
nextSeps = {'\n': ' ', ' ': ''}
|
||||||
|
def getReplacement(self, sep, lineA, lineB, previousDiffsA, outerTagA):
|
||||||
'''p_lineA has been replaced with p_lineB. Here, we will investigate
|
'''p_lineA has been replaced with p_lineB. Here, we will investigate
|
||||||
further here and explore differences at the *word* level between
|
further here and explore differences at the *word* level between
|
||||||
p_lineA and p_lineB.
|
p_lineA and p_lineB.
|
||||||
|
@ -414,8 +441,8 @@ class HtmlDiff:
|
||||||
contentA = matchA and matchA.group(3) or lineA
|
contentA = matchA and matchA.group(3) or lineA
|
||||||
matchB = htmlTag.match(lineB)
|
matchB = htmlTag.match(lineB)
|
||||||
contentB = matchB and matchB.group(3) or lineB
|
contentB = matchB and matchB.group(3) or lineB
|
||||||
# Perform the diff at the level fo words
|
# Perform the diff at the level of words
|
||||||
diff = self.getHtmlDiff(contentA, contentB, ' ')
|
diff = self.getHtmlDiff(contentA, contentB, self.nextSeps[sep])
|
||||||
if matchB:
|
if matchB:
|
||||||
res = self.computeTag(matchB, diff)
|
res = self.computeTag(matchB, diff)
|
||||||
else:
|
else:
|
||||||
|
@ -441,10 +468,14 @@ class HtmlDiff:
|
||||||
similar in a previous call to m_getHtmlDiff with sep=carriage
|
similar in a previous call to m_getHtmlDiff with sep=carriage
|
||||||
return.'''
|
return.'''
|
||||||
res = []
|
res = []
|
||||||
|
if sep:
|
||||||
a = self.split(old, sep)
|
a = self.split(old, sep)
|
||||||
b = self.split(new, sep)
|
b = self.split(new, sep)
|
||||||
|
else:
|
||||||
|
a = old
|
||||||
|
b = new
|
||||||
matcher = difflib.SequenceMatcher()
|
matcher = difflib.SequenceMatcher()
|
||||||
matcher.set_seqs(a,b)
|
matcher.set_seqs(a, b)
|
||||||
for action, i1, i2, j1, j2 in matcher.get_opcodes():
|
for action, i1, i2, j1, j2 in matcher.get_opcodes():
|
||||||
chunkA = self.removeGarbage(a[i1:i2])
|
chunkA = self.removeGarbage(a[i1:i2])
|
||||||
chunkB = self.removeGarbage(b[j1:j2])
|
chunkB = self.removeGarbage(b[j1:j2])
|
||||||
|
@ -467,24 +498,27 @@ class HtmlDiff:
|
||||||
# Was a deletion, not a replacement
|
# Was a deletion, not a replacement
|
||||||
toAdd = self.getModifiedChunk(chunkA, 'delete', sep)
|
toAdd = self.getModifiedChunk(chunkA, 'delete', sep)
|
||||||
else: # At least, a true replacement
|
else: # At least, a true replacement
|
||||||
if sep == '\n':
|
|
||||||
toAdd = []
|
toAdd = []
|
||||||
# We know that some lines have been replaced from a to
|
# We know that some lines/words have been replaced from a to
|
||||||
# b. By identifying similarities between those lines,
|
# b. By identifying similarities between those lines/words,
|
||||||
# consider some as having been deleted, modified or
|
# consider some as having been deleted, modified or
|
||||||
# inserted.
|
# inserted.
|
||||||
for sAction, line in self.getSeqDiff(chunkA, chunkB):
|
for sAction, line in self.getSeqDiff(chunkA, chunkB, sep):
|
||||||
if sAction in ('insert', 'delete'):
|
if sAction in ('insert', 'delete'):
|
||||||
mChunk = self.getModifiedChunk(line,sAction,sep)
|
mChunk = self.getModifiedChunk(line, sAction, sep)
|
||||||
toAdd.append(mChunk)
|
toAdd.append(mChunk)
|
||||||
elif sAction == 'equal':
|
elif sAction == 'equal':
|
||||||
toAdd.append(line)
|
toAdd.append(line)
|
||||||
elif sAction == 'replace':
|
elif sAction == 'replace':
|
||||||
toAdd.append(self.getLineReplacement(*line))
|
toAdd.append(self.getReplacement(sep, *line))
|
||||||
|
# The following line, when sep is the space (=when working
|
||||||
|
# on diffs at the word level), leads to additional spaces
|
||||||
|
# being dumped into the result (ie, a space between a delete
|
||||||
|
# and an insert, which was not in the initial text). We
|
||||||
|
# could not find a way to avoid inserting those spaces. So
|
||||||
|
# when merging diffs (see Merger.merge), we know that a
|
||||||
|
# 'space' error can occur and we take it into account then.
|
||||||
toAdd = sep.join(toAdd)
|
toAdd = sep.join(toAdd)
|
||||||
else:
|
|
||||||
toAdd = self.getModifiedChunk(chunkA, 'delete', sep)
|
|
||||||
toAdd += self.getModifiedChunk(chunkB, 'insert', sep)
|
|
||||||
if toAdd: res.append(toAdd)
|
if toAdd: res.append(toAdd)
|
||||||
return sep.join(res)
|
return sep.join(res)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue