From 10398e770acb2fdcec25d8a1c07c183caaa13a7c Mon Sep 17 00:00:00 2001 From: Gaetan Delannay Date: Sat, 19 Nov 2011 23:53:38 +0100 Subject: [PATCH] appy.shared.diff: more work. --- shared/diff.py | 230 ++++++++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 106 deletions(-) diff --git a/shared/diff.py b/shared/diff.py index 4a04eea..5eebb83 100644 --- a/shared/diff.py +++ b/shared/diff.py @@ -86,15 +86,18 @@ class Merger: while oldText: # Get the overlapping (new) diff. newDiff, newDiffStart, isPrevious = self.getNextDiff() - if not newDiff: - # No more new diff. So normally, we should find what remains in - # oldText at self.lineB[self.i:] - if not self.lineB[self.i:].startswith(oldText): - raise 'Error!!!!' + if not newDiff or (newDiffStart >= (self.i + len(oldText))): + # No more new diff, or a new diff but far away, not within + # oldText. So insert new the rest of p_oldText. + # Invariant: at this point, we should find what remains in + # oldText at self.lineB[self.i:]. res += self.differ.getModifiedChunk(oldText, 'insert', '', msg=oldDiff.group(2)) self.i += len(oldText) oldText = '' + # If we have "popped" a new diff, dump it anyway. + if newDiff: + res = self.dumpDiff(res, newDiff, newDiffStart, isPrevious) break # Dump the part of the old text that has been untouched by the new # diff. @@ -119,47 +122,54 @@ class Merger: self.deltaPrevious -= len(newDiff.group(3)) return res + def dumpDiff(self, res, diff, diffStart, isPrevious): + '''Dumps the next p_diff (starting at p_diffStart) to insert into p_res + and return p_res. If p_isPrevious is True, the diff is an old one + (from self.lineA); else, it is a new one (from self.lineB).''' + # Dump the part of lineB between self.i and diffStart + res += self.lineB[self.i:diffStart] + self.i = diffStart + if isPrevious: + # Dump the old diff (from self.lineA) + if diff.group(1) == 'insert': + # Check if the inserted text is still present in lineB + if self.lineB[self.i:].startswith(diff.group(3)): + # Yes. Dump the diff and go ahead within lineB + res += diff.group(0) + self.i += len(diff.group(3)) + else: + # The inserted text can't be found as is in lineB. + # Must have been (partly) re-edited or removed. + overlap = self.manageOverlap(diff) + res += overlap + elif diff.group(1) == 'delete': + res += diff.group(0) + else: + # Dump the new diff (from self.lineB) + res += diff.group(0) + # Move forward within self.lineB + self.i += len(diff.group(0)) + # Because of this new diff, all indexes computed on lineA are + # now wrong because we express them relative to lineB. So: + # update self.deltaPrevious to take this into account. + self.deltaPrevious += len(diff.group(0)) + if diff.group(1) == 'delete': + # The indexes in self.lineA do not take the deleted text into + # account, because it wasn't deleted at this time. So remove + # from self.deltaPrevious the length of removed text. + self.deltaPrevious -= len(diff.group(3)) + return res + def merge(self): '''Merges self.previousDiffs into self.lineB.''' res = '' diff, diffStart, isPrevious = self.getNextDiff() while diff: - # Dump the part of lineB between self.i and diffStart - res += self.lineB[self.i:diffStart] - self.i = diffStart - if isPrevious: - if diff.group(1) == 'insert': - # Check if the inserted text is still present in lineB - if self.lineB[self.i:].startswith(diff.group(3)): - # Yes. Dump the diff and go ahead within lineB - res += diff.group(0) - self.i += len(diff.group(3)) - else: - # The inserted text can't be found as is in lineB. - # Must have been (partly) re-edited or removed. - - overlap = self.manageOverlap(diff) - res += overlap - elif diff.group(1) == 'delete': - res += diff.group(0) - else: - # Dump the diff and update self.i - res += diff.group(0) - self.i += len(diff.group(0)) - # Because of this new diff, all indexes computed on lineA are - # now wrong because we express them relative to lineB. So: - # update self.deltaPrevious to take this into account. - self.deltaPrevious += len(diff.group(0)) - if diff.group(1) == 'delete': - # The indexes in lineA do not take the deleted text into - # account, because it wasn't deleted at this time. So remove - # from self.deltaPrevious the length of removed text. - self.deltaPrevious -= len(diff.group(3)) - # Load next diff + res = self.dumpDiff(res, diff, diffStart, isPrevious) + # Load the next diff, if any diff, diffStart, isPrevious = self.getNextDiff() # Dump the end of self.lineB if not completely consumed - if self.i < len(self.lineB): - res += self.lineB[self.i:] + if self.i < len(self.lineB): res += self.lineB[self.i:] return res # ------------------------------------------------------------------------------ @@ -405,12 +415,14 @@ class HtmlDiff: return res garbage = ('', '\r') - def removeGarbage(self, l): + def removeGarbage(self, l, sep): '''Removes from list p_l elements that have no interest, like blank - strings or considered as is.''' + strings or considered as is. Also: strip lines (ie, if sep is a + carriage return.''' i = len(l)-1 while i >= 0: if l[i] in self.garbage: del l[i] + if sep == '\n': l[i] = l[i].strip() i -= 1 return l @@ -444,33 +456,64 @@ class HtmlDiff: if old[jo] != new[jn]: diffFound=True return i, jo+1, jn+1 - def getReplacement(self, sep, lineA, lineB, previousDiffsA, outerTagA): - '''p_lineA has been replaced with p_lineB. Here, we will investigate - further here and explore differences at the *word* level between - p_lineA and p_lineB. + def getDumpPrefix(self, res, add, previousAdd, sep): + '''In most cases, when concatenating the next diff (p_add) to the + global result (p_res), I must prefix it with p_sep (excepted if p_res + is still empty). But when p_sep is a space, no space must be inserted + between 2 adjacent updates (p_add and p_previousAdd), because such a + space was not in the original version. This method computes the + prefix, that can thus be empty if this latter case is met.''' + prefix = '' + if not res: return prefix + if (sep == ' ') and previousAdd and \ + previousAdd.endswith('') and add.startswith('') and \ - add.startswith('