2012-01-28 17:07:36 +01:00

140 lines
5.8 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
from collections import defaultdict
import sys
def is_complete_edit(initial_line, a, b, cmds):
    """
    Replay cmds on a copy of a and report whether the outcome equals b.

    a and b are sequences of lines (without trailing newlines). Every
    command in cmds is a tuple (kind, line, col, text); kind is "I" to
    insert text at (line, col) or "D" to delete text starting there. Line
    numbers in the commands are offset by initial_line. Deleting "\n" joins
    the addressed line with the one following it. a itself is not modified.
    """
    buf = a[:]
    for kind, lineno, col, text in cmds:
        row = lineno - initial_line
        if kind == "I":
            current = buf[row]
            buf[row] = current[:col] + text + current[col:]
        elif kind == "D":
            if text == '\n':
                # Removing the line break merges this line with the next one.
                buf[row] = buf[row] + buf[row+1]
                del buf[row+1]
            else:
                current = buf[row]
                buf[row] = current[:col] + current[col+len(text):]
        # An inserted newline must show up as a new list entry before the
        # next command is applied, so normalise after every command.
        buf = '\n'.join(buf).split('\n')
    if len(buf) != len(b):
        return False
    return all(got == want for got, want in zip(buf, b))
def guess_edit(initial_line, lt, ct, ppos, pos):
    """
    Heuristically derive the edit that turned the old text lt into the new
    text ct, using the previous cursor position ppos and the current one pos.

    Only simple cases are tried: typing on one line, deleting on one line
    (forward or backward) and a carriage return. Each candidate is verified
    with is_complete_edit before it is accepted.

    Returns (True, edits) when a candidate reproduces ct exactly, where edits
    is a tuple of (kind, line, col, text) commands, and (False, None) when no
    guess fits. Two empty texts yield (True, ()).
    """
    if not (len(lt) or len(ct)):
        return True, ()

    if pos.line != ppos.line:
        # The cursor changed lines; the only case handled is a carriage
        # return, which leaves the cursor at column 0 of the next line.
        if ppos.line + 1 == pos.line and pos.col == 0:
            es = (("I", ppos.line, ppos.col, "\n"),)
            if is_complete_edit(initial_line, lt, ct, es):
                return True, es
        return False, None

    # From here on the cursor stayed on the same line.
    old_len = len(lt[ppos.line - initial_line])
    new_len = len(ct[pos.line - initial_line])

    if ppos < pos and old_len < new_len:
        # Cursor moved forward and the line grew: text was probably typed
        # between the two cursor positions.
        typed = ct[ppos.line - initial_line][ppos.col:pos.col]
        es = (("I", ppos.line, ppos.col, typed),)
        if is_complete_edit(initial_line, lt, ct, es):
            return True, es

    if new_len < old_len:
        removed = old_len - new_len
        if ppos == pos:
            # Cursor did not move: 'x', DEL, dt or similar.
            gone = lt[ppos.line - initial_line][ppos.col:ppos.col + removed]
            es = (("D", pos.line, pos.col, gone),)
            if is_complete_edit(initial_line, lt, ct, es):
                return True, es
        if pos < ppos:
            # Cursor moved backwards: backspacing, dT, dF or similar.
            gone = lt[pos.line - initial_line][pos.col:pos.col + removed]
            es = (("D", pos.line, pos.col, gone),)
            if is_complete_edit(initial_line, lt, ct, es):
                return True, es

    return False, None
def diff(a, b, sline=0):
    """
    Return a tuple of deletions and insertions that will turn a into b. This
    is done by traversing an implicit edit graph and searching for the
    shortest route. The basic idea is as follows:

        - Matching a character is free as long as there was no
          deletion/insertion before. Then, matching will be seen as
          delete + insert [1].
        - Deleting one character has the same cost everywhere. Each
          additional character costs only half of the first deletion.
        - Insertion is cheaper the earlier it happens. The first character
          is more expensive than any later [2].

    [1] This is that world -> aolsa will be "D" world + "I" aolsa instead of
        "D" w , "D" rld, "I" a, "I" lsa
    [2] This is that "hello\n\n" -> "hello\n\n\n" will insert a newline after
        hello and not after \n

    a and b are the old and the new text; they are compared character by
    character. Each entry of the result is a tuple (kind, line, col, text)
    with kind "D" (delete text at line/col) or "I" (insert text at line/col).
    Line numbers start at sline, columns start at 0 and restart after every
    newline. An empty tuple is returned when a and b are already equal.
    """
    # Pending search states, bucketed by the cost at which they are reached.
    # A state is (index into a, index into b, line, col, edits so far).
    d = defaultdict(list)
    # Lowest cost at which each (x, y) pair has been queued. sys.maxsize
    # marks "not queued yet"; sys.maxint was used here before, but it does
    # not exist on Python 3.
    seen = defaultdict(lambda: sys.maxsize)

    d[0] = [(0, 0, sline, 0, ())]
    cost = 0
    D_COST = len(a) + len(b)
    I_COST = len(a) + len(b)
    while True:
        while d[cost]:
            x, y, line, col, what = d[cost].pop()

            # The remainders are identical: nothing left to edit.
            if a[x:] == b[y:]:
                return what

            if x < len(a) and y < len(b) and a[x] == b[y]:  # MATCH
                ncol = col + 1
                nline = line
                if a[x] == '\n':
                    ncol = 0
                    nline += 1
                lcost = cost + 1
                if (what and what[-1][0] == "D" and what[-1][1] == line and
                        what[-1][2] == col and a[x] != '\n'):
                    # Matching directly after a deletion should be as costly
                    # as DELETE + INSERT + a bit.
                    # NOTE(review): this is an absolute value, not an
                    # increment on `cost`; a state queued below the current
                    # cost is never visited again - confirm this is intended.
                    lcost = (D_COST + I_COST) * 1.5
                if seen[x+1, y+1] > lcost:
                    d[lcost].append((x+1, y+1, nline, ncol, what))
                    seen[x+1, y+1] = lcost

            if y < len(b):  # INSERT
                ncol = col + 1
                nline = line
                if b[y] == '\n':
                    ncol = 0
                    nline += 1
                if (what and what[-1][0] == "I" and what[-1][1] == nline and
                        what[-1][2] + len(what[-1][-1]) == col and
                        b[y] != '\n' and
                        seen[x, y+1] > cost + (I_COST + ncol) // 2):
                    # Continues the previous insertion on the same line:
                    # extend that command at a reduced cost.
                    seen[x, y+1] = cost + (I_COST + ncol) // 2
                    d[cost + (I_COST + ncol) // 2].append(
                        (x, y+1, line, ncol, what[:-1] + (
                            ("I", what[-1][1], what[-1][2],
                             what[-1][-1] + b[y]),))
                    )
                elif seen[x, y+1] > cost + I_COST + ncol:
                    # Start a new insertion command.
                    seen[x, y+1] = cost + I_COST + ncol
                    d[cost + ncol + I_COST].append(
                        (x, y+1, nline, ncol,
                         what + (("I", line, col, b[y]),))
                    )

            if x < len(a):  # DELETE
                if (what and what[-1][0] == "D" and what[-1][1] == line and
                        what[-1][2] == col and a[x] != '\n' and
                        what[-1][-1] != '\n' and
                        seen[x+1, y] > cost + D_COST // 2):
                    # Continues the previous deletion at the same position:
                    # extend that command at half the cost. Newlines are
                    # always kept as separate commands.
                    seen[x+1, y] = cost + D_COST // 2
                    d[cost + D_COST // 2].append(
                        (x+1, y, line, col, what[:-1] + (
                            ("D", line, col, what[-1][-1] + a[x]),))
                    )
                elif seen[x+1, y] > cost + D_COST:
                    # Start a new deletion command.
                    seen[x+1, y] = cost + D_COST
                    d[cost + D_COST].append(
                        (x+1, y, line, col,
                         what + (("D", line, col, a[x]),))
                    )
        cost += 1