140 lines
5.8 KiB
Python
140 lines
5.8 KiB
Python
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
|
|
from collections import defaultdict
|
|
import sys
|
|
|
|
def is_complete_edit(initial_line, a, b, cmds):
    """
    Check whether applying the edit commands in cmds to a yields b.

    a and b are sequences of lines (without trailing newlines). Each command
    is a tuple (ctype, line, col, text) where ctype is "D" (delete text at
    line/col) or "I" (insert text at line/col). Line numbers in the commands
    are absolute; initial_line is subtracted to index into a.

    a itself is not modified, the commands are applied to a copy.
    Returns True if the edited copy equals b line by line, False otherwise.
    """
    lines = a[:]
    for kind, row, col, text in cmds:
        row -= initial_line
        if kind == "I":
            current = lines[row]
            lines[row] = current[:col] + text + current[col:]
        elif kind == "D":
            if text == '\n':
                # Removing a line break merges the following line into this one
                lines[row] = lines[row] + lines[row + 1]
                del lines[row + 1]
            else:
                current = lines[row]
                lines[row] = current[:col] + current[col + len(text):]
        # Inserted text may contain newlines; re-split so that every entry
        # is a single line again before the next command is applied
        lines = '\n'.join(lines).split('\n')

    if len(lines) != len(b):
        return False
    return all(left == right for left, right in zip(lines, b))
|
|
|
|
def guess_edit(initial_line, lt, ct, ppos, pos):
    """
    Cheaply guess the single edit that turned the lines lt into the lines ct.

    The guess is based on how the cursor moved from ppos to pos (objects with
    .line and .col that can be compared with < and ==) and on whether the
    line under the cursor got longer or shorter. This covers simple cases:
    typing characters, deleting at or before the cursor and a carriage return.
    Every candidate is verified with is_complete_edit before it is reported.

    Returns (True, edits) when a candidate reproduces ct (edits is an empty
    tuple if both lt and ct are empty), otherwise (False, None).
    """
    if len(lt) == 0 and len(ct) == 0:
        return True, ()

    def verified(candidate):
        return is_complete_edit(initial_line, lt, ct, candidate)

    same_line = pos.line == ppos.line
    if not same_line:
        # Cursor moved to another line: only a carriage return is recognised
        if ppos.line + 1 == pos.line and pos.col == 0:
            candidate = (("I", ppos.line, ppos.col, "\n"),)
            if verified(candidate):
                return True, candidate
        return False, None

    # Cursor stayed in one line
    row = ppos.line - initial_line
    before_len = len(lt[row])
    after_len = len(ct[row])

    if ppos < pos and after_len > before_len:
        # Cursor moved right and the line grew: characters were likely typed
        typed = ct[row][ppos.col:pos.col]
        candidate = (("I", ppos.line, ppos.col, typed),)
        if verified(candidate):
            return True, candidate

    if after_len < before_len:
        shrink = before_len - after_len
        if ppos == pos:
            # Cursor did not move: 'x', DEL, dt or something similar
            removed = lt[row][ppos.col:ppos.col + shrink]
            candidate = (("D", pos.line, pos.col, removed),)
            if verified(candidate):
                return True, candidate
        if pos < ppos:
            # Cursor moved left: backspacing, dT, dF or something similar
            removed = lt[row][pos.col:pos.col + shrink]
            candidate = (("D", pos.line, pos.col, removed),)
            if verified(candidate):
                return True, candidate

    return False, None
|
|
|
|
def diff(a, b, sline=0):
    """
    Return a tuple of deletions and insertions that will turn a into b. This
    is done by traversing an implicit edit graph and searching for the
    shortest route. The basic idea is as follows:

        - Matching a character is free as long as there was no
          deletion/insertion before. Then, matching will be seen as
          delete + insert [1].
        - Deleting one character has the same cost everywhere. Each additional
          character costs only half of the first deletion.
        - Insertion is cheaper the earlier it happens. The first character is
          more expensive than any later [2].

    [1] This is that world -> aolsa will be "D" world + "I" aolsa instead of
        "D" w , "D" rld, "I" a, "I" lsa
    [2] This is that "hello\\n\\n" -> "hello\\n\\n\\n" will insert a newline
        after hello and not after \\n

    a, b  -- old and new text; they are indexed and sliced element by element
             and the elements are compared against '\\n' to track lines.
    sline -- line number reported for the first line of a (default 0).

    Each entry of the returned tuple is (kind, line, col, text) with kind
    being "D" (delete) or "I" (insert). An empty tuple is returned if a and b
    are equal.
    """
    # Work lists of search states, bucketed by their accumulated cost
    d = defaultdict(list)
    # Cheapest cost at which a position (x, y) has been queued so far.
    # sys.maxsize is used because sys.maxint does not exist on Python 3.
    seen = defaultdict(lambda: sys.maxsize)

    # A state is (index in a, index in b, current line, current col, edits)
    d[0] = [(0, 0, sline, 0, ())]

    cost = 0
    D_COST = len(a) + len(b)
    I_COST = len(a) + len(b)
    while True:
        while len(d[cost]):
            x, y, line, col, what = d[cost].pop()

            # The remaining tails are equal, nothing more to edit
            if a[x:] == b[y:]:
                return what

            if x < len(a) and y < len(b) and a[x] == b[y]:  # MATCH
                ncol = col + 1
                nline = line
                if a[x] == '\n':
                    ncol = 0
                    nline += 1
                lcost = cost + 1
                if (what and what[-1][0] == "D" and what[-1][1] == line and
                        what[-1][2] == col and a[x] != '\n'):
                    # Matching directly after a deletion should be as costly
                    # as DELETE + INSERT + a bit.
                    # NOTE(review): this value is not added to the current
                    # cost, unlike every other transition - confirm intent.
                    lcost = (D_COST + I_COST) * 1.5
                if seen[x + 1, y + 1] > lcost:
                    d[lcost].append((x + 1, y + 1, nline, ncol, what))
                    seen[x + 1, y + 1] = lcost

            if y < len(b):  # INSERT
                ncol = col + 1
                nline = line
                if b[y] == '\n':
                    ncol = 0
                    nline += 1
                if (what and what[-1][0] == "I" and what[-1][1] == nline and
                        what[-1][2] + len(what[-1][-1]) == col and
                        b[y] != '\n' and
                        seen[x, y + 1] > cost + (I_COST + ncol) // 2):
                    # Extend the previous insertion by one more character
                    seen[x, y + 1] = cost + (I_COST + ncol) // 2
                    d[cost + (I_COST + ncol) // 2].append(
                        (x, y + 1, line, ncol, what[:-1] + (
                            ("I", what[-1][1], what[-1][2],
                             what[-1][-1] + b[y]),))
                    )
                elif seen[x, y + 1] > cost + I_COST + ncol:
                    # Start a new insertion at the current position
                    seen[x, y + 1] = cost + I_COST + ncol
                    d[cost + ncol + I_COST].append(
                        (x, y + 1, nline, ncol,
                         what + (("I", line, col, b[y]),))
                    )

            if x < len(a):  # DELETE
                if (what and what[-1][0] == "D" and what[-1][1] == line and
                        what[-1][2] == col and a[x] != '\n' and
                        what[-1][-1] != '\n' and
                        seen[x + 1, y] > cost + D_COST // 2):
                    # Extend the previous deletion by one more character
                    seen[x + 1, y] = cost + D_COST // 2
                    d[cost + D_COST // 2].append(
                        (x + 1, y, line, col, what[:-1] + (
                            ("D", line, col, what[-1][-1] + a[x]),))
                    )
                elif seen[x + 1, y] > cost + D_COST:
                    # Start a new deletion at the current position
                    seen[x + 1, y] = cost + D_COST
                    d[cost + D_COST].append(
                        (x + 1, y, line, col,
                         what + (("D", line, col, a[x]),))
                    )
        cost += 1
|
|
|