Skip to content

Commit f3948f6

Browse files
authored
Merge pull request sergi#85 from josharian/varia
Assorted optimizations
2 parents 1744e29 + ded6142 commit f3948f6

File tree

2 files changed

+102
-69
lines changed

2 files changed

+102
-69
lines changed

diffmatchpatch/diff.go

+67 -68
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,41 @@ type Diff struct {
4040
Text string
4141
}
4242

43+
// splice removes amount elements from slice at index index, replacing them with elements.
4344
func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
44-
return append(slice[:index], append(elements, slice[index+amount:]...)...)
45+
if len(elements) == amount {
46+
// Easy case: overwrite the relevant items.
47+
copy(slice[index:], elements)
48+
return slice
49+
}
50+
if len(elements) < amount {
51+
// Fewer new items than old.
52+
// Copy in the new items.
53+
copy(slice[index:], elements)
54+
// Shift the remaining items left.
55+
copy(slice[index+len(elements):], slice[index+amount:])
56+
// Calculate the new end of the slice.
57+
end := len(slice) - amount + len(elements)
58+
// Zero stranded elements at end so that they can be garbage collected.
59+
tail := slice[end:]
60+
for i := range tail {
61+
tail[i] = Diff{}
62+
}
63+
return slice[:end]
64+
}
65+
// More new items than old.
66+
// Make room in slice for new elements.
67+
// There's probably an even more efficient way to do this,
68+
// but this is simple and clear.
69+
need := len(slice) - amount + len(elements)
70+
for len(slice) < need {
71+
slice = append(slice, Diff{})
72+
}
73+
// Shift slice elements right to make room for new elements.
74+
copy(slice[index+len(elements):], slice[index+amount:])
75+
// Copy in new elements.
76+
copy(slice[index:], elements)
77+
return slice
4578
}
4679

4780
// DiffMain finds the differences between two texts.
@@ -145,7 +178,10 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
145178
diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline)
146179
diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline)
147180
// Merge the results.
148-
return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...)
181+
diffs := diffsA
182+
diffs = append(diffs, Diff{DiffEqual, string(midCommon)})
183+
diffs = append(diffs, diffsB...)
184+
return diffs
149185
} else if checklines && len(text1) > 100 && len(text2) > 100 {
150186
return dmp.diffLineMode(text1, text2, deadline)
151187
}
@@ -247,7 +283,7 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time)
247283
k2end := 0
248284
for d := 0; d < maxD; d++ {
249285
// Bail out if deadline is reached.
250-
if !deadline.IsZero() && time.Now().After(deadline) {
286+
if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) {
251287
break
252288
}
253289

@@ -434,48 +470,29 @@ func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int {
434470

435471
// commonPrefixLength returns the length of the common prefix of two rune slices.
436472
func commonPrefixLength(text1, text2 []rune) int {
437-
short, long := text1, text2
438-
if len(short) > len(long) {
439-
short, long = long, short
440-
}
441-
for i, r := range short {
442-
if r != long[i] {
443-
return i
473+
// Linear search. See comment in commonSuffixLength.
474+
n := 0
475+
for ; n < len(text1) && n < len(text2); n++ {
476+
if text1[n] != text2[n] {
477+
return n
444478
}
445479
}
446-
return len(short)
480+
return n
447481
}
448482

449483
// commonSuffixLength returns the length of the common suffix of two rune slices.
450484
func commonSuffixLength(text1, text2 []rune) int {
451-
n := min(len(text1), len(text2))
452-
for i := 0; i < n; i++ {
453-
if text1[len(text1)-i-1] != text2[len(text2)-i-1] {
454-
return i
485+
// Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/.
486+
// See discussion at https://github.com/sergi/go-diff/issues/54.
487+
i1 := len(text1)
488+
i2 := len(text2)
489+
for n := 0; ; n++ {
490+
i1--
491+
i2--
492+
if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] {
493+
return n
455494
}
456495
}
457-
return n
458-
459-
// TODO research and benchmark this, why is it not activated? https://github.com/sergi/go-diff/issues/54
460-
// Binary search.
461-
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
462-
/*
463-
pointermin := 0
464-
pointermax := math.Min(len(text1), len(text2))
465-
pointermid := pointermax
466-
pointerend := 0
467-
for pointermin < pointermid {
468-
if text1[len(text1)-pointermid:len(text1)-pointerend] ==
469-
text2[len(text2)-pointermid:len(text2)-pointerend] {
470-
pointermin = pointermid
471-
pointerend = pointermin
472-
} else {
473-
pointermax = pointermid
474-
}
475-
pointermid = math.Floor((pointermax-pointermin)/2 + pointermin)
476-
}
477-
return pointermid
478-
*/
479496
}
480497

481498
// DiffCommonOverlap determines if the suffix of one string is the prefix of another.
@@ -628,11 +645,7 @@ func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
628645
func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
629646
changes := false
630647
// Stack of indices where equalities are found.
631-
type equality struct {
632-
data int
633-
next *equality
634-
}
635-
var equalities *equality
648+
equalities := make([]int, 0, len(diffs))
636649

637650
var lastequality string
638651
// Always equal to diffs[equalities[equalitiesLength - 1]][1]
@@ -645,11 +658,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
645658
for pointer < len(diffs) {
646659
if diffs[pointer].Type == DiffEqual {
647660
// Equality found.
648-
649-
equalities = &equality{
650-
data: pointer,
651-
next: equalities,
652-
}
661+
equalities = append(equalities, pointer)
653662
lengthInsertions1 = lengthInsertions2
654663
lengthDeletions1 = lengthDeletions2
655664
lengthInsertions2 = 0
@@ -670,23 +679,20 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
670679
(len(lastequality) <= difference1) &&
671680
(len(lastequality) <= difference2) {
672681
// Duplicate record.
673-
insPoint := equalities.data
674-
diffs = append(
675-
diffs[:insPoint],
676-
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
682+
insPoint := equalities[len(equalities)-1]
683+
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
677684

678685
// Change second copy to insert.
679686
diffs[insPoint+1].Type = DiffInsert
680687
// Throw away the equality we just deleted.
681-
equalities = equalities.next
688+
equalities = equalities[:len(equalities)-1]
682689

683-
if equalities != nil {
684-
equalities = equalities.next
690+
if len(equalities) > 0 {
691+
equalities = equalities[:len(equalities)-1]
685692
}
686-
if equalities != nil {
687-
pointer = equalities.data
688-
} else {
689-
pointer = -1
693+
pointer = -1
694+
if len(equalities) > 0 {
695+
pointer = equalities[len(equalities)-1]
690696
}
691697

692698
lengthInsertions1 = 0 // Reset the counters.
@@ -724,10 +730,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
724730
float64(overlapLength1) >= float64(len(insertion))/2 {
725731

726732
// Overlap found. Insert an equality and trim the surrounding edits.
727-
diffs = append(
728-
diffs[:pointer],
729-
append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...)
730-
733+
diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
731734
diffs[pointer-1].Text =
732735
deletion[0 : len(deletion)-overlapLength1]
733736
diffs[pointer+1].Text = insertion[overlapLength1:]
@@ -738,10 +741,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
738741
float64(overlapLength2) >= float64(len(insertion))/2 {
739742
// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
740743
overlap := Diff{DiffEqual, deletion[:overlapLength2]}
741-
diffs = append(
742-
diffs[:pointer],
743-
append([]Diff{overlap}, diffs[pointer:]...)...)
744-
744+
diffs = splice(diffs, pointer, 0, overlap)
745745
diffs[pointer-1].Type = DiffInsert
746746
diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2]
747747
diffs[pointer+1].Type = DiffDelete
@@ -954,8 +954,7 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
954954
insPoint := equalities.data
955955

956956
// Duplicate record.
957-
diffs = append(diffs[:insPoint],
958-
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
957+
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
959958

960959
// Change second copy to insert.
961960
diffs[insPoint+1].Type = DiffInsert

diffmatchpatch/diff_test.go

+35 -1
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,8 @@ func TestDiffCommonSuffix(t *testing.T) {
130130
}
131131
}
132132

133+
var SinkInt int // exported sink var to avoid compiler optimizations in benchmarks
134+
133135
func BenchmarkDiffCommonSuffix(b *testing.B) {
134136
s := "ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖ"
135137

@@ -138,10 +140,42 @@ func BenchmarkDiffCommonSuffix(b *testing.B) {
138140
b.ResetTimer()
139141

140142
for i := 0; i < b.N; i++ {
141-
dmp.DiffCommonSuffix(s, s)
143+
SinkInt = dmp.DiffCommonSuffix(s, s)
142144
}
143145
}
144146

147+
func BenchmarkCommonLength(b *testing.B) {
148+
data := []struct {
149+
name string
150+
x, y []rune
151+
}{
152+
{name: "empty", x: nil, y: []rune{}},
153+
{name: "short", x: []rune("AABCC"), y: []rune("AA-CC")},
154+
{name: "long",
155+
x: []rune(strings.Repeat("A", 1000) + "B" + strings.Repeat("C", 1000)),
156+
y: []rune(strings.Repeat("A", 1000) + "-" + strings.Repeat("C", 1000)),
157+
},
158+
}
159+
b.Run("prefix", func(b *testing.B) {
160+
for _, d := range data {
161+
b.Run(d.name, func(b *testing.B) {
162+
for i := 0; i < b.N; i++ {
163+
SinkInt = commonPrefixLength(d.x, d.y)
164+
}
165+
})
166+
}
167+
})
168+
b.Run("suffix", func(b *testing.B) {
169+
for _, d := range data {
170+
b.Run(d.name, func(b *testing.B) {
171+
for i := 0; i < b.N; i++ {
172+
SinkInt = commonSuffixLength(d.x, d.y)
173+
}
174+
})
175+
}
176+
})
177+
}
178+
145179
func TestCommonSuffixLength(t *testing.T) {
146180
type TestCase struct {
147181
Text1 string

0 commit comments

Comments
 (0)