@@ -79,6 +79,9 @@ func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
79
79
return slice
80
80
}
81
81
82
+ // DiffFunction is the signature of a strategy that computes the differences between two rune slices.
+ // diffMainRunes and diffCompute accept one and diffCompute calls it for inputs its own shortcuts did not resolve.
83
+ type DiffFunction func (text1 , text2 []rune ) []Diff
84
+
82
85
// DiffMain finds the differences between two texts.
83
86
// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
84
87
func (dmp * DiffMatchPatch ) DiffMain (text1 , text2 string , checklines bool ) []Diff {
@@ -88,14 +91,20 @@ func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff
88
91
// DiffMainRunes finds the differences between two rune sequences.
89
92
// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
90
93
func (dmp * DiffMatchPatch ) DiffMainRunes (text1 , text2 []rune , checklines bool ) []Diff {
91
- var deadline time.Time
92
- if dmp .DiffTimeout > 0 {
93
- deadline = time .Now ().Add (dmp .DiffTimeout )
94
+ deadline := dmp .getDeadline ()
95
+
96
+ // Encapsulate the deadline and line mode logic in the closure
97
+ diffFn := func (text1 , text2 []rune ) []Diff {
98
+ if checklines && len (text1 ) > 100 && len (text2 ) > 100 {
99
+ return dmp .diffBigLine (text1 , text2 , deadline )
100
+ }
101
+ return dmp .diffBisect (text1 , text2 , deadline )
94
102
}
95
- return dmp .diffMainRunes (text1 , text2 , checklines , deadline )
103
+
104
+ return dmp .diffMainRunes (text1 , text2 , diffFn )
96
105
}
97
106
98
- func (dmp * DiffMatchPatch ) diffMainRunes (text1 , text2 []rune , checklines bool , deadline time. Time ) []Diff {
107
+ func (dmp * DiffMatchPatch ) diffMainRunes (text1 , text2 []rune , diffFn DiffFunction ) []Diff {
99
108
if runesEqual (text1 , text2 ) {
100
109
var diffs []Diff
101
110
if len (text1 ) > 0 {
@@ -116,7 +125,7 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d
116
125
text2 = text2 [:len (text2 )- commonlength ]
117
126
118
127
// Compute the diff on the middle block.
119
- diffs := dmp .diffCompute (text1 , text2 , checklines , deadline )
128
+ diffs := dmp .diffCompute (text1 , text2 , diffFn )
120
129
121
130
// Restore the prefix and suffix.
122
131
if len (commonprefix ) != 0 {
@@ -129,8 +138,16 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d
129
138
return dmp .DiffCleanupMerge (diffs )
130
139
}
131
140
141
+ // getDeadline returns the deadline for the diff operation
142
+ func (dmp * DiffMatchPatch ) getDeadline () time.Time {
143
+ if dmp .DiffTimeout > 0 {
144
+ return time .Now ().Add (dmp .DiffTimeout )
145
+ }
146
+ return time.Time {}
147
+ }
148
+
132
149
// diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix.
133
- func (dmp * DiffMatchPatch ) diffCompute (text1 , text2 []rune , checklines bool , deadline time. Time ) []Diff {
150
+ func (dmp * DiffMatchPatch ) diffCompute (text1 , text2 []rune , diffFn DiffFunction ) []Diff {
134
151
diffs := []Diff {}
135
152
if len (text1 ) == 0 {
136
153
// Just add some text (speedup).
@@ -177,25 +194,30 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
177
194
text2B := hm [3 ]
178
195
midCommon := hm [4 ]
179
196
// Send both pairs off for separate processing.
180
- diffsA := dmp .diffMainRunes (text1A , text2A , checklines , deadline )
181
- diffsB := dmp .diffMainRunes (text1B , text2B , checklines , deadline )
197
+ diffsA := dmp .diffMainRunes (text1A , text2A , diffFn )
198
+ diffsB := dmp .diffMainRunes (text1B , text2B , diffFn )
182
199
// Merge the results.
183
200
diffs := diffsA
184
201
diffs = append (diffs , Diff {DiffEqual , string (midCommon )})
185
202
diffs = append (diffs , diffsB ... )
186
203
return diffs
187
- } else if checklines && len (text1 ) > 100 && len (text2 ) > 100 {
188
- return dmp .diffLineMode (text1 , text2 , deadline )
189
204
}
190
- return dmp .diffBisect (text1 , text2 , deadline )
205
+
206
+ return diffFn (text1 , text2 )
191
207
}
192
208
193
- // diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
194
- func (dmp * DiffMatchPatch ) diffLineMode (text1 , text2 []rune , deadline time.Time ) []Diff {
209
+ // diffBigLine does a quick line-level diff on both []runes, then rediffs the parts for greater accuracy. This speedup can produce non-minimal diffs.
210
+ func (dmp * DiffMatchPatch ) diffBigLine (text1 , text2 []rune , deadline time.Time ) []Diff {
195
211
// Scan the text on a line-by-line basis first.
196
212
text1 , text2 , linearray := dmp .DiffLinesToRunes (string (text1 ), string (text2 ))
197
213
198
- diffs := dmp .diffMainRunes (text1 , text2 , false , deadline )
214
+ // Bisect-only strategy that never re-enters line mode. It is used both for the
215
+ // line-encoded runes and for the character-level re-diff of each changed region below.
216
+ diffFn := func (text1 , text2 []rune ) []Diff {
217
+ return dmp .diffBisect (text1 , text2 , deadline )
218
+ }
219
+
220
+ diffs := dmp .diffMainRunes (text1 , text2 , diffFn )
199
221
200
222
// Convert the diff back to original text.
201
223
diffs = dmp .DiffCharsToLines (diffs , linearray )
@@ -230,7 +252,7 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time)
230
252
countDelete + countInsert )
231
253
232
254
pointer = pointer - countDelete - countInsert
233
- a := dmp .diffMainRunes ([]rune (textDelete ), []rune (textInsert ), false , deadline )
255
+ a := dmp .diffMainRunes ([]rune (textDelete ), []rune (textInsert ), diffFn )
234
256
for j := len (a ) - 1 ; j >= 0 ; j -- {
235
257
diffs = splice (diffs , pointer , 0 , a [j ])
236
258
}
@@ -248,6 +270,37 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time)
248
270
return diffs [:len (diffs )- 1 ] // Remove the dummy entry at the end.
249
271
}
250
272
273
+ // DiffLineMode finds the differences between two texts, always using line mode.
274
+ // Unlike DiffMain with checklines=true, this method will always use line mode regardless of text length.
275
+ // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
276
+ func (dmp * DiffMatchPatch ) DiffLineMode (text1 , text2 string ) []Diff {
277
+ return dmp .diffOnlyByLines ([]rune (text1 ), []rune (text2 ))
278
+ }
279
+
280
+ // diffOnlyByLines finds the differences between two texts, only by lines.
281
+ func (dmp * DiffMatchPatch ) diffOnlyByLines (text1 , text2 []rune ) []Diff {
282
+ // For line-level diffing, we want to do a simple comparison of the line-based runes
283
+ // rather than character-by-character diffing
284
+ diffFn := func (text1 , text2 []rune ) []Diff {
285
+ if ! runesEqual (text1 , text2 ) {
286
+ return []Diff {
287
+ {DiffDelete , string (text1 )},
288
+ {DiffInsert , string (text2 )},
289
+ }
290
+ }
291
+ return []Diff {{DiffEqual , string (text1 )}}
292
+ }
293
+
294
+ // For line-based diffing, we want to avoid the character-based optimizations in diffCompute
295
+ // and just use our simple diff function directly
296
+ diffs := diffFn (text1 , text2 )
297
+
298
+ // Optimize line-based diffs using line-specific cleanup
299
+ diffs = dmp .DiffCleanupLineBased (diffs )
300
+
301
+ return diffs
302
+ }
303
+
251
304
// DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff.
252
305
// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
253
306
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
@@ -380,9 +433,14 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
380
433
runes1b := runes1 [x :]
381
434
runes2b := runes2 [y :]
382
435
436
+ // wrap dmp.diffBisect with deadline
437
+ diffFn := func (text1 , text2 []rune ) []Diff {
438
+ return dmp .diffBisect (text1 , text2 , deadline )
439
+ }
440
+
383
441
// Compute both diffs serially.
384
- diffs := dmp .diffMainRunes (runes1a , runes2a , false , deadline )
385
- diffsb := dmp .diffMainRunes (runes1b , runes2b , false , deadline )
442
+ diffs := dmp .diffMainRunes (runes1a , runes2a , diffFn )
443
+ diffsb := dmp .diffMainRunes (runes1b , runes2b , diffFn )
386
444
387
445
return append (diffs , diffsb ... )
388
446
}
@@ -953,6 +1011,77 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
953
1011
return diffs
954
1012
}
955
1013
1014
+ // DiffCleanupLineBased optimizes line-based diffs by merging consecutive operations,
1015
+ // removing empty line diffs, and grouping related line changes together.
1016
+ // This function is specifically designed for line-level diffing where each diff
1017
+ // represents entire lines rather than character-level changes.
1018
+ func (dmp * DiffMatchPatch ) DiffCleanupLineBased (diffs []Diff ) []Diff {
1019
+ if len (diffs ) == 0 {
1020
+ return diffs
1021
+ }
1022
+
1023
+ // First pass: merge consecutive operations of the same type
1024
+ cleaned := make ([]Diff , 0 , len (diffs ))
1025
+ pointer := 0
1026
+
1027
+ for pointer < len (diffs ) {
1028
+ current := diffs [pointer ]
1029
+
1030
+ // If this is an equality, just add it
1031
+ if current .Type == DiffEqual {
1032
+ cleaned = append (cleaned , current )
1033
+ pointer ++
1034
+ continue
1035
+ }
1036
+
1037
+ // Collect consecutive operations of the same type
1038
+ mergedText := current .Text
1039
+ pointer ++
1040
+
1041
+ // Merge consecutive deletions or insertions
1042
+ for pointer < len (diffs ) && diffs [pointer ].Type == current .Type {
1043
+ mergedText += diffs [pointer ].Text
1044
+ pointer ++
1045
+ }
1046
+
1047
+ // Only add non-empty merged operations
1048
+ if len (strings .TrimSpace (mergedText )) > 0 {
1049
+ cleaned = append (cleaned , Diff {current .Type , mergedText })
1050
+ }
1051
+ }
1052
+
1053
+ // Second pass: remove trivial equalities (empty lines or whitespace-only lines)
1054
+ // and merge adjacent equalities
1055
+ if len (cleaned ) > 1 {
1056
+ final := make ([]Diff , 0 , len (cleaned ))
1057
+
1058
+ for i := 0 ; i < len (cleaned ); i ++ {
1059
+ current := cleaned [i ]
1060
+
1061
+ // Skip empty or whitespace-only equalities
1062
+ if current .Type == DiffEqual && len (strings .TrimSpace (current .Text )) == 0 {
1063
+ continue
1064
+ }
1065
+
1066
+ // Merge consecutive equalities
1067
+ if current .Type == DiffEqual && len (final ) > 0 && final [len (final )- 1 ].Type == DiffEqual {
1068
+ final [len (final )- 1 ].Text += current .Text
1069
+ } else {
1070
+ final = append (final , current )
1071
+ }
1072
+ }
1073
+
1074
+ cleaned = final
1075
+ }
1076
+
1077
+ // Third pass: optimize deletion-insertion pairs
1078
+ // If we have a deletion followed by an insertion, and they're similar,
1079
+ // we might want to keep them as separate operations for clarity in line-based diffs
1080
+ // This preserves the line-by-line nature of the diff
1081
+
1082
+ return cleaned
1083
+ }
1084
+
956
1085
// DiffCleanupMerge reorders and merges like edit sections. Merge equalities.
957
1086
// Any edit section can move as long as it doesn't cross an equality.
958
1087
func (dmp * DiffMatchPatch ) DiffCleanupMerge (diffs []Diff ) []Diff {
0 commit comments