-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathdiff.go
338 lines (304 loc) · 10.6 KB
/
diff.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
package lib
import (
"context"
"errors"
"fmt"
"github.com/qri-io/dataset/tabular"
"github.com/qri-io/deepdiff"
"github.com/qri-io/qri/base/component"
"github.com/qri-io/qri/base/dsfs"
"github.com/qri-io/qri/dsref"
qerr "github.com/qri-io/qri/errors"
qhttp "github.com/qri-io/qri/lib/http"
)
// DiffMethods encapsulates logic for diffing Datasets on Qri. Its exported
// methods hand their parameters to the dispatcher held in d instead of running
// the implementation directly.
type DiffMethods struct {
// d is the dispatcher that method calls are forwarded to
d dispatcher
}
// Name reports the identifier of this method group, which is "diff"
func (m DiffMethods) Name() string {
	const groupName = "diff"
	return groupName
}
// Attributes maps each method name in this group to the HTTP endpoint and verb
// it is exposed under. Both "changes" and "diff" are POST endpoints.
func (m DiffMethods) Attributes() map[string]AttributeSet {
	attrs := make(map[string]AttributeSet, 2)
	attrs["changes"] = AttributeSet{Endpoint: qhttp.AEChanges, HTTPVerb: "POST"}
	attrs["diff"] = AttributeSet{Endpoint: qhttp.AEDiff, HTTPVerb: "POST"}
	return attrs
}
// Delta is an alias for deepdiff.Delta, abstracting the deepdiff implementation
// away from packages that depend on lib, so they can name the type as lib.Delta
type Delta = deepdiff.Delta
// DiffStat is an alias for deepdiff.Stats, abstracting the deepdiff implementation
// away from packages that depend on lib, so they can name the type as lib.DiffStat
type DiffStat = deepdiff.Stats
// DiffParams defines parameters for diffing two sources. There are three valid ways to use these
// parameters: 1) both LeftSide and RightSide set, 2) only LeftSide set with a WorkingDir, 3) only
// LeftSide set with the UseLeftPrevVersion flag. Any other combination is rejected by diffMode.
type DiffParams struct {
// File paths or references to datasets. When both are set they must be the
// same kind of thing: two dataset references, or two file paths
LeftSide string `schema:"leftPath" json:"leftPath" qri:"dsrefOrFspath"`
RightSide string `schema:"rightPath" json:"rightPath" qri:"dsrefOrFspath"`
// If not empty, the working directory that the diff is using. Only valid when
// LeftSide is a dataset reference and RightSide is empty
WorkingDir string `qri:"fspath"`
// Whether to get the previous version of the left parameter. Only valid when
// LeftSide is a dataset reference and RightSide is empty
UseLeftPrevVersion bool
// Which component or part of a dataset to compare. When empty, diffImpl.Diff
// falls back to the "dataset" component
Selector string
}
// diffMode determines which kind of diff the parameters describe. It returns an
// error when the parameters don't fit one of the three combinations diff
// allows:
//
//  1. LeftSide and RightSide both set, and both of the same kind (two dataset
//     references or two file paths), with neither WorkingDir nor
//     UseLeftPrevVersion set
//  2. LeftSide set to a dataset reference, together with WorkingDir
//  3. LeftSide set to a dataset reference, together with UseLeftPrevVersion
//
// When two comparable sources are combined with a flag, the error is returned
// alongside the mode that the sources alone would have selected; every other
// failure returns InvalidDiffMode.
func (p *DiffParams) diffMode() (DiffMode, error) {
	hasLeft := p.LeftSide != ""
	hasRight := p.RightSide != ""

	if !hasLeft && !hasRight {
		return InvalidDiffMode, fmt.Errorf("nothing to diff")
	}

	if hasLeft && hasRight {
		// Have two string parameters to compare. Should either both be
		// references, or neither be references.
		var mode DiffMode
		switch {
		case dsref.IsRefString(p.LeftSide) && dsref.IsRefString(p.RightSide):
			mode = DatasetRefDiffMode
		case isFilePath(p.LeftSide) && isFilePath(p.RightSide):
			mode = FilepathDiffMode
		default:
			return InvalidDiffMode, fmt.Errorf("cannot compare a file to dataset, must compare similar things")
		}
		// Neither of the flags should be set.
		if p.WorkingDir != "" {
			return mode, fmt.Errorf("cannot use working directory when comparing two sources")
		}
		if p.UseLeftPrevVersion {
			return mode, fmt.Errorf("cannot use previous version when comparing two sources")
		}
		return mode, nil
	}

	// Only one side is set. The remaining modes all require the left side to be
	// a dataset reference.
	if !dsref.IsRefString(p.LeftSide) {
		return InvalidDiffMode, fmt.Errorf("invalid parameters to diff")
	}

	switch {
	case p.WorkingDir != "" && p.UseLeftPrevVersion:
		// The two flags are mutually exclusive
		return InvalidDiffMode, fmt.Errorf("cannot use both previous version and working directory")
	case p.WorkingDir != "":
		// Comparing the contents of a working directory to the dataset it represents
		// TODO(dustmop): Should verify that the working directory *matches* the dataset
		return WorkingDirectoryDiffMode, nil
	case p.UseLeftPrevVersion:
		// Comparing a dataset to its previous version
		return PrevVersionDiffMode, nil
	}
	return InvalidDiffMode, fmt.Errorf("invalid parameters to diff")
}
// DiffResponse is the result of a call to diff
type DiffResponse struct {
// Stat holds the statistics returned by the same StatDiff call that produced Diff
Stat *DiffStat `json:"stat,omitempty"`
// SchemaStat holds the statistics returned alongside Schema. Within this file
// Schema and SchemaStat are only filled in when two file paths are compared
SchemaStat *DiffStat `json:"schemaStat,omitempty"`
// Schema lists the differences between the two sides' inferred schemas
Schema []*Delta `json:"schema,omitempty"`
// Diff lists the differences between the left and right data
Diff []*Delta `json:"diff,omitempty"`
}
// DiffMode is one of the methods that diff can perform. DiffParams.diffMode
// selects the mode from the combination of parameters that were supplied
type DiffMode int
const (
// InvalidDiffMode is the default diff mode: the zero value of DiffMode, also
// returned by diffMode when the parameters do not select a usable mode
InvalidDiffMode DiffMode = iota
// DatasetRefDiffMode will diff two dataset references
DatasetRefDiffMode
// FilepathDiffMode will diff two files
FilepathDiffMode
// WorkingDirectoryDiffMode will diff a working directory against its dataset head
WorkingDirectoryDiffMode
// PrevVersionDiffMode will diff a dataset head against its previous version
PrevVersionDiffMode
)
// Diff computes the diff of two sources by dispatching the "diff" method of
// this group. If the dispatched value is not a *DiffResponse, the outcome is
// converted into an error by dispatchReturnError.
func (m DiffMethods) Diff(ctx context.Context, p *DiffParams) (*DiffResponse, error) {
	result, _, err := m.d.Dispatch(ctx, dispatchMethodName(m, "diff"), p)
	response, isResponse := result.(*DiffResponse)
	if !isResponse {
		return nil, dispatchReturnError(result, err)
	}
	return response, err
}
// schemaDiff compares the inferred schemas of two body components. When both
// bodies have the ".csv" format, the comparison is made between the column
// titles derived from each schema; otherwise the inferred schemas themselves
// are compared.
func schemaDiff(ctx context.Context, left, right *component.BodyComponent) ([]*Delta, *DiffStat, error) {
	differ := deepdiff.New()

	bothCSV := left.Format == ".csv" && right.Format == ".csv"
	if !bothCSV {
		return differ.StatDiff(ctx, left.InferredSchema, right.InferredSchema)
	}

	leftCols, _, err := tabular.ColumnsFromJSONSchema(left.InferredSchema)
	if err != nil {
		return nil, nil, err
	}
	rightCols, _, err := tabular.ColumnsFromJSONSchema(right.InferredSchema)
	if err != nil {
		return nil, nil, err
	}
	return differ.StatDiff(ctx, leftCols.Titles(), rightCols.Titles())
}
// isFilePath reports whether text should be treated as a file path: any
// non-empty string that isn't a dataset reference is assumed to be a file
func isFilePath(text string) bool {
	return text != "" && !dsref.IsRefString(text)
}
// diffImpl holds the method implementations for DiffMethods. It carries no
// state of its own; its methods receive a scope to work with
type diffImpl struct{}
// Diff computes the diff of two sources. The parameters select one of four
// modes (see DiffParams.diffMode):
//
//   - file paths: both sides are loaded as body files; the response carries a
//     schema diff as well as the data diff
//   - dataset references: both sides are loaded through the scope's loader
//   - working directory: the left dataset is compared with the components
//     found in p.WorkingDir, after stripping commit, viz and derived values
//     from both sides and inlining each body
//   - previous version: the dataset named by p.LeftSide becomes the right
//     side, and the version at its PreviousPath becomes the left side
//
// For every mode except file paths, only the component named by p.Selector
// ("dataset" when empty) is compared. An error is returned if the parameters
// are invalid, a source can't be loaded, or the selected component is missing
// on either side.
func (diffImpl) Diff(scope scope, p *DiffParams) (*DiffResponse, error) {
	res := &DiffResponse{}

	diffMode, err := p.diffMode()
	if err != nil {
		return nil, err
	}

	if diffMode == FilepathDiffMode {
		// Compare body files.
		leftComp := component.NewBodyComponent(p.LeftSide)
		leftData, err := leftComp.StructuredData()
		if err != nil {
			return nil, err
		}

		rightComp := component.NewBodyComponent(p.RightSide)
		rightData, err := rightComp.StructuredData()
		if err != nil {
			return nil, err
		}

		res.Schema, res.SchemaStat, err = schemaDiff(scope.Context(), leftComp, rightComp)
		if err != nil {
			return nil, err
		}

		dd := deepdiff.New()
		res.Diff, res.Stat, err = dd.StatDiff(scope.Context(), leftData, rightData)
		if err != nil {
			return nil, err
		}
		return res, nil
	}

	// Left side of diff loaded into a component
	ds, err := scope.Loader().LoadDataset(scope.Context(), p.LeftSide)
	if err != nil {
		if errors.Is(err, dsref.ErrNoHistory) {
			return nil, qerr.New(err, fmt.Sprintf("dataset %s has no versions, nothing to diff against", p.LeftSide))
		}
		return nil, err
	}
	// TODO (b5) - setting name & peername to zero values makes tests pass, but
	// calling ds.DropDerivedValues is overzealous. investigate the right solution
	ds.Name = ""
	ds.Peername = ""
	leftComp := component.ConvertDatasetToComponents(ds, scope.Filesystem())

	// Right side of diff loaded into a component
	var rightComp component.Component

	switch diffMode {
	case WorkingDirectoryDiffMode:
		// Working directory, read dataset from the current files.
		rightComp, err = component.ListDirectoryComponents(p.WorkingDir)
		if err != nil {
			return nil, err
		}
		err = component.ExpandListedComponents(rightComp, scope.Filesystem())
		if err != nil {
			return nil, err
		}
		// TODO(dlong): Hack! This is what fills the value. StructuredData assumes this has been
		// called. Should cleanup component's API so that this isn't necessary.
		_, err = component.ToDataset(rightComp)
		if err != nil {
			return nil, err
		}
	case PrevVersionDiffMode:
		// The head version was already loaded, use that for the right side of the diff
		rightComp = leftComp
		// Load previous dataset version for the new left side
		if ds.PreviousPath == "" {
			return nil, fmt.Errorf("dataset has only one version, nothing to diff against")
		}
		ds, err = dsfs.LoadDataset(scope.Context(), scope.Filesystem(), ds.PreviousPath)
		if err != nil {
			return nil, err
		}
		leftComp = component.ConvertDatasetToComponents(ds, scope.Filesystem())
	case DatasetRefDiffMode:
		ds, err = scope.Loader().LoadDataset(scope.Context(), p.RightSide)
		if err != nil {
			return nil, err
		}
		// TODO (b5) - setting name & peername to zero values makes tests pass, but
		// calling ds.DropDerivedValues is overzealous. investigate the right solution
		ds.Name = ""
		ds.Peername = ""
		rightComp = component.ConvertDatasetToComponents(ds, scope.Filesystem())
	}

	// If in an FSI linked working directory, drop derived values, since the user is not
	// expected to have those transient values on their checked out files.
	if diffMode == WorkingDirectoryDiffMode {
		// TODO(dlong): RemoveSubcomponent removes the component from the map, but not from the
		// Value. That should be fixed so that component has a more sane API.
		leftComp.Base().RemoveSubcomponent("commit")
		leftComp.Base().RemoveSubcomponent("viz")
		leftComp.DropDerivedValues()
		rightComp.Base().RemoveSubcomponent("commit")
		rightComp.Base().RemoveSubcomponent("viz")
		rightComp.DropDerivedValues()

		// Also load the body file, and inline it.
		// TODO(dlong): This should be refactored into component so that it's easier to do.
		err = inlineDatasetBody(leftComp)
		if err != nil {
			return nil, err
		}
		err = inlineDatasetBody(rightComp)
		if err != nil {
			return nil, err
		}
	}

	selector := p.Selector
	if selector == "" {
		selector = "dataset"
	}

	leftComp = leftComp.Base().GetSubcomponent(selector)
	rightComp = rightComp.Base().GetSubcomponent(selector)
	if leftComp == nil || rightComp == nil {
		return nil, fmt.Errorf("component %q not found", selector)
	}

	leftData, err := leftComp.StructuredData()
	if err != nil {
		return nil, err
	}
	rightData, err := rightComp.StructuredData()
	if err != nil {
		return nil, err
	}

	dd := deepdiff.New()
	res.Diff, res.Stat, err = dd.StatDiff(scope.Context(), leftData, rightData)
	if err != nil {
		return nil, err
	}
	return res, nil
}

// inlineDatasetBody prepares one side of a working-directory diff. On the
// "dataset" subcomponent's value it clears Commit, Viz, Peername and
// PreviousPath, then, if a "body" subcomponent exists, stores that body's
// structured data in the dataset's Body field and clears BodyPath. It is a
// no-op when comp has no "dataset" subcomponent of type
// *component.DatasetComponent.
func inlineDatasetBody(comp component.Component) error {
	// A nil subcomponent fails the assertion too, so one check covers both
	// "missing" and "wrong type".
	dsComp, ok := comp.Base().GetSubcomponent("dataset").(*component.DatasetComponent)
	if !ok {
		return nil
	}

	ds := dsComp.Value
	ds.Commit = nil
	ds.Viz = nil
	ds.Peername = ""
	ds.PreviousPath = ""

	bodyComp := comp.Base().GetSubcomponent("body")
	if bodyComp == nil {
		return nil
	}

	// NOTE(review): whatever LoadAndFill returns is discarded; failures only
	// surface through StructuredData below. Confirm that is intentional.
	bodyComp.LoadAndFill(ds)

	var err error
	ds.Body, err = bodyComp.StructuredData()
	if err != nil {
		return err
	}
	ds.BodyPath = ""
	return nil
}