This repository has been archived by the owner on Jan 18, 2024. It is now read-only.
forked from variety/variety
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvariety.js
155 lines (123 loc) · 4.76 KB
/
variety.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/* Variety: A MongoDB Schema Analyzer
This tool helps you get a sense of your application's schema, as well as any
outliers to that schema. Particularly useful when you inherit a codebase with a
data dump and want to quickly learn how the data's structured. Also useful for
finding rare keys.
Please see https://github.com/JamesCropcho/variety for details.
Released by Maypop Inc, © 2012, under the MIT License. */
// Announce the tool and its version before any work is done.
print("Variety: A MongoDB Schema Analyzer");
print("Version 1.0.1, released 25 May 2012");

// The name of the collection to analyze has to be provided by the caller;
// abort with a usage hint when it is missing.
if (typeof collection === "undefined") {
  throw "You have to supply a 'collection' variable, à la --eval 'var collection = \"animals\"'. Please see https://github.com/JamesCropcho/variety for details.";
}

// 'limit' is optional: when the caller did not define it, default to the
// number of documents in the collection.
var limit;
if (typeof limit === "undefined") {
  limit = db[collection].count();
}
print("Using limit of " + limit);
// Decides whether a value may contain nested keys worth descending into.
//
// Arrays and plain objects qualify. Date, ObjectId and BinData values are
// objects as well, but they are treated as leaves. null and every non-object
// value (string, number, boolean, undefined, function) yield false.
//
// v       - the value to inspect.
// returns - true when v is a non-null object (including arrays) that is not
//           a Date, ObjectId or BinData; false otherwise.
var varietyCanHaveChildren = function (v) {
  // typeof null is 'object', so null must be ruled out explicitly.
  if (v === null || typeof v !== 'object') {
    return false;
  }
  var isSpecialObject = v instanceof Date ||
                        v instanceof ObjectId ||
                        v instanceof BinData;
  // Arrays are objects too, so no separate array test is needed here.
  return !isSpecialObject;
};
// Save the function into the system.js collection under the _id
// "varietyCanHaveChildren", the name by which map() and
// varietyMapRecursive() call it.
db.system.js.save({
  _id: "varietyCanHaveChildren",
  value: varietyCanHaveChildren
});
// Walks every key of a nested object or array, emitting one record per key
// and recursing into values that can themselves have children.
//
// Each emitted key is parentKey joined to the child key with a dot, with
// purely numeric path segments (array indexes) replaced by "XX" so that all
// elements of an array collapse onto one key.
//
// parentKey - dotted path of the value being walked.
// keys      - the object or array whose keys are visited.
var varietyMapRecursive = function (parentKey, keys) {
  for (var name in keys) {
    var value = keys[name];
    // Only a segment consisting entirely of digits is rewritten; the
    // lookahead leaves names that merely start with a digit (e.g. "2nd")
    // intact instead of turning them into "XXnd".
    var path = (parentKey + "." + name).replace(/\.\d+(?=\.|$)/g, '.XX');
    emit({key: path}, {type: varietyTypeOf(value)});
    if (varietyCanHaveChildren(value)) {
      varietyMapRecursive(path, value);
    }
  }
};
// Save the function into the system.js collection under the _id
// "varietyMapRecursive", the name by which map() calls it.
db.system.js.save({
  _id: "varietyMapRecursive",
  value: varietyMapRecursive
});
// Returns a human-readable type name for a value.
//
// Non-object values yield their typeof name with the first letter
// capitalized ("String", "Number", "Boolean", "Function"). Objects yield
// "null", "Array", "Date", "ObjectId", "BinData" or, failing all of those,
// "Object".
//
// thing   - the value to classify.
// returns - the type name as a string.
// throws  - the string "varietyTypeOf() requires an argument" when thing is
//           undefined.
var varietyTypeOf = function (thing) {
  var primitiveType = typeof thing;
  if (primitiveType === "undefined") { throw "varietyTypeOf() requires an argument"; }

  if (primitiveType !== "object") {
    // Capitalize the first letter so the output matches the other return
    // values below.
    return primitiveType.charAt(0).toUpperCase() + primitiveType.slice(1);
  }

  // typeof null is "object"; handle it before touching any property.
  if (thing === null) {
    return "null";
  }
  if (thing.constructor === Array) {
    return "Array";
  }
  if (thing instanceof Date) {
    return "Date";
  }
  if (thing instanceof ObjectId) {
    return "ObjectId";
  }
  if (thing instanceof BinData) {
    return "BinData";
  }
  return "Object";
};
// Save the function into the system.js collection under the _id
// "varietyTypeOf", the name by which map() and varietyMapRecursive() call it.
db.system.js.save({
  _id: "varietyTypeOf",
  value: varietyTypeOf
});
// Map step: invoked with `this` bound to the document being processed.
// Emits one {type: ...} record per top-level key and hands values that can
// have children over to varietyMapRecursive for the nested keys.
var map = function () {
  var doc = this;
  for (var name in doc) {
    var value = doc[name];
    // Internally, Mongo uses keys like groceries.0, groceries.1, groceries.2
    // for items in an array. -JC
    // Only segments made up entirely of digits are rewritten to "XX"; the
    // lookahead keeps names that merely start with a digit (e.g. "v.1st")
    // unchanged.
    var path = name.replace(/\.\d+(?=\.|$)/g, '.XX');
    emit({key: path}, {type: varietyTypeOf(value)});
    if (varietyCanHaveChildren(value)) {
      varietyMapRecursive(path, value);
    }
  }
};
// Reduce step: collapses all values seen for one key into the list of
// distinct type names, in order of first appearance.
//
// key     - the key being reduced (not used by the computation).
// values  - array of values for that key. Each is either a {type: ...}
//           record as emitted by map, or a {types: [...]} record returned by
//           an earlier call to this function.
// returns - {types: [...]} with every distinct type name exactly once.
var reduce = function (key, values) {
  var types = [];
  var addType = function (type) {
    // i.e. "if 'types' does not already have 'type', then insert it
    // into 'types'."
    if (types.indexOf(type) === -1) {
      types.push(type);
    }
  };
  values.forEach(function (value) {
    if (value.types) {
      // Output of a previous reduce pass fed back in: merge its list rather
      // than reading the missing 'type' field, which would lose the earlier
      // types and insert undefined.
      value.types.forEach(function (type) { addType(type); });
    } else {
      addType(value.type);
    }
  });
  return { types: types };
};
// Results are written to "<collection>Keys" inside the "varietyResults"
// database, replacing any previous results of the same name.
var resultsCollectionName = collection + "Keys";

db[collection].mapReduce(map, reduce, {
  out: {
    replace: resultsCollectionName,
    db: "varietyResults"
  },
  limit: limit,
  sort: {_id: -1},
  scope: { limit: limit },
  // MongoDB does not call reduce for a key that has only a single emitted
  // value, so such a value still has the {type: ...} shape produced by map.
  // Normalize it here so every result document carries a 'types' array.
  finalize: function (key, reducedValue) {
    if (reducedValue.types) {
      return reducedValue;
    }
    return { types: [reducedValue.type] };
  }
});

var resultsDB = db.getMongo().getDB("varietyResults");
var numDocuments = db[collection].count();

resultsDB[resultsCollectionName].find({}).forEach(function (key) {
  var keyName = key["_id"].key;

  // We throw away keys which end in an array index, since they are not useful
  // for our analysis. (We still keep the key of their parent array, though.) -JC
  if (keyName.match(/\.XX$/)) {
    resultsDB[resultsCollectionName].remove({ "_id": key["_id"] });
    return;
  }

  // Keys that still contain ".XX" at this point lie beneath an array element.
  // We do not support totalOccurrences for these keys because it is a bit
  // too tricky for a 'version 1'. Perhaps we'll support in the future. -JC
  // (Keys ending in ".XX" were already removed above, so a single test is
  // enough here.)
  if (!keyName.match(/\.XX/)) {
    var existsQuery = {};
    existsQuery[keyName] = {$exists: true};
    key.totalOccurrences = db[collection].count(existsQuery);
    key.percentContaining = (key.totalOccurrences / numDocuments) * 100;
  }

  resultsDB[resultsCollectionName].save(key);
});

// Print every remaining key, most frequently occurring first.
var sortedKeys = resultsDB[resultsCollectionName].find({}).sort({totalOccurrences: -1});
sortedKeys.forEach(function (key) {
  print(tojson(key, '', true));
});