forked from github-linguist/linguist
-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_samples.rb
110 lines (94 loc) · 4.14 KB
/
test_samples.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
require_relative "./helper"
require "tempfile"
class TestSamples < Minitest::Test
include Linguist
def test_up_to_date
assert serialized = Samples.cache
assert latest = Samples.data
# Just warn, it shouldn't scare people off by breaking the build.
if serialized['sha256'] != latest['sha256']
warn "Samples database is out of date. Run `bundle exec rake samples`."
expected = Tempfile.new('expected.json')
expected.write Yajl.dump(serialized, :pretty => true)
expected.close
actual = Tempfile.new('actual.json')
actual.write Yajl.dump(latest, :pretty => true)
actual.close
expected.unlink
actual.unlink
end
end
def test_verify
assert data = Samples.cache
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
assert !data["interpreters"].empty?
end
def test_ext_or_shebang
Samples.each do |sample|
if sample[:extname].to_s.empty? && !sample[:filename]
assert sample[:interpreter], "#{sample[:path]} should have a file extension or a shebang, maybe it belongs in filenames/ subdir"
end
end
end
def test_filename_listed
Samples.each do |sample|
if sample[:filename]
listed_filenames = Language[sample[:language]].filenames
assert_includes listed_filenames, sample[:filename], "#{sample[:path]} isn't listed as a filename for #{sample[:language]} in languages.yml"
end
end
end
# Check that there aren't samples with extensions or interpreters that
# aren't explicitly defined in languages.yml
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
YAML.load_file(languages_yml).each do |name, options|
define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
options['extensions'] ||= []
if extnames = Samples.cache['extnames'][name]
extnames.each do |extname|
assert options['extensions'].index { |x| x.downcase.end_with? extname.downcase }, "#{name} has a sample with extension (#{extname.downcase}) that isn't explicitly defined in languages.yml"
end
end
options['interpreters'] ||= []
if interpreters = Samples.cache['interpreters'][name]
interpreters.each do |interpreter|
assert options['interpreters'].include?(interpreter),
"#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
end
end
end
end
# If a language extension isn't globally unique then make sure there are samples
Linguist::Language.all.each do |language|
define_method "test_#{language.name}_has_samples" do
language.extensions.each do |extension|
language_matches = Language.find_by_extension(extension)
# Check for samples if more than one language matches the given extension.
if language_matches.length > 1
language_matches.each do |match|
generic = Strategy::Extension.generic? extension
samples = generic ? "test/fixtures/Generic/#{extension.sub(/^\./, "")}/#{match.name}/*" : "samples/#{match.name}/*#{case_insensitive_glob(extension)}"
assert Dir.glob(samples).any?, "Missing samples in #{samples.inspect}. See https://github.com/github/linguist/blob/master/CONTRIBUTING.md"
end
end
end
language.filenames.each do |filename|
# Check for samples if more than one language matches the given filename
if Language.find_by_filename(filename).size > 1
sample = "samples/#{language.name}/filenames/#{filename}"
assert File.exist?(sample),
"Missing sample in #{sample.inspect}. See https://github.com/github/linguist/blob/master/CONTRIBUTING.md"
end
end
end
end
def case_insensitive_glob(extension)
glob = ""
extension.each_char do |c|
glob += c.downcase != c.upcase ? "[#{c.downcase}#{c.upcase}]" : c
end
glob
end
end