Skip to content

Commit

Permalink
Rename samples subdirectories
Browse files Browse the repository at this point in the history
  • Loading branch information
josh committed Jul 23, 2012
1 parent 314f0e4 commit 7b6caa0
Show file tree
Hide file tree
Showing 273 changed files with 11,341 additions and 11,344 deletions.
4 changes: 2 additions & 2 deletions lib/linguist/language.rb
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,8 @@ def inspect
end
end

extensions = Samples::DATA['extnames'] rescue {} # TODO: BAH!
filenames = Samples::DATA['filenames'] rescue {} # TODO: BAH!
extensions = Samples::DATA['extnames']
filenames = Samples::DATA['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
Expand Down
22 changes: 10 additions & 12 deletions lib/linguist/samples.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'set'
require 'yaml'

require 'linguist/md5'
require 'linguist/classifier'

module Linguist
# Model for accessing classifier training data.
Expand All @@ -27,7 +28,7 @@ def self.each(&block)

# Skip text and binary for now
# Possibly reconsider this later
next if category == 'text' || category == 'binary'
next if category == 'Text' || category == 'Binary'

dirname = File.join(ROOT, category)
Dir.entries(dirname).each do |filename|
Expand Down Expand Up @@ -60,32 +61,29 @@ def self.each(&block)
#
# Returns trained Classifier.
def self.data
require 'linguist/classifier'
require 'linguist/language'

db = {}
db['extnames'] = {}
db['filenames'] = {}

each do |sample|
language = Language.find_by_alias(sample[:language])
language_name = sample[:language]

# TODO: For now skip empty extnames
if sample[:extname] && sample[:extname] != ""
db['extnames'][language.name] ||= []
if !db['extnames'][language.name].include?(sample[:extname])
db['extnames'][language.name] << sample[:extname]
db['extnames'][language_name] ||= []
if !db['extnames'][language_name].include?(sample[:extname])
db['extnames'][language_name] << sample[:extname]
end
end

# TODO: For now skip empty filenames
if fn = sample[:filename]
db['filenames'][language.name] ||= []
db['filenames'][language.name] << fn
db['filenames'][language_name] ||= []
db['filenames'][language_name] << fn
end

data = File.read(sample[:path])
Classifier.train!(db, language.name, data)
Classifier.train!(db, language_name, data)
end

db['md5'] = Linguist::MD5.hexdigest(db)
Expand Down
Loading

0 comments on commit 7b6caa0

Please sign in to comment.