Merge pull request s-rah#99 from s-rah/dump
OnionScan 0.2 Refactor
s-rah authored Oct 25, 2016
2 parents 612138d + e99384f commit c64403b
Showing 54 changed files with 50,639 additions and 283 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -33,4 +33,8 @@ Desktop.ini
 
 # OS X
 .DS_Store
-.Trashes
+.Trashes
+
+# OnionScan
+onionscandb
+service-config
2 changes: 1 addition & 1 deletion README.md
@@ -11,7 +11,7 @@ it to yourself and your users to ensure that attackers cannot easily exploit and
 deanonymize.
 
 OnionScan is not a general vulnerability scanner or security tool. It does not
-feature scans that can be commonly found in other tools targetted at regular
+feature scans that can be commonly found in other tools targeted at regular
 websites e.g. XSS detection.
 
 ## Go Dependencies
36 changes: 33 additions & 3 deletions config/crawl_config.go
@@ -2,15 +2,45 @@ package config
 
 import (
 	"encoding/json"
+	"fmt"
 	"io/ioutil"
 )
 
+// ExtraRelationship defines additional information which can be
+// extracted after an initial relationship is detected.
+type ExtraRelationship struct {
+	Name   string `json:"name"`
+	Regex  string `json:"regex"`
+	Rollup bool   `json:"rollup"`
+}
+
+// Relationship defines a section of a page that can be extracted to provide a
+// unique identifier relationship.
+type Relationship struct {
+	Name                   string              `json:"name"`
+	TriggerIdentifierRegex string              `json:"triggeridentifierregex"`
+	ExtraRelationships     []ExtraRelationship `json:"extrarelationships"`
+}
+
+// CrawlConfig defines user-specified options to tweak the current crawl.
 type CrawlConfig struct {
-	Onion   string   `json:"onion"`
-	Base    string   `json:"base"`
-	Exclude []string `json:"exclude"`
+	Onion         string         `json:"onion"`
+	Base          string         `json:"base"`
+	Exclude       []string       `json:"exclude"`
+	Relationships []Relationship `json:"relationships"`
 }
 
+// GetRelationship provides a Relationship by its name.
+func (cc *CrawlConfig) GetRelationship(name string) (Relationship, error) {
+	for _, relationship := range cc.Relationships {
+		if relationship.Name == name {
+			return relationship, nil
+		}
+	}
+	return Relationship{}, fmt.Errorf(`Could not find Relationship "%s"`, name)
+}
+
 // LoadCrawlConfig creates a CrawlConfig object by loading a given filename.
 func LoadCrawlConfig(filename string) (CrawlConfig, error) {
 	dat, err := ioutil.ReadFile(filename)
 	if err != nil {
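The structs above define the JSON shape of a crawl config file. As a rough, self-contained sketch of how the new `Relationships` section might be used (the onion address, paths, and regexes are invented for illustration, and it assumes `LoadCrawlConfig` unmarshals the file according to the struct tags above):

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"

	"github.com/s-rah/onionscan/config"
)

func main() {
	// An invented crawl config; the field names follow the json tags
	// defined in config/crawl_config.go.
	raw := `{
	  "onion": "examplehiddenservice.onion",
	  "base": "/forum",
	  "exclude": ["/login", "/logout"],
	  "relationships": [{
	    "name": "user-profile",
	    "triggeridentifierregex": "/user/([A-Za-z0-9]*)",
	    "extrarelationships": [
	      {"name": "location", "regex": "Location: ([^<]*)", "rollup": true}
	    ]
	  }]
	}`

	// Write the config to a throwaway file so LoadCrawlConfig can read it.
	tmp, err := ioutil.TempFile("", "crawl-config")
	if err != nil {
		panic(err)
	}
	defer os.Remove(tmp.Name())
	tmp.WriteString(raw)
	tmp.Close()

	cc, err := config.LoadCrawlConfig(tmp.Name())
	if err != nil {
		panic(err)
	}

	// Look up one of the configured relationships by name.
	rel, err := cc.GetRelationship("user-profile")
	if err != nil {
		panic(err)
	}
	fmt.Println(rel.TriggerIdentifierRegex)
}
```

Config files like this are picked up by `Configure` below, which walks a config directory and registers each file under its `onion` key.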
22 changes: 21 additions & 1 deletion config/onionscan_config.go
@@ -1,14 +1,18 @@
 package config
 
 import (
+	"bufio"
 	"fmt"
 	"github.com/s-rah/onionscan/crawldb"
 	"log"
+	"net/http"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 )
 
+// OnionScanConfig defines options to tweak the overall OnionScan system.
 type OnionScanConfig struct {
 	TorProxyAddress string
 	Depth           int
@@ -19,9 +23,12 @@ type OnionScanConfig struct {
 	RescanDuration time.Duration
 	Scans          []string
 	CrawlConfigs   map[string]CrawlConfig
+	Cookies        []*http.Cookie
 }
 
-func Configure(torProxyAddress string, directoryDepth int, fingerprint bool, timeout int, database string, scans []string, crawlconfigdir string, verbose bool) *OnionScanConfig {
+// Configure creates a new OnionScanConfig object with a set of options.
+// FIXME: We can make this a decorator and make it much nicer.
+func Configure(torProxyAddress string, directoryDepth int, fingerprint bool, timeout int, database string, scans []string, crawlconfigdir string, cookie string, verbose bool) *OnionScanConfig {
 	osc := new(OnionScanConfig)
 	osc.TorProxyAddress = torProxyAddress
 	osc.Depth = directoryDepth
@@ -34,12 +41,22 @@ func Configure(torProxyAddress string, directoryDepth int, fingerprint bool, tim
 	osc.Scans = scans
 	osc.CrawlConfigs = make(map[string]CrawlConfig)
 
+	rawRequest := fmt.Sprintf("GET / HTTP/1.0\r\nCookie: %s\r\n\r\n", cookie)
+
+	req, err := http.ReadRequest(bufio.NewReader(strings.NewReader(rawRequest)))
+
+	if err == nil {
+		osc.Cookies = req.Cookies()
+	}
+
 	visit := func(path string, f os.FileInfo, err error) error {
 		if !f.IsDir() {
 			cc, err := LoadCrawlConfig(path)
 			if err == nil {
 				osc.LogInfo(fmt.Sprintf("Loading Crawl Config for %s %v", cc.Onion, cc))
 				osc.CrawlConfigs[cc.Onion] = cc
+			} else {
+				osc.LogError(err)
 			}
 		}
 		return nil
@@ -52,12 +69,15 @@ func Configure(torProxyAddress string, directoryDepth int, fingerprint bool, tim
 	return osc
 }
 
+// LogInfo logs an informational message to the log, assuming that the log level
+// is set low enough.
 func (os *OnionScanConfig) LogInfo(message string) {
 	if os.Verbose {
 		log.Printf("INFO: %v", message)
 	}
 }
 
+// LogError logs an error message to the log, always.
 func (os *OnionScanConfig) LogError(err error) {
 	log.Printf("ERROR: %v", err)
 }
168 changes: 153 additions & 15 deletions crawldb/crawldb.go
@@ -5,13 +5,17 @@ import (
 	"fmt"
 	"github.com/HouzuoGuo/tiedot/db"
 	"github.com/s-rah/onionscan/model"
+	"log"
 	"time"
 )
 
+// CrawlDB is the main interface for persistent storage in OnionScan
 type CrawlDB struct {
 	myDB *db.DB
 }
 
+// NewDB creates a new CrawlDB instance. If the database does not exist at the
+// given dbdir, it will be created.
 func (cdb *CrawlDB) NewDB(dbdir string) {
 	db, err := db.OpenDB(dbdir)
 	if err != nil {
@@ -26,29 +30,52 @@ func (cdb *CrawlDB) NewDB(dbdir string) {
 
 }
 
+// Initialize sets up a new database - should only be called when creating a
+// new database.
 func (cdb *CrawlDB) Initialize() {
+	log.Printf("Creating Database Bucket crawls...")
 	if err := cdb.myDB.Create("crawls"); err != nil {
 		panic(err)
 	}
 
 	// Allow searching by the URL
+	log.Printf("Indexing URL in crawls...")
 	crawls := cdb.myDB.Use("crawls")
 	if err := crawls.Index([]string{"URL"}); err != nil {
 		panic(err)
 	}
 
+	log.Printf("Creating Database Bucket relationships...")
 	if err := cdb.myDB.Create("relationships"); err != nil {
 		panic(err)
 	}
 
-	// Allowing searching by the Identifier Stirng
+	// Allow searching by the Identifier String
+	log.Printf("Indexing Identifier in relationships...")
 	rels := cdb.myDB.Use("relationships")
 	if err := rels.Index([]string{"Identifier"}); err != nil {
 		panic(err)
 	}
 
+	// Allow searching by the Onion String
+	log.Printf("Indexing Onion in relationships...")
+	if err := rels.Index([]string{"Onion"}); err != nil {
+		panic(err)
+	}
+
+	log.Printf("Database Setup Complete")
+
 }
 
+// CrawlRecord defines a spider entry in the database
+type CrawlRecord struct {
+	URL       string
+	Timestamp time.Time
+	Page      model.Page
+}
+
+// InsertCrawlRecord adds a new spider entry to the database and returns the
+// record id.
 func (cdb *CrawlDB) InsertCrawlRecord(url string, page *model.Page) (int, error) {
 	crawls := cdb.myDB.Use("crawls")
 	docID, err := crawls.Insert(map[string]interface{}{
@@ -58,12 +85,7 @@ func (cdb *CrawlDB) InsertCrawlRecord(url string, page *model.Page) (int, error)
 	return docID, err
 }
 
-type CrawlRecord struct {
-	URL       string
-	Timestamp time.Time
-	Page      model.Page
-}
-
+// GetCrawlRecord returns a CrawlRecord from the database given an ID.
 func (cdb *CrawlDB) GetCrawlRecord(id int) (CrawlRecord, error) {
 	crawls := cdb.myDB.Use("crawls")
 	readBack, err := crawls.Read(id)
@@ -79,6 +101,8 @@ func (cdb *CrawlDB) GetCrawlRecord(id int) (CrawlRecord, error) {
 	return CrawlRecord{}, err
 }
 
+// HasCrawlRecord returns true if a given URL is associated with a crawl record
+// in the database. Only records created after the given duration are considered.
 func (cdb *CrawlDB) HasCrawlRecord(url string, duration time.Duration) (bool, int) {
 	var query interface{}
 	before := time.Now().Add(duration)
@@ -112,22 +136,135 @@
 	return false, 0
 }
 
+// Relationship defines a correlation record in the Database.
 type Relationship struct {
+	ID         int
 	Onion      string
 	From       string
+	Type       string
 	Identifier string
+	FirstSeen  time.Time
+	LastSeen   time.Time
 }
 
-func (cdb *CrawlDB) InsertRelationship(onion string, from string, identifier string) (int, error) {
-	crawls := cdb.myDB.Use("relationships")
-	docID, err := crawls.Insert(map[string]interface{}{
+// InsertRelationship creates a new Relationship in the database.
+func (cdb *CrawlDB) InsertRelationship(onion string, from string, identifierType string, identifier string) (int, error) {
+
+	rels, err := cdb.GetRelationshipsWithOnion(onion)
+
+	// If we have seen this before, we will update rather than adding a
+	// new relationship
+	if err == nil {
+		for _, rel := range rels {
+			if rel.From == from && rel.Identifier == identifier && rel.Type == identifierType {
+				// Update the Relationship
+				log.Printf("Updating %s --- %s ---> %s (%s)", onion, from, identifier, identifierType)
+				relationships := cdb.myDB.Use("relationships")
+				err := relationships.Update(rel.ID, map[string]interface{}{
+					"Onion":      onion,
+					"From":       from,
+					"Type":       identifierType,
+					"Identifier": identifier,
+					"FirstSeen":  rel.FirstSeen,
+					"LastSeen":   time.Now()})
+				return rel.ID, err
+			}
+		}
+	}
+
+	// Otherwise Insert New
+	log.Printf("Inserting %s --- %s ---> %s (%s)", onion, from, identifier, identifierType)
+	relationships := cdb.myDB.Use("relationships")
+	docID, err := relationships.Insert(map[string]interface{}{
 		"Onion":      onion,
 		"From":       from,
-		"Identifier": identifier})
+		"Type":       identifierType,
+		"Identifier": identifier,
+		"FirstSeen":  time.Now(),
+		"LastSeen":   time.Now()})
 	return docID, err
 }
 
-func (cdb *CrawlDB) GetOnionsWithIdentifier(identifier string) ([]string, error) {
+// GetRelationshipsWithOnion returns all relationships with an Onion field matching
+// the onion parameter.
+func (cdb *CrawlDB) GetRelationshipsWithOnion(onion string) ([]Relationship, error) {
+	var query interface{}
+
+	q := fmt.Sprintf(`{"eq":"%v", "in": ["Onion"]}`, onion)
+	json.Unmarshal([]byte(q), &query)
+
+	queryResult := make(map[int]struct{}) // query result (document IDs) goes into map keys
+	relationships := cdb.myDB.Use("relationships")
+	if err := db.EvalQuery(query, relationships, &queryResult); err != nil {
+		return nil, err
+	}
+
+	var rels []Relationship
+	for id := range queryResult {
+		// To get query result document, simply read it
+		readBack, err := relationships.Read(id)
+		if err == nil {
+			out, err := json.Marshal(readBack)
+			if err == nil {
+				var relationship Relationship
+				json.Unmarshal(out, &relationship)
+				rels = append(rels, relationship)
+			}
+		}
+	}
+	return rels, nil
+}
+
+// GetUserRelationshipFromOnion reconstructs a user relationship from a given
+// identifier. fromonion is used as a filter to ensure that only user relationships
+// from a given onion are reconstructed.
+func (cdb *CrawlDB) GetUserRelationshipFromOnion(identifier string, fromonion string) (map[string]Relationship, error) {
+	results, err := cdb.GetRelationshipsWithOnion(identifier)
+
+	if err != nil {
+		return nil, err
+	}
+
+	relationships := make(map[string]Relationship)
+	for _, result := range results {
+		if result.From == fromonion {
+			relationships[result.Type] = result
+		}
+	}
+	return relationships, nil
+}
+
+// GetAllRelationshipsCount returns the total number of relationships stored in
+// the database.
+func (cdb *CrawlDB) GetAllRelationshipsCount() int {
+	queryResult := make(map[int]struct{}) // query result (document IDs) goes into map keys
+	relationships := cdb.myDB.Use("relationships")
+
+	if err := db.EvalAllIDs(relationships, &queryResult); err != nil {
+		return 0
+	}
+	return len(queryResult)
+}
+
+// GetRelationshipsCount returns the total number of relationships for a given
+// identifier.
+func (cdb *CrawlDB) GetRelationshipsCount(identifier string) int {
+	var query interface{}
+
+	q := fmt.Sprintf(`{"eq":"%v", "in": ["Identifier"]}`, identifier)
+	json.Unmarshal([]byte(q), &query)
+
+	queryResult := make(map[int]struct{}) // query result (document IDs) goes into map keys
+	relationships := cdb.myDB.Use("relationships")
+	if err := db.EvalQuery(query, relationships, &queryResult); err != nil {
+		return 0
+	}
+	return len(queryResult)
+}
+
+// GetRelationshipsWithIdentifier returns all relationships associated with a
+// given identifier.
+func (cdb *CrawlDB) GetRelationshipsWithIdentifier(identifier string) ([]Relationship, error) {
 	var query interface{}
 
 	q := fmt.Sprintf(`{"eq":"%v", "in": ["Identifier"]}`, identifier)
@@ -139,7 +276,7 @@ func (cdb *CrawlDB) GetOnionsWithIdentifier(identifier string) ([]string, error)
 		return nil, err
 	}
 
-	onions := make([]string, 0)
+	var rels []Relationship
 	for id := range queryResult {
 		// To get query result document, simply read it
 		readBack, err := relationships.Read(id)
@@ -148,9 +285,10 @@ func (cdb *CrawlDB) GetOnionsWithIdentifier(identifier string) ([]string, error)
 			if err == nil {
 				var relationship Relationship
 				json.Unmarshal(out, &relationship)
-				onions = append(onions, relationship.Onion)
+				relationship.ID = id
+				rels = append(rels, relationship)
 			}
 		}
 	}
-	return onions, nil
+	return rels, nil
 }
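`InsertRelationship` now behaves like an upsert: if a relationship with the same onion, from, type, and identifier already exists, only its `LastSeen` timestamp is refreshed. A caller's-eye sketch, assuming the import path above and that `NewDB` initializes a fresh database at a non-existent directory, per its doc comment (the onion and identifier values are invented):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/s-rah/onionscan/crawldb"
)

func main() {
	// Hypothetical scratch location for a throwaway database.
	dbdir := filepath.Join(os.TempDir(), "onionscan-demo-db")
	defer os.RemoveAll(dbdir)

	var cdb crawldb.CrawlDB
	cdb.NewDB(dbdir)

	// The first call inserts; the identical second call matches the
	// existing record and only refreshes LastSeen.
	id1, _ := cdb.InsertRelationship("example.onion", "crawl", "ip", "10.0.0.1")
	id2, _ := cdb.InsertRelationship("example.onion", "crawl", "ip", "10.0.0.1")
	fmt.Println(id1 == id2) // true when the upsert matched

	rels, _ := cdb.GetRelationshipsWithIdentifier("10.0.0.1")
	for _, rel := range rels {
		fmt.Printf("%s --- %s ---> %s (%s)\n", rel.Onion, rel.From, rel.Identifier, rel.Type)
	}
}
```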
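The query helpers all share one tiedot pattern: build a JSON query of the form `{"eq": <value>, "in": [<field>]}`, evaluate it against an indexed collection with `db.EvalQuery`, and collect matching document IDs as map keys. A condensed standalone sketch of just that pattern, using only calls that appear in the diff above (the scratch directory and sample document are invented):

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"

	"github.com/HouzuoGuo/tiedot/db"
)

func main() {
	dir := filepath.Join(os.TempDir(), "tiedot-demo") // hypothetical scratch dir
	defer os.RemoveAll(dir)

	myDB, err := db.OpenDB(dir)
	if err != nil {
		panic(err)
	}
	if err := myDB.Create("relationships"); err != nil {
		panic(err)
	}
	rels := myDB.Use("relationships")
	if err := rels.Index([]string{"Onion"}); err != nil {
		panic(err)
	}

	rels.Insert(map[string]interface{}{"Onion": "example.onion", "Identifier": "10.0.0.1"})

	// tiedot queries are themselves JSON documents; this is the shape the
	// helpers above build with fmt.Sprintf.
	var query interface{}
	json.Unmarshal([]byte(`{"eq": "example.onion", "in": ["Onion"]}`), &query)

	queryResult := make(map[int]struct{}) // matching document IDs land in the keys
	if err := db.EvalQuery(query, rels, &queryResult); err != nil {
		panic(err)
	}
	fmt.Println(len(queryResult)) // 1
}
```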