Skip to content

Commit

Permalink
Tagger now supports matching email and uri paths
Browse files Browse the repository at this point in the history
  • Loading branch information
awick committed Sep 18, 2014
1 parent 52019d1 commit 9e96483
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 57 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
- Tagger now loads items from ES faster
- Tagger now supports setting almost any field using match;FIELD=value;FIELD2=value2
It is now possible to have a different tag per match
- Tagger now supports matching email and uri paths

0.11.1 2014/08/07
- NOTICE: ES 0.90.12+, 1.1.x, 1.2.0 are supported by this version.
Expand Down
175 changes: 121 additions & 54 deletions capture/plugins/tagger.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ static int tagsField;
static int httpHostField;
static int httpXffField;
static int httpMd5Field;
static int httpPathField;
static int emailMd5Field;
static int emailSrcField;
static int emailDstField;
static int dnsHostField;

/******************************************************************************/
Expand Down Expand Up @@ -104,10 +107,14 @@ typedef struct tagger_info {
} TaggerInfo_t;

/******************************************************************************/
typedef HASH_VAR(s_, TaggerStringHash_t, TaggerStringHead_t, 37277);

TaggerStringHash_t allDomains;
TaggerStringHash_t allMD5s;
TaggerStringHash_t allEmails;
TaggerStringHash_t allURIs;

HASH_VAR(s_, allFiles, TaggerFileHead_t, 101);
HASH_VAR(s_, allDomains, TaggerStringHead_t, 7919);
HASH_VAR(s_, allMD5s, TaggerStringHead_t, 37277);

static patricia_tree_t *allIps;

Expand Down Expand Up @@ -224,6 +231,16 @@ void tagger_plugin_save(MolochSession_t *session, int UNUSED(final))
);
}

if (session->fields[httpPathField]) {
MolochStringHashStd_t *shash = session->fields[httpPathField]->shash;
HASH_FORALL(s_, *shash, hstring,
HASH_FIND_HASH(s_, allURIs, hstring->s_hash, hstring->str, tstring);
if (tstring) {
tagger_process_match(session, tstring->infos);
}
);
}

if (session->fields[emailMd5Field]) {
MolochStringHashStd_t *shash = session->fields[emailMd5Field]->shash;
HASH_FORALL(s_, *shash, hstring,
Expand All @@ -232,6 +249,24 @@ void tagger_plugin_save(MolochSession_t *session, int UNUSED(final))
tagger_process_match(session, tstring->infos);
);
}

if (session->fields[emailSrcField]) {
MolochStringHashStd_t *shash = session->fields[emailSrcField]->shash;
HASH_FORALL(s_, *shash, hstring,
HASH_FIND_HASH(s_, allEmails, hstring->s_hash, hstring->str, tstring);
if (tstring)
tagger_process_match(session, tstring->infos);
);
}

if (session->fields[emailDstField]) {
MolochStringHashStd_t *shash = session->fields[emailDstField]->shash;
HASH_FORALL(s_, *shash, hstring,
HASH_FIND_HASH(s_, allEmails, hstring->s_hash, hstring->str, tstring);
if (tstring)
tagger_process_match(session, tstring->infos);
);
}
}

/******************************************************************************/
Expand All @@ -253,6 +288,18 @@ void tagger_plugin_exit()
MOLOCH_TYPE_FREE(TaggerString_t, tstring);
);

HASH_FORALL_POP_HEAD(s_, allEmails, tstring,
free(tstring->str);
g_ptr_array_free(tstring->infos, TRUE);
MOLOCH_TYPE_FREE(TaggerString_t, tstring);
);

HASH_FORALL_POP_HEAD(s_, allURIs, tstring,
free(tstring->str);
g_ptr_array_free(tstring->infos, TRUE);
MOLOCH_TYPE_FREE(TaggerString_t, tstring);
);

TaggerFile_t *file;
HASH_FORALL_POP_HEAD(s_, allFiles, file,
free(file->str);
Expand Down Expand Up @@ -280,10 +327,10 @@ void tagger_remove_file(GPtrArray *infos, TaggerFile_t *file)
*/
void tagger_unload_file(TaggerFile_t *file) {
int i;
for (i = 0; file->elements[i]; i++) {
if (file->type[0] == 'i') {
if (file->type[0] == 'i') {
prefix_t prefix;

prefix_t prefix;
for (i = 0; file->elements[i]; i++) {
if (!ascii2prefix2(AF_INET, file->elements[i], &prefix)) {
LOG("Couldn't unload %s", file->elements[i]);
continue;
Expand All @@ -296,24 +343,37 @@ void tagger_unload_file(TaggerFile_t *file) {
}

tagger_remove_file(((TaggerIP_t *)(node->data))->infos, file);
} else if (file->type[0] == 'h') {
TaggerString_t *tstring;
HASH_FIND(s_, allDomains, file->elements[i], tstring);
if (tstring) {
tagger_remove_file(tstring->infos, file);
// We could check if files is now empty and remove the node, but the
// theory is most of the time it will be just readded in the load_file
}
} else if (file->type[0] == 'm') {
TaggerString_t *tstring;
HASH_FIND(s_, allMD5s, file->elements[i], tstring);
}
return;
}

TaggerStringHash_t *hash = 0;
switch (file->type[0]) {
case 'h':
hash = (TaggerStringHash_t *)&allDomains;
break;
case 'm':
hash = (TaggerStringHash_t *)&allMD5s;
break;
case 'e':
hash = (TaggerStringHash_t *)&allEmails;
break;
case 'u':
hash = (TaggerStringHash_t *)&allURIs;
break;
default:
LOG("ERROR - Unknown tagger type %s for %s", file->type, file->str);
}

TaggerString_t *tstring;
if (hash) {
for (i = 0; file->elements[i]; i++) {
HASH_FIND(s_, *hash, file->elements[i], tstring);
if (tstring) {
tagger_remove_file(tstring->infos, file);
// We could check if files is now empty and remove the node, but the
// theory is most of the time it will be just readded in the load_file
}
} else {
LOG("ERROR - Unknown tagger type %s for %s", file->type, file->str);
}
}

Expand Down Expand Up @@ -385,6 +445,9 @@ void tagger_load_file_cb(unsigned char *data, int data_len, gpointer uw)
moloch_db_get_tag(NULL, tagsField, file->tags[tag], NULL);
}

patricia_node_t *node;
TaggerIP_t *tip;

for (i = 0; file->elements[i]; i++) {

int p = 2;
Expand Down Expand Up @@ -445,21 +508,18 @@ void tagger_load_file_cb(unsigned char *data, int data_len, gpointer uw)
continue;
}




DLL_PUSH_TAIL(o_, &info->ops, op);

}

if (file->type[0] == 'i') {
patricia_node_t *node;
TaggerStringHash_t *hash = 0;
switch (file->type[0]) {
case 'i':
node = make_and_lookup(allIps, parts[0]);
if (!node) {
LOG("Couldn't create node for %s", parts[0]);
continue;
}
TaggerIP_t *tip;
if (!node->data) {
tip = MOLOCH_TYPE_ALLOC(TaggerIP_t);
tip->infos = g_ptr_array_new_with_free_func(tagger_info_free);
Expand All @@ -468,33 +528,35 @@ void tagger_load_file_cb(unsigned char *data, int data_len, gpointer uw)
tip = node->data;
}
g_ptr_array_add(tip->infos, info);
} else if (file->type[0] == 'h') {
TaggerString_t *tstring;

HASH_FIND(s_, allDomains, parts[0], tstring);
if (!tstring) {
tstring = MOLOCH_TYPE_ALLOC(TaggerString_t);
tstring->str = strdup(parts[0]); // Need to strdup since file might be unloaded
tstring->infos = g_ptr_array_new_with_free_func(tagger_info_free);
HASH_ADD(s_, allDomains, tstring->str, tstring);
}
g_ptr_array_add(tstring->infos, info);
} else if (file->type[0] == 'm') {
TaggerString_t *tstring;

HASH_FIND(s_, allMD5s, parts[0], tstring);
if (!tstring) {
tstring = MOLOCH_TYPE_ALLOC(TaggerString_t);
tstring->str = strdup(parts[0]); // Need to strdup since file might be unloaded
tstring->infos = g_ptr_array_new_with_free_func(tagger_info_free);
HASH_ADD(s_, allMD5s, tstring->str, tstring);
}
g_ptr_array_add(tstring->infos, info);
} else {
continue;
case 'h':
hash = (TaggerStringHash_t *)&allDomains;
break;
case 'm':
hash = (TaggerStringHash_t *)&allMD5s;
break;
case 'e':
hash = (TaggerStringHash_t *)&allEmails;
break;
case 'u':
hash = (TaggerStringHash_t *)&allURIs;
break;
default:
LOG("ERROR - Unknown tagger type %s for %s", file->type, file->str);
continue;
}

TaggerString_t *tstring;

HASH_FIND(s_, *hash, parts[0], tstring);
if (!tstring) {
tstring = MOLOCH_TYPE_ALLOC(TaggerString_t);
tstring->str = strdup(parts[0]); // Need to strdup since file might be unloaded
tstring->infos = g_ptr_array_new_with_free_func(tagger_info_free);
HASH_ADD(s_, *hash, tstring->str, tstring);
}
}
g_ptr_array_add(tstring->infos, info);
} /* for elements */
}
/******************************************************************************/
/*
Expand Down Expand Up @@ -604,6 +666,8 @@ void moloch_plugin_init()
HASH_INIT(s_, allFiles, moloch_string_hash, moloch_string_cmp);
HASH_INIT(s_, allDomains, moloch_string_hash, moloch_string_cmp);
HASH_INIT(s_, allMD5s, moloch_string_hash, moloch_string_cmp);
HASH_INIT(s_, allEmails, moloch_string_hash, moloch_string_cmp);
HASH_INIT(s_, allURIs, moloch_string_hash, moloch_string_cmp);
allIps = New_Patricia(32);

moloch_plugins_register("tagger", FALSE);
Expand All @@ -619,12 +683,15 @@ void moloch_plugin_init()
NULL
);

tagsField = moloch_field_by_db("ta");
httpHostField = moloch_field_by_db("ho");
httpXffField = moloch_field_by_db("xff");
httpMd5Field = moloch_field_by_db("hmd5");
emailMd5Field = moloch_field_by_db("emd5");
dnsHostField = moloch_field_by_db("dnsho");
tagsField = moloch_field_by_db("ta");
httpHostField = moloch_field_by_db("ho");
httpXffField = moloch_field_by_db("xff");
httpMd5Field = moloch_field_by_db("hmd5");
httpPathField = moloch_field_by_db("hpath");
emailMd5Field = moloch_field_by_db("emd5");
emailSrcField = moloch_field_by_db("esrc");
emailDstField = moloch_field_by_db("edst");
dnsHostField = moloch_field_by_db("dnsho");


/* Call right away sync, and schedule every 60 seconds async */
Expand Down
4 changes: 2 additions & 2 deletions capture/plugins/taggerUpload.pl
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ ($)
{
my ($str) = @_;
print $str,"\n";
die "$0 ESHOST:ESPORT (ip|host|md5) filename tag1 [tag2..tagN]";
die "$0 ESHOST:ESPORT (ip|host|md5|email|uri) filename tag1 [tag2..tagN]";
}

showHelp("Missing arguments") if (@ARGV < 4);
showHelp("Must be ip, host, or md5 for file type instead of $ARGV[1]") if ($ARGV[1] !~ /^(host|ip|md5)$/);
# Reject unknown file types; the message lists every type the pattern below accepts.
showHelp("Must be ip, host, md5, email, or uri for file type instead of $ARGV[1]") if ($ARGV[1] !~ /^(host|ip|md5|email|uri)$/);
showHelp("file '$ARGV[2]' not found") if (! -f $ARGV[2]);
showHelp("file '$ARGV[2]' empty") if (-z $ARGV[2]);

Expand Down
3 changes: 3 additions & 0 deletions tests/email.tagger2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
12345678@aol.com;email.dst=added1;tags=srcmatch
[email protected];email.src=added2;tags=dstmatch

14 changes: 13 additions & 1 deletion tests/tests.pl
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ sub errTest {
sub doViewer {
my ($cmd) = @_;

plan tests => 927;
plan tests => 943;

die "Must run in tests directory" if (! -f "../db/db.pl");

Expand Down Expand Up @@ -185,6 +185,8 @@ sub doViewer {
system("../capture/plugins/taggerUpload.pl localhost:9200 ip ip.tagger2.json iptaggertest2");
system("../capture/plugins/taggerUpload.pl localhost:9200 host host.tagger2.json hosttaggertest2");
system("../capture/plugins/taggerUpload.pl localhost:9200 md5 md5.tagger2.json md5taggertest2");
system("../capture/plugins/taggerUpload.pl localhost:9200 email email.tagger2.json emailtaggertest2");
system("../capture/plugins/taggerUpload.pl localhost:9200 uri uri.tagger2.json uritaggertest2");

$main::userAgent->get("http://localhost:9200/_flush");
$main::userAgent->get("http://localhost:9200/_refresh");
Expand Down Expand Up @@ -711,6 +713,16 @@ sub doViewer {
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/socks5-rdp.pcap||file=$pwd/http-content-gzip.pcap)&&tags=md5taggertest2"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/socks5-rdp.pcap||file=$pwd/http-content-gzip.pcap)&&tags=bymd51&&mysql.user=bymd51mysqluser&&test.ip=133.133.133.133"));

countTest(2, "date=-1&expression=" . uri_escape("(file=$pwd/smtp-data-250.pcap||file=$pwd/smtp-data-521.pcap)&&tags=emailtaggertest2"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/smtp-data-250.pcap||file=$pwd/smtp-data-521.pcap)&&tags=srcmatch"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/smtp-data-250.pcap||file=$pwd/smtp-data-521.pcap)&&tags=dstmatch"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/smtp-data-250.pcap||file=$pwd/smtp-data-521.pcap)&&email.dst=added1"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/smtp-data-250.pcap||file=$pwd/smtp-data-521.pcap)&&email.src=added2"));

countTest(2, "date=-1&expression=" . uri_escape("(file=$pwd/http-500-head.pcap||file=$pwd/http-wrapped-header.pcap)&&tags=uritaggertest2"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/http-500-head.pcap||file=$pwd/http-wrapped-header.pcap)&&http.referer=added1&&tags=firstmatch"));
countTest(1, "date=-1&expression=" . uri_escape("(file=$pwd/http-500-head.pcap||file=$pwd/http-wrapped-header.pcap)&&http.user-agent=added2&&tags=secondmatch"));

# bigendian pcap file tests
my $json = viewerGet("/sessions.json?date=-1&expression=" . uri_escape("file=$pwd/bigendian.pcap"));
is ($json->{iTotalDisplayRecords}, 1, "bigendian iTotalDisplayRecords");
Expand Down
3 changes: 3 additions & 0 deletions tests/uri.tagger2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/UpdataConfig.dat;http.referer=added1;tags=firstmatch
/x/xx/xxxxxxxxxxxxxxxxxxx/x/xxxxxx/xxxxxxxxxxxxxxx;http.user-agent=added2;tags=secondmatch

0 comments on commit 9e96483

Please sign in to comment.