Skip to content

Commit

Permalink
feat(advanced search): support other data types and add field aliases (
Browse files Browse the repository at this point in the history
…#123)

* integrate couchbase FTS

* move query-parser to the project folder

* add types and aliases

* refactor: update query parser type system for search queries

* fix: normalize date input into rfc3339

* chore: update Couchbase server image to version 7.6.5

* fix date queries

* refactor: replace alias with path in search query configuration and skip tests
  • Loading branch information
JLL32 authored Feb 2, 2025
1 parent 4c8b1c1 commit 17df460
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 42 deletions.
2 changes: 1 addition & 1 deletion configs/local.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ recaptcha_key = "" # Google ReCaptcha v3 secret key.
address = "http://localhost:8000" # DSN for the frontend.

[db]
server = "couchbase://localhost" # DSN for connecting to the database
server = "couchbase://127.0.0.1" # DSN for connecting to the database
username = "Administrator" # Username used to access the db.
password = "password" # Password used to access the db.
bucket_name = "sfw" # Name of the couchbase bucket.
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

services:
couchbase:
image: couchbase/server:7.6.1
image: couchbase/server:7.6.5
ports:
- "8091:8091"
- "8092:8092"
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ require (
github.com/labstack/echo/v4 v4.9.1
github.com/minio/minio-go/v7 v7.0.73
github.com/nsqio/go-nsq v1.1.0
github.com/saferwall/advanced-search v0.0.0-20250120184926-f1a096cecd50
github.com/spf13/viper v1.19.0
github.com/stretchr/testify v1.9.0
github.com/swaggo/swag v1.16.3
Expand Down
63 changes: 57 additions & 6 deletions internal/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"math/rand"
"strings"
"time"
Expand Down Expand Up @@ -251,19 +250,71 @@ func shortID(length int) string {

func (db *DB) Search(ctx context.Context, stringQuery string, val *interface{}, totalHits *uint64) error {

fmt.Printf("Query: %v", stringQuery)
query, err := gen.Generate(stringQuery)
query, err := gen.Generate(stringQuery,
gen.Config{
"first_seen": {
Type: gen.DATE,
},
"last_scanned": {
Type: gen.DATE,
},
"size": {
Type: gen.NUMBER,
},
"avast": {
Path: "multiav.last_scan.avast.output",
},
"avira": {
Path: "multiav.last_scan.avira.output",
},
"bitdefender": {
Path: "multiav.last_scan.bitdefender.output",
},
"clamav": {
Path: "multiav.last_scan.clamav.output",
},
"comodo": {
Path: "multiav.last_scan.comodo.output",
},
"drweb": {
Path: "multiav.last_scan.drweb.output",
},
"eset": {
Path: "multiav.last_scan.eset.output",
},
"kaspersky": {
Path: "multiav.last_scan.kaspersky.output",
},
"mcafee": {
Path: "multiav.last_scan.mcafee.output",
},
"sophos": {
Path: "multiav.last_scan.sophos.output",
},
"symantec": {
Path: "multiav.last_scan.symantec.output",
},
"trendmicro": {
Path: "multiav.last_scan.trendmicro.output",
},
"windefender": {
Path: "multiav.last_scan.windefender.output",
},
"fsecure": {
Path: "multiav.last_scan.fsecure.output",
},
},
)
if err != nil {
panic(err.Error())
// return err
return err
}

// sfw._default.sfw_fts
result, err := db.Cluster.SearchQuery(
"sfw._default.sfw_fts", query,
&gocb.SearchOptions{
Limit: 100,
Fields: []string{"size", "file_extension", "file_format", "first_seen", "last_scan", "tags.packer", "tags.pe",
Fields: []string{"size", "file_extension", "file_format", "first_seen", "last_scanned", "tags.packer", "tags.pe",
"tags.avira", "tags.avast", "tags.kaspersky",
},
},
Expand Down
111 changes: 90 additions & 21 deletions internal/query-parser/gen/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,29 @@ package gen
import (
"fmt"
"strconv"
"time"

"github.com/couchbase/gocb/v2/search"
"github.com/saferwall/advanced-search/gen"
"github.com/saferwall/advanced-search/parser"
"github.com/saferwall/advanced-search/token"
"github.com/saferwall/saferwall-api/internal/query-parser/lexer"
"github.com/saferwall/saferwall-api/internal/query-parser/parser"
"github.com/saferwall/saferwall-api/internal/query-parser/token"
)

func Generate(input string) (search.Query, error) {
// Type identifies how the right-hand side of a range comparison on a field
// must be interpreted when the search query is generated.
//
// The zero value deliberately matches none of the named constants below: a
// field that is absent from the Config, or that only declares a Path alias,
// is "untyped" and falls through to the default (term range) handling.
type Type int

// Config maps a field name, as written by the user in a query, to its
// search settings.
//
//   - Type selects numeric or date handling for range operators; leave it
//     unset for plain text fields.
//   - Path, when non-empty, is the indexed field path substituted for the
//     user-facing name (a field alias).
type Config map[string]struct {
	Type Type
	Path string
}

const (
	// The zero value is reserved for "no explicit type". Without this,
	// NUMBER would be 0 and every alias-only or unknown field would be
	// treated as numeric, making the term-range fallback unreachable.
	_ Type = iota
	// NUMBER marks a field whose range operands are parsed as numbers.
	NUMBER
	// DATE marks a field whose range operands are parsed as dates.
	DATE
)

// config holds the Config supplied to the most recent Generate call and is
// read by the query generators.
// NOTE(review): this is mutable package-level state; concurrent Generate
// calls with different configs would race on it — confirm whether callers
// serialize access or thread the config through explicitly.
var config Config

func Generate(input string, cfg Config) (search.Query, error) {
l := lexer.New(input)
var tokens []*token.Token
for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
Expand All @@ -24,7 +38,9 @@ func Generate(input string) (search.Query, error) {
if err != nil {
return nil, err
}
result, err := gen.GenerateCouchbaseFTS(expr)

config = cfg
result, err := GenerateCouchbaseFTS(expr)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -66,12 +82,18 @@ func generateBinaryCouchbase(expr *parser.BinaryExpression) (search.Query, error

func generateComparisonCouchbase(expr *parser.ComparisonExpression) (search.Query, error) {
// NOTE: might need to support term match query
field := expr.Left
if v, ok := config[expr.Left]; ok {
if v.Path != "" {
field = v.Path
}
}

switch expr.Operator.Type {
case token.ASSIGN:
return search.NewMatchQuery(expr.Right).Field(expr.Left), nil
return search.NewMatchQuery(expr.Right).Field(field), nil
case token.NOT_EQ:
return search.NewBooleanQuery().MustNot(search.NewMatchQuery(expr.Right).Field(expr.Left)), nil
return search.NewBooleanQuery().MustNot(search.NewMatchQuery(expr.Right).Field(field)), nil
case token.GT, token.GE, token.LT, token.LE:
return generateRangeQuery(expr)
default:
Expand All @@ -80,25 +102,52 @@ func generateComparisonCouchbase(expr *parser.ComparisonExpression) (search.Quer
}

func generateRangeQuery(expr *parser.ComparisonExpression) (search.Query, error) {
field := expr.Left
if v, ok := config[expr.Left]; ok {
if v.Path != "" {
field = v.Path
}
}
value := expr.Right

t := config[expr.Left].Type

isInclusive := expr.Operator.Type == token.GE || expr.Operator.Type == token.LE
switch expr.Operator.Type {
case token.GT, token.GE:
isInclusive := expr.Operator.Type == token.GE
if v, ok := isValidF32(expr.Right); ok {
return search.NewNumericRangeQuery().Field(expr.Left).Min(v, isInclusive), nil
} else if lexer.IsISODate(expr.Right) {
return search.NewDateRangeQuery().Field(expr.Left).Start(expr.Right, isInclusive), nil
} else {
return search.NewTermRangeQuery(expr.Left).Min(expr.Right, isInclusive), nil
switch t {
case NUMBER:
v, err := strconv.ParseFloat(value, 32)
if err != nil {
return nil, fmt.Errorf("unsupported type for field: %s", field)
}
return search.NewNumericRangeQuery().Field(field).Min(float32(v), isInclusive), nil
case DATE:
timestamp, err := parseDate(value)
if err != nil {
return nil, fmt.Errorf("unsupported type for field: %s", field)
}
return search.NewNumericRangeQuery().Field(field).Min(float32(timestamp), isInclusive), nil
default:
return search.NewTermRangeQuery(field).Min(value, isInclusive), nil
}

case token.LT, token.LE:
isInclusive := expr.Operator.Type == token.LE
if v, ok := isValidF32(expr.Right); ok {
return search.NewNumericRangeQuery().Field(expr.Left).Max(v, isInclusive), nil
} else if lexer.IsISODate(expr.Right) {
return search.NewDateRangeQuery().Field(expr.Left).End(expr.Right, isInclusive), nil
} else {
return search.NewTermRangeQuery(expr.Left).Max(expr.Right, isInclusive), nil
switch t {
case NUMBER:
num, err := strconv.ParseFloat(value, 32)
if err != nil {
return nil, fmt.Errorf("unsupported type for field: %s", field)
}
return search.NewNumericRangeQuery().Field(field).Max(float32(num), isInclusive), nil
case DATE:
timestamp, err := parseDate(value)
if err != nil {
return nil, fmt.Errorf("unsupported type for field: %s", field)
}
return search.NewNumericRangeQuery().Field(field).Max(float32(timestamp), isInclusive), nil
default:
return search.NewTermRangeQuery(field).Max(value, isInclusive), nil
}
}

Expand All @@ -118,3 +167,23 @@ func isValidF32(s string) (float32, bool) {
}
return 0, false
}

// parseDate converts a user-supplied date string into Unix seconds.
//
// Accepted inputs, tried from coarsest to most specific:
//
//	2006                       year only
//	2006-01                    year and month
//	2006-01-02                 calendar date
//	2006-01-02T15:04:05Z07:00  RFC 3339 timestamp ("Z" or a numeric offset)
//
// Components missing from the input default to the start of the period
// (January, day 1, midnight), and inputs without an offset are interpreted
// as UTC, per time.Parse. An error is returned when no layout matches.
func parseDate(date string) (int64, error) {
	// time.RFC3339 already covers both the "Z" suffix and numeric offsets,
	// so no separate literal-"Z" or spelled-out duplicate layout is needed.
	layouts := [...]string{
		"2006",
		"2006-01",
		"2006-01-02",
		time.RFC3339,
	}

	for _, layout := range layouts {
		if t, err := time.Parse(layout, date); err == nil {
			return t.Unix(), nil
		}
	}

	return 0, fmt.Errorf("unable to parse date: %s", date)
}
9 changes: 2 additions & 7 deletions internal/query-parser/gen/gen_test.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
package gen

import (
"reflect"
"testing"

"github.com/couchbase/gocb/v2/search"
)

/*
func TestGenerate(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -103,3 +97,4 @@ func TestGenerate(t *testing.T) {
})
}
}
*/
2 changes: 1 addition & 1 deletion internal/query-parser/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package lexer
import (
"regexp"

"github.com/saferwall/advanced-search/token"
"github.com/saferwall/saferwall-api/internal/query-parser/token"
)

// Define a regular expression for ISO date format
Expand Down
2 changes: 1 addition & 1 deletion internal/query-parser/lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package lexer
import (
"testing"

"github.com/saferwall/advanced-search/token"
"github.com/saferwall/saferwall-api/internal/query-parser/token"
)

var tests = []struct {
Expand Down
2 changes: 1 addition & 1 deletion internal/query-parser/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"strconv"
"strings"

"github.com/saferwall/advanced-search/token"
"github.com/saferwall/saferwall-api/internal/query-parser/token"
)

// AST node types
Expand Down
4 changes: 2 additions & 2 deletions internal/query-parser/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"fmt"
"testing"

"github.com/saferwall/advanced-search/lexer"
"github.com/saferwall/advanced-search/token"
"github.com/saferwall/saferwall-api/internal/query-parser/lexer"
"github.com/saferwall/saferwall-api/internal/query-parser/token"
)

func TestSimpleComparison(t *testing.T) {
Expand Down

0 comments on commit 17df460

Please sign in to comment.