Skip to content

Commit

Permalink
Indexing and searching of geo points are based on
Browse files Browse the repository at this point in the history
multi-precision tokens generated from the morton
hash of the original geo point. And this commit
adds another configurable index level option to
load and use Google's s2geometry library for generating
the hierarchical spatial tokens for both indexing
and search flows. This has shown considerable
performance improvements to the memory usage as
well as to the throughput of geo point queries.
(up to 5X)

In the absence of the config override for s2
plugin, the conventional way of geo token
generation prevails. And this would let all the
existing indexes work as usual.
  • Loading branch information
sreekanth-cb committed Mar 24, 2022
1 parent 1544ddc commit ee524ab
Show file tree
Hide file tree
Showing 12 changed files with 399 additions and 64 deletions.
53 changes: 40 additions & 13 deletions document/field_geopoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ type GeoPointField struct {
numPlainTextBytes uint64
length int
frequencies index.TokenFrequencies

spatialplugin index.SpatialAnalyzerPlugin
}

func (n *GeoPointField) Size() int {
Expand Down Expand Up @@ -75,7 +77,7 @@ func (n *GeoPointField) AnalyzedTokenFrequencies() index.TokenFrequencies {
}

func (n *GeoPointField) Analyze() {
tokens := make(analysis.TokenStream, 0)
tokens := make(analysis.TokenStream, 0, 8)
tokens = append(tokens, &analysis.Token{
Start: 0,
End: len(n.value),
Expand All @@ -84,24 +86,42 @@ func (n *GeoPointField) Analyze() {
Type: analysis.Numeric,
})

original, err := n.value.Int64()
if err == nil {
if n.spatialplugin != nil {
lat, _ := n.Lat()
lon, _ := n.Lon()
p := &geo.Point{Lat: lat, Lon: lon}
terms := n.spatialplugin.GetIndexTokens(p)

shift := GeoPrecisionStep
for shift < 64 {
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
if err != nil {
break
}
for _, term := range terms {
token := analysis.Token{
Start: 0,
End: len(shiftEncoded),
Term: shiftEncoded,
End: len(term),
Term: []byte(term),
Position: 1,
Type: analysis.Numeric,
Type: analysis.AlphaNumeric,
}
tokens = append(tokens, &token)
shift += GeoPrecisionStep
}
} else {
original, err := n.value.Int64()
if err == nil {

shift := GeoPrecisionStep
for shift < 64 {
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
if err != nil {
break
}
token := analysis.Token{
Start: 0,
End: len(shiftEncoded),
Term: shiftEncoded,
Position: 1,
Type: analysis.Numeric,
}
tokens = append(tokens, &token)
shift += GeoPrecisionStep
}
}
}

Expand Down Expand Up @@ -164,3 +184,10 @@ func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, l
numPlainTextBytes: uint64(8),
}
}

// SetSpatialAnalyzerPlugin stores plugin on the field, replacing any
// previously set value. It implements the
// index.TokenisableSpatialField interface.
func (n *GeoPointField) SetSpatialAnalyzerPlugin(plugin index.SpatialAnalyzerPlugin) {
	n.spatialplugin = plugin
}
194 changes: 194 additions & 0 deletions geo/geo_s2_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package geo

import (
"sync"

index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/s2"
)

// Package-level registry of spatial analyzer plugins.
var (
	// spatialPluginsMap caches the registered plugins, keyed by the
	// string each plugin reports from its Type method.
	spatialPluginsMap = map[string]index.SpatialAnalyzerPlugin{}

	// pluginsMapLock serializes access to spatialPluginsMap.
	pluginsMapLock sync.RWMutex
)

// RegisterSpatialAnalyzerPlugin registers the given plugin implementation
// in the plugin cache under the key returned by plugin.Type(). Registering
// another plugin that reports the same type replaces the earlier entry.
func RegisterSpatialAnalyzerPlugin(plugin index.SpatialAnalyzerPlugin) {
	// Resolve the key before taking the lock so that a panicking Type()
	// implementation cannot leave the registry write-locked.
	typ := plugin.Type()

	pluginsMapLock.Lock()
	defer pluginsMapLock.Unlock()
	spatialPluginsMap[typ] = plugin
}

// GetSpatialAnalyzerPlugin looks up the plugin registered for typ.
// The result is nil when no plugin has been registered under that type.
func GetSpatialAnalyzerPlugin(typ string) index.SpatialAnalyzerPlugin {
	pluginsMapLock.RLock()
	defer pluginsMapLock.RUnlock()
	return spatialPluginsMap[typ]
}

// init registers the s2 region term indexer plugin when the package
// is initialized.
func init() {
registerS2RegionTermIndexer()
}

// registerS2RegionTermIndexer configures an s2 region term indexer for
// points-only indexes and registers it as a spatial analyzer plugin.
func registerS2RegionTermIndexer() {
	// For detailed commentary on the s2 options, refer to
	// https://github.com/sreekanth-cb/geo/blob/806f1c56fffb418d53d2ea6ce6aabaa376355d67/s2/region_term_indexer.go#L92
	var opts s2.Options

	// maxLevel controls the maximum size of the
	// S2Cells used to approximate regions.
	opts.SetMaxLevel(16)

	// minLevel controls the minimum size of the
	// S2Cells used to approximate regions.
	opts.SetMinLevel(4)

	// A levelMod value greater than 1 increases the effective branching
	// factor of the S2Cell hierarchy by skipping some levels.
	opts.SetLevelMod(2)

	// maxCells controls the maximum number of cells
	// used when approximating each s2 region.
	opts.SetMaxCells(8)

	// The index only contains points (rather than regions), so set the
	// points-only flag. This generates smaller and faster queries that
	// are specialized for the points-only case.
	opts.SetPointsOnly(true)

	indexer := s2.NewRegionTermIndexerWithOptions(opts)
	RegisterSpatialAnalyzerPlugin(&S2SpatialAnalyzerPlugin{s2Indexer: indexer})
}

// S2SpatialAnalyzerPlugin is an implementation of
// the index.SpatialAnalyzerPlugin interface that
// delegates term generation to an s2 region term indexer.
type S2SpatialAnalyzerPlugin struct {
// s2Indexer produces the terms returned by
// GetIndexTokens and GetQueryTokens.
s2Indexer *s2.RegionTermIndexer
}

// Type returns the identifier of this plugin, "s2".
// RegisterSpatialAnalyzerPlugin uses this value as the cache key.
func (s *S2SpatialAnalyzerPlugin) Type() string {
return "s2"
}

// GetIndexTokens returns the terms to index for the given shape.
// Only shapes that report the type "point" and are a *Point are
// supported; every other shape yields nil.
func (s *S2SpatialAnalyzerPlugin) GetIndexTokens(shape index.GeoJSON) []string {
	if shape.Type() != "point" {
		return nil
	}

	pt, isPoint := shape.(*Point)
	if !isPoint {
		return nil
	}

	// generate the tokens for indexing.
	latLng := s2.LatLngFromDegrees(pt.Lat, pt.Lon)
	return s.s2Indexer.GetIndexTermsForPoint(s2.PointFromLatLng(latLng), "")
}

// GetQueryTokens returns the terms to search for the given query shape.
// Point-distance, bounded-rectangle and bounded-polygon shapes are
// supported; any other shape yields nil.
//
// Since only one dimensional points are indexed, the covering terms are
// filtered out of the search time terms for every supported shape. This
// pruning needs to be removed once 2 or higher dimensional shapes are
// indexed.
func (s *S2SpatialAnalyzerPlugin) GetQueryTokens(shape index.GeoJSON) []string {
	switch q := shape.(type) {
	case *pointDistance:
		// query region derived from the center point and the distance.
		region := s2.CapFromCenterAndRadius(q.centerLat, q.centerLon, q.dist)
		return s2.FilterOutCoveringTerms(
			s.s2Indexer.GetQueryTermsForRegion(region, ""))

	case *boundedRectangle:
		// query region for the given bounding box.
		region := s2.RectFromDegrees(q.minLat, q.minLon, q.maxLat, q.maxLon)
		return s2.FilterOutCoveringTerms(
			s.s2Indexer.GetQueryTermsForRegion(region, ""))

	case *boundedPolygon:
		// build a single-loop polygon from the coordinates.
		vertices := make([]s2.Point, 0, len(q.coordinates))
		for i := range q.coordinates {
			c := &q.coordinates[i]
			vertices = append(vertices,
				s2.PointFromLatLng(s2.LatLngFromDegrees(c.Lat, c.Lon)))
		}
		loop := s2.LoopFromPoints(vertices)
		polygon := s2.PolygonFromLoops([]*s2.Loop{loop})

		// the polygon is searched through its cap bound.
		return s2.FilterOutCoveringTerms(
			s.s2Indexer.GetQueryTermsForRegion(polygon.CapBound(), ""))
	}

	return nil
}

// boundedRectangle is a query shape holding the minimum and maximum
// latitude and longitude of a bounding box.
type boundedRectangle struct {
	minLat, maxLat float64
	minLon, maxLon float64
}

// Type returns the shape identifier "boundedRectangle".
func (br *boundedRectangle) Type() string {
	return "boundedRectangle"
}

// NewBoundedRectangle returns a boundedRectangle populated with the
// given minimum and maximum latitude/longitude values.
func NewBoundedRectangle(minLat, minLon, maxLat, maxLon float64) *boundedRectangle {
	rect := new(boundedRectangle)
	rect.minLat, rect.minLon = minLat, minLon
	rect.maxLat, rect.maxLon = maxLat, maxLon
	return rect
}

// boundedPolygon is a query shape holding the points that
// GetQueryTokens turns into the vertices of a polygon.
type boundedPolygon struct {
	coordinates []Point
}

// Type returns the shape identifier "boundedPolygon".
func (bp *boundedPolygon) Type() string {
	return "boundedPolygon"
}

// NewBoundedPolygon returns a boundedPolygon over the given coordinates.
// The slice is retained as-is; it is not copied.
func NewBoundedPolygon(coordinates []Point) *boundedPolygon {
	polygon := new(boundedPolygon)
	polygon.coordinates = coordinates
	return polygon
}

// pointDistance is a query shape holding a center point
// (latitude and longitude) and a distance from it.
type pointDistance struct {
	dist      float64
	centerLat float64
	centerLon float64
}

// Type returns the shape identifier "pointDistance".
func (p *pointDistance) Type() string {
	return "pointDistance"
}

// NewPointDistance returns a pointDistance populated with the given
// center latitude, center longitude and distance.
func NewPointDistance(centerLat, centerLon, dist float64) *pointDistance {
	pd := new(pointDistance)
	pd.centerLat, pd.centerLon = centerLat, centerLon
	pd.dist = dist
	return pd
}

// Type returns the shape identifier "point", which is the value
// S2SpatialAnalyzerPlugin.GetIndexTokens checks for before indexing.
func (p *Point) Type() string {
return "point"
}
8 changes: 4 additions & 4 deletions geo/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func TestExtractGeoPoint(t *testing.T) {
},
// test going through interface with lng variant
{
in: &s2{
in: &s12{
lng: 4.0,
lat: 6.9,
},
Expand Down Expand Up @@ -187,15 +187,15 @@ func (s *s1) Lat() float64 {
return s.lat
}

type s2 struct {
type s12 struct {
lng float64
lat float64
}

func (s *s2) Lng() float64 {
func (s *s12) Lng() float64 {
return s.lng
}

func (s *s2) Lat() float64 {
func (s *s12) Lat() float64 {
return s.lat
}
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ go 1.13
require (
github.com/RoaringBitmap/roaring v0.9.4
github.com/bits-and-blooms/bitset v1.2.0
github.com/blevesearch/bleve_index_api v1.0.1
github.com/blevesearch/go-metrics v0.0.0-20190826022208-cac0b30c2563
github.com/blevesearch/bleve_index_api v1.0.2-0.20220322163856-157bc7f3ed4d
github.com/blevesearch/geo v0.1.9
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
github.com/blevesearch/gtreap v0.1.1
Expand Down
11 changes: 8 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/blevesearch/bleve_index_api v1.0.1 h1:nx9++0hnyiGOHJwQQYfsUGzpRdEVE5LsylmmngQvaFk=
github.com/blevesearch/bleve_index_api v1.0.1/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4=
github.com/blevesearch/go-metrics v0.0.0-20190826022208-cac0b30c2563 h1:mQtHArdP4SdgdR2mY8i+W3u9ekCDWDxcM/3nwehEdiw=
github.com/blevesearch/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/bleve_index_api v1.0.2-0.20220322163856-157bc7f3ed4d h1:fu5XNK9duzvlgrb2CQv4Lw/P+/W/UAxQlp8DRq2dpAI=
github.com/blevesearch/bleve_index_api v1.0.2-0.20220322163856-157bc7f3ed4d/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4=
github.com/blevesearch/geo v0.1.9 h1:3PkWcg/Os+AVezVKptfHVv6HbIL0qZ9hodQTnoBnLn8=
github.com/blevesearch/geo v0.1.9/go.mod h1:XqONL2MSA0A3hDWq35mX+dkXbJwq+LNdSgDHvqOhQZM=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/goleveldb v1.0.1 h1:iAtV2Cu5s0GD1lwUiekkFHe2gTMCCNVj2foPclDLIFI=
Expand Down Expand Up @@ -52,6 +55,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo=
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
Expand Down
2 changes: 1 addition & 1 deletion index/scorch/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
// insert _id field
doc.AddIDField()
// perform analysis directly
analyze(doc)
analyze(doc, nil)
analysisResults = append(analysisResults, doc)
}
}
Expand Down
Loading

0 comments on commit ee524ab

Please sign in to comment.