Skip to content

Commit

Permalink
only index dictionary fields
Browse files Browse the repository at this point in the history
  • Loading branch information
gernest committed Feb 12, 2024
1 parent fda198d commit 72e02b7
Showing 1 changed file with 2 additions and 16 deletions.
18 changes: 2 additions & 16 deletions staples/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/apache/arrow/go/v15/arrow"
"github.com/apache/arrow/go/v15/arrow/array"
"github.com/vinceanalytics/vince/columns"
"github.com/vinceanalytics/vince/db"
"github.com/vinceanalytics/vince/filters"
"github.com/vinceanalytics/vince/index"
Expand All @@ -22,32 +21,19 @@ func NewIndex() *Index {

var _ index.Index = (*Index)(nil)

// skip is the set of column names that Index passes over: any column whose
// name maps to true here is not handed to the column indexer.
var skip = map[string]bool{
columns.Timestamp: true,
columns.ID: true,
columns.Session: true,
columns.Bounce: true,
columns.Duration: true,
columns.View: true,
}

func (idx *Index) Index(r arrow.Record) (index.Full, error) {
cIdx := index.NewColIdx()
defer cIdx.Release()

o := make(map[string]*index.FullColumn)
for i := 0; i < int(r.NumCols()); i++ {
name := r.ColumnName(i)
if skip[name] {
continue
}
a := r.Column(i)
if a.NullN() == a.Len() {
if a.DataType().ID() != arrow.DICTIONARY || a.NullN() == a.Len() {
// Skip non-dictionary columns and columns that contain only nulls. All-null
// columns occur, for instance, when geo ip is not configured, or for utm* properties.
continue
}
cIdx.Index(a.(*array.Dictionary))
name := r.ColumnName(i)
n, err := cIdx.Build(name)
if err != nil {
return nil, err
Expand Down

0 comments on commit 72e02b7

Please sign in to comment.