Skip to content

Commit

Permalink
test index matching
Browse files Browse the repository at this point in the history
  • Loading branch information
gernest committed Feb 23, 2024
1 parent d7c3afd commit d6c4d77
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 13 deletions.
22 changes: 11 additions & 11 deletions internal/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ import (
"github.com/vinceanalytics/vince/internal/logger"
)

type Arrow struct{}
type ArrowIndexer struct{}

func New() *Arrow {
return new(Arrow)
func New() *ArrowIndexer {
return new(ArrowIndexer)
}

var _ index.Index = (*Arrow)(nil)
var _ index.Index = (*ArrowIndexer)(nil)

func (idx *Arrow) Index(r arrow.Record) (index.Full, error) {
func (idx *ArrowIndexer) Index(r arrow.Record) (index.Full, error) {
cIdx := index.NewColIdx()
defer cIdx.Release()
o := make(map[string]*index.FullColumn)
Expand All @@ -41,7 +41,7 @@ func (idx *Arrow) Index(r arrow.Record) (index.Full, error) {
cIdx.Reset()
}
lo, hi := Timestamps(r)
return NewFullIdx(o, uint64(lo), uint64(hi)), nil
return NewFullIdx(o, uint64(lo), uint64(hi), uint64(r.NumRows())), nil
}

func Timestamps(r arrow.Record) (lo, hi int64) {
Expand All @@ -55,16 +55,16 @@ func Timestamps(r arrow.Record) (lo, hi int64) {
}

type FullIndex struct {
m map[string]*index.FullColumn
keys []string
min, max, size uint64
m map[string]*index.FullColumn
keys []string
min, max, size, rows uint64
}

var _ index.Full = (*FullIndex)(nil)

var baseIndexSize = uint64(unsafe.Sizeof(FullIndex{}))

func NewFullIdx(m map[string]*index.FullColumn, min, max uint64) *FullIndex {
func NewFullIdx(m map[string]*index.FullColumn, min, max, rows uint64) *FullIndex {
keys := make([]string, 0, len(m))
n := baseIndexSize
for k, v := range m {
Expand All @@ -74,7 +74,7 @@ func NewFullIdx(m map[string]*index.FullColumn, min, max uint64) *FullIndex {
}
n += uint64(len(keys) * 2)
sort.Strings(keys)
return &FullIndex{keys: keys, m: m, min: min, max: max, size: n}
return &FullIndex{keys: keys, m: m, min: min, max: max, size: n, rows: rows}
}

func (idx *FullIndex) CanIndex() bool {
Expand Down
34 changes: 32 additions & 2 deletions internal/indexer/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ import (
"testing"
"time"

"github.com/RoaringBitmap/roaring"
"github.com/apache/arrow/go/v15/arrow/memory"
"github.com/stretchr/testify/require"
"github.com/vinceanalytics/vince/internal/buffers"
"github.com/vinceanalytics/vince/internal/closter/events"
"github.com/vinceanalytics/vince/internal/columns"
"github.com/vinceanalytics/vince/internal/filters"
"github.com/vinceanalytics/vince/internal/index"
)

Expand All @@ -21,9 +24,9 @@ func TestIndexer(t *testing.T) {
r := events.New(memory.DefaultAllocator).Write(ls)
defer r.Release()
idx := New()
full, err := idx.Index(r)
fidx, err := idx.Index(r)
require.NoError(t, err)

full := fidx.(*FullIndex)
t.Run("Sets min,max timestamp", func(t *testing.T) {
lo := time.UnixMilli(int64(full.Min())).UTC()
require.Truef(t, now.Equal(lo), "now=%v lo=%v", now, lo)
Expand Down Expand Up @@ -66,4 +69,31 @@ func TestIndexer(t *testing.T) {
require.NoError(t, err)
require.True(t, bytes.Equal(b.Bytes(), data))
})
// Match is a table-driven subtest for FullIndex.Match: each case applies a
// set of compiled filters to a bitmap of candidate rows and checks which row
// ids remain afterwards.
t.Run("Match", func(t *testing.T) {
// Case describes one Match scenario.
type Case struct {
// d is the case description; it is used as the subtest name.
d string
// f is the list of filters handed to Match.
f []*filters.CompiledFilter
// w is the wanted result: the row ids expected in the bitmap after Match.
w []uint32
}

// NOTE(review): the expected row ids depend on the fixture events written
// into the record earlier in TestIndexer (outside this view) — confirm they
// stay in sync if that fixture changes.
cases := []Case{
{d: "Single column exact match", f: []*filters.CompiledFilter{
{Column: columns.Domain, Value: []byte("vinceanalytics.com")},
}, w: []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
// NOTE(review): "browser" is a raw string while the domain filter uses a
// columns constant — consider using the matching constant if one exists.
{d: "Multi column exact match", f: []*filters.CompiledFilter{
{Column: columns.Domain, Value: []byte("vinceanalytics.com")},
{Column: "browser", Value: []byte("Chrome Webview")},
}, w: []uint32{5, 8}},
}
for _, v := range cases {
t.Run(v.d, func(t *testing.T) {
// Start from a bitmap holding every row id in [0, full.rows).
o := new(roaring.Bitmap)
for i := 0; i < int(full.rows); i++ {
o.Add(uint32(i))
}
// The return value of Match is not inspected; the assertion below
// expects o itself to hold exactly the wanted row ids after the call.
full.Match(o, v.f)
require.Equal(t, v.w, o.ToArray())
})
}
})
}

0 comments on commit d6c4d77

Please sign in to comment.