Skip to content

Commit

Permalink
compress indexed columns
Browse files (browse the repository at this point in the history)
  • Loading branch information
gernest committed Feb 10, 2024
1 parent fd00296 commit 50f9eda
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 17 deletions.
36 changes: 23 additions & 13 deletions gen/go/staples/v1/scan.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 21 additions & 3 deletions index/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,22 @@ func (f *FileIndex) get(name string) (*FullColumn, error) {
func readColumn(r ReaderAtSeeker, meta *v1.Metadata_Column) (*FullColumn, error) {
buf := get()
defer buf.Release()
data := buf.get(int(meta.Size))
n, err := r.ReadAt(data, int64(meta.Offset))

compress := get()
defer compress.Release()

raw := buf.get(int(meta.Size))
n, err := r.ReadAt(raw, int64(meta.Offset))
if err != nil {
return nil, err
}
if n != int(meta.Size) {
return nil, fmt.Errorf("index: Too little data read want=%d got %d", meta.Size, n)
}
data, err := ZSTDDecompress(compress.get(int(meta.RawSize)), raw)
if err != nil {
return nil, err
}
o := &FullColumn{
name: meta.Name,
numRows: meta.NumRows,
Expand Down Expand Up @@ -149,6 +157,8 @@ func chuckFromRaw(raw []byte, chunk *v1.Metadata_Chunk) []byte {
func WriteFull(w io.Writer, full Full, id string) error {
b := buffers.Bytes()
defer b.Release()
compress := get()
defer compress.Release()

meta := &v1.Metadata{
Id: id,
Expand All @@ -161,7 +171,14 @@ func WriteFull(w io.Writer, full Full, id string) error {
if err != nil {
return err
}
n, err := w.Write(data)
out, err := ZSTDCompress(
compress.get(ZSTDCompressBound(len(data))),
data, ZSTDCompressionLevel,
)
if err != nil {
return err
}
n, err := w.Write(out)
if err != nil {
return err
}
Expand All @@ -171,6 +188,7 @@ func WriteFull(w io.Writer, full Full, id string) error {
FstOffset: uint32(offset),
Offset: startOffset,
Size: uint32(n),
RawSize: uint32(len(data)),
})
startOffset += uint64(n)
return
Expand Down
2 changes: 1 addition & 1 deletion index/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
)

func TestReadIndexFile(t *testing.T) {
f, err := os.Open("testdata/01HPA7QZNP1E8DP8H1SKK253HQ")
f, err := os.Open("testdata/01HPA98Z0TVKPC4QC7HQBTF30Q")
if err != nil {
t.Fatal(err)
}
Expand Down
Binary file removed index/testdata/01HPA7QZNP1E8DP8H1SKK253HQ
Binary file not shown.
Binary file added index/testdata/01HPA98Z0TVKPC4QC7HQBTF30Q
Binary file not shown.
1 change: 1 addition & 0 deletions proto/staples/v1/scan.proto
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ message Metadata {
uint32 fst_offset = 4;
uint64 offset = 5;
uint32 size = 6;
uint32 raw_size = 7;
}

message Chunk {
Expand Down

0 comments on commit 50f9eda

Please sign in to comment.