Skip to content

Commit

Permalink
Read line by line instead of all lines
Browse files Browse the repository at this point in the history
(cherry picked from commit 6070755)
  • Loading branch information
aswinkarthik committed Apr 15, 2018
1 parent 34a2a2c commit 9a0da7f
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 47 deletions.
44 changes: 12 additions & 32 deletions pkg/digest/digest.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package digest

import (
"bufio"
"bytes"
"encoding/csv"
"io"
"strings"
Expand All @@ -21,22 +19,14 @@ type Digest struct {
// CreateDigest creates a Digest for a single CSV record (one line of the file).
// Key is the hash of the fields at keyPositions; Value is the hash of the whole record.
func CreateDigest(csv []string, keyPositions []int) Digest {
var keyBuffer bytes.Buffer
return CreateDigestWithBuffer(csv, keyPositions, &keyBuffer)
}

// CreateDigestWithBuffer creates a Digest for a single CSV record.
// It also takes a caller-supplied buffer, so the buffer used to compute the
// digest of the key can be reused across calls instead of allocated each time.
func CreateDigestWithBuffer(csv []string, keyPositions []int, b *bytes.Buffer) Digest {
for _, pos := range keyPositions {
b.WriteString(csv[pos])
keyCsv := make([]string, len(keyPositions))
for i, pos := range keyPositions {
keyCsv[i] = csv[pos]
}

key := xxhash.Sum64(b.Bytes())
key := xxhash.Sum64String(strings.Join(keyCsv, ","))
digest := xxhash.Sum64String(strings.Join(csv, ","))

b.Reset()
return Digest{Key: key, Value: digest}

}
Expand All @@ -49,31 +39,21 @@ type DigestConfig struct {
}

func DigestForFile(config DigestConfig) error {
bufferedReader := bufio.NewReader(config.Reader)
reader := csv.NewReader(bufferedReader)
lines, err := reader.ReadAll()
output := make([]Digest, len(lines))
for i, line := range lines {
reader := csv.NewReader(config.Reader)

output := make(map[uint64]uint64)
for {
line, err := reader.Read()
if err != nil {
if err == io.EOF {
return nil
break
}
return err
}
output[i] = CreateDigest(line, config.KeyPositions)
digest := CreateDigest(line, config.KeyPositions)
output[digest.Key] = digest.Value
}

config.Encoder.Encode(toHash(output), config.Writer)
config.Encoder.Encode(output, config.Writer)
return nil
}

// toHash converts a slice of Digest entries into a map from each entry's Key
// to its Value. When several entries share a Key, the last one in the slice wins.
func toHash(digests []Digest) map[uint64]uint64 {
	size := len(digests)
	lookup := make(map[uint64]uint64, size)
	for i := 0; i < size; i++ {
		lookup[digests[i].Key] = digests[i].Value
	}
	return lookup
}
15 changes: 0 additions & 15 deletions pkg/digest/digest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,3 @@ func TestDigestForFile(t *testing.T) {
assert.Nil(t, err, "error at DigestForFile")
assert.Equal(t, expectedDigest, actualDigest)
}

// TestToHash checks that toHash maps every Digest's Key to its Value.
func TestToHash(t *testing.T) {
	input := []Digest{
		{Key: 13237225503670494420, Value: 17613682921943161199},
		{Key: 6927017134761466251, Value: 5830873111732207531},
	}
	want := map[uint64]uint64{
		13237225503670494420: 17613682921943161199,
		6927017134761466251:  5830873111732207531,
	}

	assert.Equal(t, want, toHash(input))
}

0 comments on commit 9a0da7f

Please sign in to comment.