Skip to content

Commit

Permalink
Introduce flag in config to keep source optionally
Browse files Browse the repository at this point in the history
  • Loading branch information
aswinkarthik committed Feb 28, 2019
1 parent 85937ad commit 0b31183
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 13 deletions.
2 changes: 2 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,14 @@ Most suitable for csv files created from database tables`,
config.GetPrimaryKeys(),
config.GetValueColumns(),
config.GetIncludeColumnPositions(),
false,
)
deltaConfig := digest.NewConfig(
deltaFile,
config.GetPrimaryKeys(),
config.GetValueColumns(),
config.GetIncludeColumnPositions(),
false,
)

diff, err := digest.Diff(baseConfig, deltaConfig)
Expand Down
49 changes: 37 additions & 12 deletions pkg/digest/digest.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ const Separator = ","
// Digest represents the binding of the key of each csv line
// and the digest that gets created for the entire line
// Source optionally carries the csv record the digest was built from.
type Digest struct {
Key uint64
Value uint64
// Key is the xxhash of the key positions mapped over the csv line.
Key uint64
// Value is the xxhash of the value positions mapped over the csv line.
Value uint64
// Source is the csv record itself; CreateDigestWithSource sets it,
// CreateDigest leaves it nil.
Source []string
}

// CreateDigest creates a Digest for each line of csv.
Expand All @@ -26,7 +27,15 @@ func CreateDigest(csv []string, pKey Positions, pRow Positions) Digest {
digest := xxhash.Sum64String(pRow.MapToValue(csv))

return Digest{Key: key, Value: digest}
}

// CreateDigestWithSource builds the Digest for one csv record and keeps
// the record in the Source field of the result.
//
// Key is the xxhash of pKey.MapToValue(csv) and Value is the xxhash of
// pRow.MapToValue(csv). Source holds the csv slice as passed in (it is
// not copied).
func CreateDigestWithSource(csv []string, pKey Positions, pRow Positions) Digest {
	d := Digest{Source: csv}
	d.Key = xxhash.Sum64String(pKey.MapToValue(csv))
	d.Value = xxhash.Sum64String(pRow.MapToValue(csv))

	return d
}

// Config represents configurations that can be passed
Expand All @@ -35,24 +44,33 @@ func CreateDigest(csv []string, pKey Positions, pRow Positions) Digest {
// Key: The primary key positions
// Value: The Value positions that needs to be compared for diff
// Include: Include these positions in output. It is Value positions by default.
// KeepSource: when true, each Digest keeps the csv record it was created from in its Source field
type Config struct {
Key Positions
Value Positions
Include Positions
Reader io.Reader
// Key holds the primary key positions.
Key Positions
// Value holds the positions compared for the diff.
Value Positions
// Include holds the positions to include in output; NewConfig
// defaults it to Value when no include positions are given.
Include Positions
// Reader is the input handed to NewConfig (presumably the csv
// content to digest; confirm against the consumer).
Reader io.Reader
// KeepSource selects CreateDigestWithSource over CreateDigest when
// digests are created, so each Digest retains its csv record.
KeepSource bool
}

// NewConfig creates an instance of Config struct.
// r, primaryKey, valueColumns and keepSource are stored as the Reader,
// Key, Value and KeepSource fields. includeColumns is stored as Include,
// unless it is empty, in which case valueColumns is used instead.
func NewConfig(r io.Reader, primaryKey Positions, valueColumns Positions, includeColumns Positions) *Config {
func NewConfig(
r io.Reader,
primaryKey Positions,
valueColumns Positions,
includeColumns Positions,
keepSource bool,
) *Config {
// Fall back to the value columns when no include columns are given.
if len(includeColumns) == 0 {
includeColumns = valueColumns
}

return &Config{
Reader: r,
Key: primaryKey,
Value: valueColumns,
Include: includeColumns,
Reader: r,
Key: primaryKey,
Value: valueColumns,
Include: includeColumns,
KeepSource: keepSource,
}
}

Expand Down Expand Up @@ -115,9 +133,16 @@ func createDigestForNLines(lines [][]string,
wg *sync.WaitGroup,
) {
output := make([]Digest, len(lines))
var createDigestFunc func(csv []string, pKey Positions, pRow Positions) Digest

if config.KeepSource {
createDigestFunc = CreateDigestWithSource
} else {
createDigestFunc = CreateDigest
}

for i, line := range lines {
output[i] = CreateDigest(line, config.Key, config.Value)
output[i] = createDigestFunc(line, config.Key, config.Value)
}

digestChannel <- output
Expand Down
52 changes: 51 additions & 1 deletion pkg/digest/digest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,29 @@ func TestCreateDigest(t *testing.T) {
firstKey := xxhash.Sum64String("1")
firstLineDigest := xxhash.Sum64String(firstLine)

expectedDigest := digest.Digest{Key: firstKey, Value: firstLineDigest}
expectedDigest := digest.Digest{Key: firstKey, Value: firstLineDigest, Source: nil}

actualDigest := digest.CreateDigest(strings.Split(firstLine, digest.Separator), []int{0}, []int{})

assert.Equal(t, expectedDigest, actualDigest)
}

func TestCreateDigestWithSource(t *testing.T) {
firstLine := "1,someline"
firstKey := xxhash.Sum64String("1")
firstLineDigest := xxhash.Sum64String(firstLine)

expectedDigest := digest.Digest{
Key: firstKey,
Value: firstLineDigest,
Source: strings.Split(firstLine, ","),
}

actualDigest := digest.CreateDigestWithSource(strings.Split(firstLine, digest.Separator), []int{0}, []int{})

assert.Equal(t, expectedDigest, actualDigest)
}

func TestDigestForFile(t *testing.T) {
firstLine := "1,first-line,some-columne,friday"
firstKey := xxhash.Sum64String("1")
Expand Down Expand Up @@ -78,3 +94,37 @@ func TestDigestForFile(t *testing.T) {
assert.Nil(t, actualDigest)
})
}

// TestNewConfig checks that NewConfig copies its arguments into the
// returned Config, and that Include falls back to the value columns
// when no include columns are passed.
func TestNewConfig(t *testing.T) {
	reader := strings.NewReader("a,csv,as,str")
	keyCols := digest.Positions{0}
	valueCols := digest.Positions{0, 1, 2}
	includeCols := digest.Positions{0, 1}
	const keepSource = true

	cases := []struct {
		name        string
		include     digest.Positions
		wantInclude digest.Positions
	}{
		{
			name:        "should create config from given params",
			include:     includeCols,
			wantInclude: includeCols,
		},
		{
			name:        "should use valueColumns as includeColumns for includes not specified",
			include:     nil,
			wantInclude: valueCols,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := digest.NewConfig(reader, keyCols, valueCols, tc.include, keepSource)
			want := digest.Config{
				Reader:     reader,
				Key:        keyCols,
				Value:      valueCols,
				Include:    tc.wantInclude,
				KeepSource: keepSource,
			}

			assert.Equal(t, want, *got)
		})
	}
}

0 comments on commit 0b31183

Please sign in to comment.