Skip to content

Commit

Permalink
Exposing lazy quote parameter to allow support for unescaped quotes i…
Browse files Browse the repository at this point in the history
…n data
  • Loading branch information
kdunn926 authored and aswinkarthik committed Jan 10, 2020
1 parent 468af98 commit e123dac
Show file tree
Hide file tree
Showing 13 changed files with 137 additions and 42 deletions.
32 changes: 19 additions & 13 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type Context struct {
deltaFile afero.File
recordCount int
separator rune
lazyQuotes bool
}

// NewContext can take all CLI flags and create a cmd.Context
Expand All @@ -39,13 +40,14 @@ func NewContext(
baseFilename string,
deltaFilename string,
separator rune,
lazyQuotes bool,
) (*Context, error) {
baseRecordCount, err := getColumnsCount(fs, baseFilename, separator)
baseRecordCount, err := getColumnsCount(fs, baseFilename, separator, lazyQuotes)
if err != nil {
return nil, fmt.Errorf("error in base-file: %v", err)
}

deltaRecordCount, err := getColumnsCount(fs, deltaFilename, separator)
deltaRecordCount, err := getColumnsCount(fs, deltaFilename, separator, lazyQuotes)
if err != nil {
return nil, fmt.Errorf("error in delta-file: %v", err)
}
Expand Down Expand Up @@ -81,6 +83,7 @@ func NewContext(
deltaFile: deltaFile,
recordCount: baseRecordCount,
separator: separator,
lazyQuotes: lazyQuotes,
}

if err := ctx.validate(); err != nil {
Expand Down Expand Up @@ -178,14 +181,15 @@ func assertAll(elements []int, assertFn func(element int) bool) bool {
return true
}

func getColumnsCount(fs afero.Fs, filename string, separator rune) (int, error) {
func getColumnsCount(fs afero.Fs, filename string, separator rune, lazyQuotes bool) (int, error) {
base, err := fs.Open(filename)
if err != nil {
return 0, err
}
defer base.Close()
csvReader := csv.NewReader(base)
csvReader.Comma = separator
csvReader.LazyQuotes = lazyQuotes
record, err := csvReader.Read()
if err != nil {
if err == io.EOF {
Expand All @@ -201,23 +205,25 @@ func getColumnsCount(fs afero.Fs, filename string, separator rune) (int, error)
// that is needed to start the diff process
func (c *Context) BaseDigestConfig() (digest.Config, error) {
return digest.Config{
Reader: c.baseFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Separator: c.separator,
Reader: c.baseFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Separator: c.separator,
LazyQuotes: c.lazyQuotes,
}, nil
}

// DeltaDigestConfig creates a digest.Config from cmd.Context
// that is needed to start the diff process
func (c *Context) DeltaDigestConfig() (digest.Config, error) {
return digest.Config{
Reader: c.deltaFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Separator: c.separator,
Reader: c.deltaFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Separator: c.separator,
LazyQuotes: c.lazyQuotes,
}, nil
}

Expand Down
16 changes: 16 additions & 0 deletions cmd/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func TestPrimaryKeyPositions(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.NoError(t, err)
assert.Equal(t, tt.out, ctx.GetPrimaryKeys())
Expand Down Expand Up @@ -91,6 +92,7 @@ func TestValueColumnPositions(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.NoError(t, err)
assert.Equal(t, tt.out, ctx.GetValueColumns())
Expand All @@ -117,6 +119,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.EqualError(t, err, "validation failed: specified format is not valid")
Expand All @@ -133,6 +136,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.NoError(t, err)
Expand All @@ -149,6 +153,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.NoError(t, err)
Expand All @@ -168,6 +173,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.EqualError(t, err, "error in base-file: open "+string(os.PathSeparator)+"base.csv: file does not exist")
})
Expand All @@ -189,6 +195,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.EqualError(t, err, "error in base-file: unable to process headers from csv file. EOF reached. invalid CSV file")
})
Expand All @@ -210,6 +217,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.EqualError(t, err, "error in delta-file: unable to process headers from csv file. EOF reached. invalid CSV file")
})
Expand All @@ -228,6 +236,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.NoError(t, err)
})
Expand Down Expand Up @@ -256,6 +265,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.EqualError(t, err, "validation failed: --primary-key positions are out of bounds")
Expand All @@ -272,6 +282,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.EqualError(t, err, "validation failed: --include positions are out of bounds")
Expand All @@ -288,6 +299,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.EqualError(t, err, "validation failed: --columns positions are out of bounds")
Expand All @@ -310,6 +322,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.EqualError(t, err, "base-file and delta-file columns count do not match")
})
Expand All @@ -329,6 +342,7 @@ func TestNewContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)

assert.EqualError(t, err, "only one of --columns or --ignore-columns")
Expand All @@ -353,6 +367,7 @@ func TestConfig_DigestConfig(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.NoError(t, err)

Expand Down Expand Up @@ -388,6 +403,7 @@ func TestConfig_DigestConfig(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.NoError(t, err)

Expand Down
3 changes: 3 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Most suitable for csv files created from database tables`,
baseFilename,
deltaFilename,
runeSeparator,
lazyQuotes,
)

if err != nil {
Expand Down Expand Up @@ -125,6 +126,7 @@ var (
includeColumnPositions []int
format string
separator string
lazyQuotes bool
)

func init() {
Expand All @@ -138,6 +140,7 @@ func init() {
rootCmd.Flags().StringVarP(&separator, "separator", "s", ",", "use specific separator (\\t, or any one character string)")

rootCmd.Flags().BoolVarP(&timed, "time", "", false, "Measure time")
rootCmd.Flags().BoolVarP(&lazyQuotes, "lazyquotes", "", false, "allow unescaped quotes")
}

func timeTrack(start time.Time, name string) {
Expand Down
1 change: 1 addition & 0 deletions cmd/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func TestRunContext(t *testing.T) {
"/base.csv",
"/delta.csv",
',',
false,
)
assert.NoError(t, err)

Expand Down
6 changes: 6 additions & 0 deletions examples/lazy_quotes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
15 12 wordpress".com com 207790 792348 wordpress".com com 15 12 207589 791634
43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818
69 48 "aol.com com 97543 225532 "aol.com com 70 49 97328 224491
1615 905 proboards.com com 19833 33110 proboards.com com 1613 902 19835 33135
1616 906 ccleaner.com com 19831 32507 ccleaner.com com 1614 903 19834 32463
1617 907 doodle.com com 19827 32902 doodle.com com 1621 909 19787 32822
4 changes: 4 additions & 0 deletions examples/lazy_quotes_delta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
15 12 wordpress".com com 207790 792348 wordpress".com com 15 12 207589 791634
43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818
69 1048 "aol.com com 97543 225532 "aol.com com 70 49 97328 224491
24564 907 completely-newsite.com com 19827 32902 completely-newsite.com com 1621 909 19787 32822
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ require (
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/afero v1.1.2
github.com/spf13/cobra v0.0.5
github.com/stretchr/testify v1.3.0
github.com/stretchr/testify v1.4.0
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa // indirect
)

Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
Expand All @@ -59,4 +61,5 @@ golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
23 changes: 13 additions & 10 deletions pkg/digest/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ import "io"
// Value: The Value positions that need to be compared for diff
// Include: Include these positions in output. It is Value positions by default.
type Config struct {
Key Positions
Value Positions
Include Positions
Reader io.Reader
Separator rune
Key Positions
Value Positions
Include Positions
Reader io.Reader
Separator rune
LazyQuotes bool
}

// NewConfig creates an instance of Config struct.
Expand All @@ -23,16 +24,18 @@ func NewConfig(
valueColumns Positions,
includeColumns Positions,
separator rune,
lazyQuotes bool,
) *Config {
if len(includeColumns) == 0 {
includeColumns = valueColumns
}

return &Config{
Reader: r,
Key: primaryKey,
Value: valueColumns,
Include: includeColumns,
Separator: separator,
Reader: r,
Key: primaryKey,
Value: valueColumns,
Include: includeColumns,
Separator: separator,
LazyQuotes: lazyQuotes,
}
}
61 changes: 55 additions & 6 deletions pkg/digest/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,17 @@ func TestDiff(t *testing.T) {

t.Run("default config", func(t *testing.T) {
baseConfig := &digest.Config{
Reader: strings.NewReader(base),
Key: []int{0},
Separator: ',',
Reader: strings.NewReader(base),
Key: []int{0},
Separator: ',',
LazyQuotes: false,
}

deltaConfig := &digest.Config{
Reader: strings.NewReader(delta),
Key: []int{0},
Separator: ',',
Reader: strings.NewReader(delta),
Key: []int{0},
Separator: ',',
LazyQuotes: false,
}

expected := digest.Differences{
Expand All @@ -59,4 +61,51 @@ func TestDiff(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, expected, actual)
})

deltaLazyQuotes := `1,col-1,col-2,col-3,one-value
2,col-1,col-2,col-3,two-value-modified
4,col-1,col-2,col-3,four"-added
100,col-1-modified,col-2,col-3,hundred-value-modified
5,col-1,col-2,col-3,five"-added
`

t.Run("lazy quotes in delta config", func(t *testing.T) {
baseConfig := &digest.Config{
Reader: strings.NewReader(base),
Key: []int{0},
Separator: ',',
LazyQuotes: false,
}

deltaConfig := &digest.Config{
Reader: strings.NewReader(deltaLazyQuotes),
Key: []int{0},
Separator: ',',
LazyQuotes: true,
}

expected := digest.Differences{
Additions: []digest.Addition{
strings.Split("4,col-1,col-2,col-3,four\"-added", ","),
strings.Split("5,col-1,col-2,col-3,five\"-added", ","),
},
Modifications: []digest.Modification{
{
Current: strings.Split("2,col-1,col-2,col-3,two-value-modified", ","),
Original: strings.Split("2,col-1,col-2,col-3,two-value", ","),
},
{
Current: strings.Split("100,col-1-modified,col-2,col-3,hundred-value-modified", ","),
Original: strings.Split("100,col-1,col-2,col-3,hundred-value", ","),
},
},
Deletions: []digest.Deletion{
strings.Split("3,col-1,col-2,col-3,three-value", ","),
},
}

actual, err := digest.Diff(*baseConfig, *deltaConfig)
assert.NoError(t, err)
assert.Equal(t, expected, actual)
})
}
Loading

0 comments on commit e123dac

Please sign in to comment.