Skip to content

Commit

Permalink
Make digest cmd take input from stdin and output to stdout using Json…
Browse files Browse the repository at this point in the history
… encoder
  • Loading branch information
aswinkarthik committed Apr 15, 2018
1 parent c5276a9 commit 0e67134
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 3 deletions.
76 changes: 76 additions & 0 deletions cmd/digest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright © 2018 NAME HERE <EMAIL ADDRESS>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"errors"
"log"
"os"

"github.com/aswinkarthik93/csv-digest/pkg/digest"
"github.com/aswinkarthik93/csv-digest/pkg/encoder"
"github.com/spf13/cobra"
)

// digestCmd is the cobra definition of the "digest" subcommand.
// It accepts exactly one positional argument and hands it to runDigest.
var digestCmd = &cobra.Command{
	Use:   "digest <csv-file>",
	Short: "Takes in a csv and creates a digest of each line",
	Long: `Takes a Csv file and creates a digest for each line.
The tool can output to stdout or a file in plaintext.
It can also serialize the output as a binary file for any other go program to consume directly`,
	// Args rejects every invocation that does not carry exactly one
	// positional argument, with a distinct message for too many vs. none.
	Args: func(cmd *cobra.Command, args []string) error {
		switch n := len(args); {
		case n == 1:
			return nil
		case n > 1:
			return errors.New("requires exactly one arg - the csv file")
		default:
			return errors.New("requires atleast one arg - the csv file")
		}
	},
	Run: func(cmd *cobra.Command, args []string) {
		csvFile := args[0]
		runDigest(csvFile)
	},
}

// runDigest streams csv from standard input, and writes one JSON-encoded
// digest per record to standard output. The key columns come from
// primaryKeyPositions. Any error from the digest run terminates the
// process through log.Fatal.
//
// NOTE(review): csvFile is accepted but not used; input is always read
// from os.Stdin. Confirm whether the file argument should be opened here.
func runDigest(csvFile string) {
	if err := digest.DigestForFile(digest.DigestConfig{
		KeyPositions: primaryKeyPositions(),
		Encoder:      encoder.JsonEncoder{},
		Reader:       os.Stdin,
		Writer:       os.Stdout,
	}); err != nil {
		log.Fatal(err)
	}
}

// primaryKeyPositions returns the column indexes used as the key of each
// csv record. It is currently fixed to the first column only.
func primaryKeyPositions() []int {
	const firstColumn = 0
	return []int{firstColumn}
}

// init registers digestCmd as a subcommand of rootCmd.
func init() {
rootCmd.AddCommand(digestCmd)

// No flags are defined for this command yet.

// Cobra supports local flags which will only run when this command
// is called directly, e.g.:
// digestCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle")
}
5 changes: 2 additions & 3 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ The tool can output to stdout or a file in plaintext.
It can also serialize the output as a binary file for any other go program to consume directly`,
// Uncomment the following line if your bare application
// has an action associated with it:
Run: func(cmd *cobra.Command, args []string) {
fmt.Println("Main method getting called")
},
// Run: func(cmd *cobra.Command, args []string) {
// },
}

// Execute adds all child commands to the root command and sets flags appropriately.
Expand Down
66 changes: 66 additions & 0 deletions pkg/digest/digest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package digest

import (
"bytes"
"encoding/csv"
"io"
"strings"

"github.com/aswinkarthik93/csv-digest/pkg/encoder"
"github.com/cespare/xxhash"
)

// CsvDigest represents the binding of the key of each csv line
// and the digest that gets created for the entire line.
type CsvDigest struct {
// Key is the 64-bit hash computed over the line's key columns.
Key uint64
// Digest is the 64-bit hash computed over the entire line.
Digest uint64
}

// CreateDigest builds the CsvDigest of a single csv record.
// csv holds the record's fields and keyPositions the indexes of the fields
// that form its key. A fresh scratch buffer is allocated for every call;
// use CreateDigestWithBuffer to supply a reusable one instead.
func CreateDigest(csv []string, keyPositions []int) CsvDigest {
	return CreateDigestWithBuffer(csv, keyPositions, new(bytes.Buffer))
}

// CreateDigestWithBuffer builds the CsvDigest of a single csv record using
// a caller-supplied scratch buffer, so the buffer can be reused across
// records instead of being allocated each time.
//
// The key is the xxhash of the fields at keyPositions written back to back
// into b; the digest is the xxhash of all fields joined with ",".
// b is reset before returning, but it is not cleared on entry, so any bytes
// already in it become part of the key.
//
// NOTE(review): key fields are concatenated without a separator, so
// different multi-column keys can yield the same bytes (e.g. "ab","c" and
// "a","bc") - confirm whether that is acceptable.
func CreateDigestWithBuffer(csv []string, keyPositions []int, b *bytes.Buffer) CsvDigest {
	for i := range keyPositions {
		b.WriteString(csv[keyPositions[i]])
	}
	keyHash := xxhash.Sum64(b.Bytes())
	// Hand the buffer back empty so the caller can reuse it right away.
	b.Reset()

	row := strings.Join(csv, ",")
	return CsvDigest{
		Key:    keyHash,
		Digest: xxhash.Sum64String(row),
	}
}

// DigestConfig bundles everything DigestForFile needs: where to read the
// csv from, which columns form the key, and how and where to emit digests.
type DigestConfig struct {
// KeyPositions are the indexes of the csv fields that form each record's key.
KeyPositions []int
// Encoder encodes each record's CsvDigest onto Writer.
Encoder encoder.Encoder
// Reader is the source of the csv data.
Reader io.Reader
// Writer is the destination handed to Encoder for every digest.
Writer io.Writer
}

// DigestForFile reads csv records from config.Reader until end of input and,
// for each record, encodes its CsvDigest onto config.Writer through
// config.Encoder.
//
// It returns nil once the input is exhausted. It stops at, and returns, the
// first error reported either by the csv reader or by the encoder, so a
// failed write is never silently dropped.
func DigestForFile(config DigestConfig) error {
	reader := csv.NewReader(config.Reader)

	for {
		line, err := reader.Read()
		if err == io.EOF {
			// Clean end of input: every record has been processed.
			return nil
		}
		if err != nil {
			return err
		}

		digest := CreateDigest(line, config.KeyPositions)
		if err := config.Encoder.Encode(digest, config.Writer); err != nil {
			return err
		}
	}
}
51 changes: 51 additions & 0 deletions pkg/digest/digest_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package digest

import (
"bytes"
"fmt"
"strings"
"testing"

"github.com/aswinkarthik93/csv-digest/pkg/encoder"
"github.com/cespare/xxhash"
"github.com/stretchr/testify/assert"
)

func TestCreateDigest(t *testing.T) {
firstLine := "1,someline"
firstKey := xxhash.Sum64String("1")
firstLineDigest := xxhash.Sum64String(firstLine)

expectedDigest := CsvDigest{Key: firstKey, Digest: firstLineDigest}

actualDigest := CreateDigest(strings.Split(firstLine, ","), []int{0})

assert.Equal(t, expectedDigest, actualDigest)
}

func TestDigestForFile(t *testing.T) {
firstLine := "1,first-line"
firstKey := xxhash.Sum64String("1")
firstDigest := xxhash.Sum64String(firstLine)

secondLine := "2,second-line"
secondKey := xxhash.Sum64String("2")
secondDigest := xxhash.Sum64String(secondLine)

var outputBuffer bytes.Buffer

testConfig := DigestConfig{
Reader: strings.NewReader(firstLine + "\n" + secondLine),
Writer: &outputBuffer,
Encoder: encoder.JsonEncoder{},
KeyPositions: []int{0},
}

err := DigestForFile(testConfig)

actualDigest := outputBuffer.String()
expectedDigest := fmt.Sprintf(`{"Key":%d,"Digest":%d}{"Key":%d,"Digest":%d}`, firstKey, firstDigest, secondKey, secondDigest)

assert.Nil(t, err, "error at DigestForFile")
assert.Equal(t, expectedDigest, actualDigest)
}
7 changes: 7 additions & 0 deletions pkg/encoder/encoder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package encoder

import "io"

// Encoder is implemented by types that can serialize a value onto a writer.
type Encoder interface {
// Encode writes the encoded form of the given value to the given writer
// and reports any failure through the returned error.
Encode(interface{}, io.Writer) error
}
17 changes: 17 additions & 0 deletions pkg/encoder/json.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package encoder

import (
"encoding/json"
"io"
)

// JsonEncoder is an Encoder that serializes values as JSON.
type JsonEncoder struct{}

// Encode marshals v to JSON and writes the resulting bytes to w.
// No separator or trailing newline is added after the value.
//
// It returns the marshalling error if v cannot be represented as JSON, in
// which case nothing is written, or the error reported by w if the write
// fails.
func (e JsonEncoder) Encode(v interface{}, w io.Writer) error {
	b, err := json.Marshal(v)
	if err != nil {
		return err
	}

	// Propagate write failures instead of reporting success on lost output.
	_, err = w.Write(b)
	return err
}
24 changes: 24 additions & 0 deletions pkg/encoder/json_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package encoder

import (
"bytes"
"testing"

"github.com/stretchr/testify/assert"
)

// TestJsonEncoder checks that JsonEncoder, used through the Encoder
// interface, writes the compact JSON form of a value with no error.
func TestJsonEncoder(t *testing.T) {
	var (
		buffer  bytes.Buffer
		encoder Encoder = JsonEncoder{}
	)
	input := map[string]string{"key": "value"}
	want := []byte(`{"key":"value"}`)

	err := encoder.Encode(input, &buffer)

	assert.Nil(t, err, "Error in encoder.Encode")
	assert.Equal(t, want, buffer.Bytes())
}

0 comments on commit 0e67134

Please sign in to comment.