-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce v2 version of finding differences
- Loading branch information
1 parent
00e3b5b
commit 8016dff
Showing
7 changed files
with
234 additions
and
221 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,123 +1,102 @@ | ||
package digest | ||
|
||
import ( | ||
"encoding/csv" | ||
"fmt" | ||
"runtime" | ||
"sync" | ||
) | ||
|
||
// Difference represents the additions and modifications | ||
// between the two Configs | ||
type Difference struct { | ||
Additions []string | ||
Modifications []string | ||
} | ||
|
||
type messageType int | ||
|
||
const ( | ||
addition messageType = iota | ||
modification messageType = iota | ||
) | ||
|
||
type diffMessage struct { | ||
_type messageType | ||
value string | ||
// Differences represents the differences | ||
// between 2 csv content | ||
type Differences struct { | ||
Additions []Addition | ||
Modifications []Modification | ||
} | ||
|
||
// Diff will differentiate between two given configs | ||
func Diff(baseConfig, deltaConfig *Config) (Difference, error) { | ||
maxProcs := runtime.NumCPU() | ||
base, _, err := Create(baseConfig) | ||
|
||
if err != nil { | ||
return Difference{}, fmt.Errorf("error in base file: %v", err) | ||
} | ||
// Addition is a row appearing in delta but missing in base | ||
type Addition []string | ||
|
||
additions := make([]string, 0, len(base)) | ||
modifications := make([]string, 0, len(base)) | ||
// Modification is a row present in both delta and base | ||
// with the values column changed in delta | ||
type Modification struct { | ||
Original []string | ||
Current []string | ||
} | ||
|
||
messageChan := make(chan []diffMessage, bufferSize*maxProcs) | ||
errorChannel := make(chan error) | ||
defer close(errorChannel) | ||
type message struct { | ||
original []string | ||
current []string | ||
_type messageType | ||
} | ||
|
||
go readAndCompare(base, deltaConfig, messageChan, errorChannel) | ||
// Diff finds the Differences between baseConfig and deltaConfig | ||
func Diff(baseConfig, deltaConfig Config) (Differences, error) { | ||
baseEngine := NewEngine(baseConfig) | ||
baseDigestChannel, baseErrorChannel := baseEngine.StreamDigests() | ||
|
||
for msgs := range messageChan { | ||
for _, msg := range msgs { | ||
if msg._type == addition { | ||
additions = append(additions, msg.value) | ||
} else if msg._type == modification { | ||
modifications = append(modifications, msg.value) | ||
} | ||
baseFileDigest := NewFileDigest() | ||
for digests := range baseDigestChannel { | ||
for _, d := range digests { | ||
baseFileDigest.Append(d) | ||
} | ||
} | ||
|
||
if err := <-errorChannel; err != nil { | ||
return Difference{}, fmt.Errorf("error in delta file: %v", err) | ||
if err := <-baseErrorChannel; err != nil { | ||
return Differences{}, fmt.Errorf("error processing base file: %v", err) | ||
} | ||
|
||
return Difference{Additions: additions, Modifications: modifications}, nil | ||
} | ||
|
||
func readAndCompare(base map[uint64]uint64, config *Config, msgChannel chan<- []diffMessage, errorChannel chan<- error) { | ||
reader := csv.NewReader(config.Reader) | ||
var wg sync.WaitGroup | ||
for { | ||
lines, eofReached, err := getNextNLines(reader) | ||
|
||
if err != nil { | ||
wg.Wait() | ||
close(msgChannel) | ||
errorChannel <- err | ||
return | ||
} | ||
deltaConfig.KeepSource = true | ||
deltaEngine := NewEngine(deltaConfig) | ||
deltaDigestChannel, deltaErrorChannel := deltaEngine.StreamDigests() | ||
|
||
wg.Add(1) | ||
go compareDigestForNLines(base, lines, config, msgChannel, &wg) | ||
additions := make([]Addition, 0) | ||
modifications := make([]Modification, 0) | ||
|
||
if eofReached { | ||
break | ||
msgChannel := streamDifferences(baseFileDigest, deltaDigestChannel) | ||
for msg := range msgChannel { | ||
switch msg._type { | ||
case addition: | ||
additions = append(additions, msg.current) | ||
case modification: | ||
modifications = append(modifications, Modification{Original: msg.original, Current: msg.current}) | ||
} | ||
} | ||
wg.Wait() | ||
close(msgChannel) | ||
errorChannel <- nil | ||
|
||
if err := <-deltaErrorChannel; err != nil { | ||
return Differences{}, fmt.Errorf("error processing delta file: %v", err) | ||
} | ||
|
||
return Differences{Additions: additions, Modifications: modifications}, nil | ||
} | ||
|
||
func compareDigestForNLines(base map[uint64]uint64, | ||
lines [][]string, | ||
config *Config, | ||
msgChannel chan<- []diffMessage, | ||
wg *sync.WaitGroup, | ||
) { | ||
output := make([]diffMessage, len(lines)) | ||
diffCounter := 0 | ||
for _, line := range lines { | ||
digest := CreateDigest(line, config.Key, config.Value) | ||
if baseValue, present := base[digest.Key]; present { | ||
// Present in both base and delta | ||
if baseValue != digest.Value { | ||
value := config.Include.MapToValue(line) | ||
// Modification | ||
output[diffCounter] = diffMessage{ | ||
value: value, | ||
_type: modification, | ||
func streamDifferences(baseFileDigest *FileDigest, digestChannel chan []Digest) chan message { | ||
maxProcs := runtime.NumCPU() | ||
msgChannel := make(chan message, maxProcs*bufferSize) | ||
|
||
go func(base *FileDigest, digestChannel chan []Digest, msgChannel chan message) { | ||
defer close(msgChannel) | ||
|
||
for digests := range digestChannel { | ||
for _, d := range digests { | ||
if baseValue, present := base.Digests[d.Key]; present { | ||
if baseValue != d.Value { | ||
// Modification | ||
msgChannel <- message{_type: modification, current: d.Source, original: base.SourceMap[d.Key]} | ||
} | ||
} else { | ||
// Addition | ||
msgChannel <- message{_type: addition, current: d.Source} | ||
} | ||
diffCounter++ | ||
} | ||
} else { | ||
value := config.Include.MapToValue(line) | ||
// Not present in base. So Addition. | ||
output[diffCounter] = diffMessage{ | ||
value: value, | ||
_type: addition, | ||
} | ||
diffCounter++ | ||
} | ||
} | ||
|
||
msgChannel <- output[:diffCounter] | ||
wg.Done() | ||
}(baseFileDigest, digestChannel, msgChannel) | ||
|
||
return msgChannel | ||
} |
Oops, something went wrong.