Skip to content

Commit

Permalink
Add unified diff format (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-evans committed Mar 26, 2022
1 parent d379ee2 commit bd33d36
Show file tree
Hide file tree
Showing 7 changed files with 1,023 additions and 123 deletions.
96 changes: 72 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,78 @@

Go implementation of the Patience Diff algorithm.

This library generates line-oriented diffs between source and destination inputs, using the Patience Diff algorithm.

## Features

Supports both plain format and [Unified format](https://en.wikipedia.org/wiki/Diff#Unified_format) (unidiff).

Plain format:
```diff
the
quick
brown
-chicken
+fox
jumps
over
the
+lazy
dog
```

Unified format (unidiff):

```diff
--- a.txt
+++ b.txt
@@ -3,3 +3,3 @@
brown
-chicken
+fox
jumps
@@ -7,2 +7,3 @@
the
+lazy
dog
```

## Installation

```sh
go get github.com/peter-evans/patience
```

## Usage

```go
a := strings.Split(textA, "\n")
b := strings.Split(textB, "\n")

diffs := patience.Diff(a, b)

// Combined diff
diff := patience.DiffText(diffs)

// Split diffs
diffA := patience.DiffTextA(diffs)
diffB := patience.DiffTextB(diffs)

// Unified diff
unidiff := patience.UnifiedDiffText(diffs)

// Unified diff with options
unidiffopts := patience.UnifiedDiffTextWithOptions(
diffs,
patience.UnifiedDiffOptions{
Precontext: 2,
Postcontext: 2,
SrcHeader: "a.txt",
DstHeader: "b.txt",
},
)
```

## About

Patience Diff is an algorithm credited to [Bram Cohen](https://bramcohen.livejournal.com/73318.html) that produces diffs that tend to be more human-readable than those produced by the common diff algorithm.
Expand All @@ -16,8 +88,6 @@ While the diffs generated by this algorithm are efficient, in many cases they te

Patience Diff, while also relying on computing the longest common subsequence, takes a different approach. It only computes the longest common subsequence of the *unique*, *common* elements of both texts. This means that lines that are frequently non-unique, such as those containing a single brace or newline character, are ignored. The result is that distinctive lines, such as function declarations, become the anchor points of commonality between the two texts.

## Example

This is an example comparing Patience Diff to the common diff algorithm (Myers).

Patience Diff
Expand Down Expand Up @@ -100,28 +170,6 @@ Common diff (Myers)
}
```

## Installation

```sh
go get github.com/peter-evans/patience
```

## Usage

```go
a := strings.Split(textA, "\n")
b := strings.Split(textB, "\n")

diffs := patience.Diff(a, b)

// Combined diff
diff := patience.DiffText(diffs)

// Split diffs
diffA := patience.DiffTextA(diffs)
diffB := patience.DiffTextB(diffs)
```

## References

- [Patience Diff Advantages](https://bramcohen.livejournal.com/73318.html) by Bram Cohen
Expand Down
43 changes: 43 additions & 0 deletions format.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,46 @@ func DiffTextB(diffs []DiffLine) string {
}
return strings.Join(s, "\n")
}

// UnifiedDiffOptions represents the options for UnifiedDiffTextWithOptions.
//
// The zero value is usable: it requests no context lines and emits no file
// header lines. UnifiedDiffText uses a Precontext and Postcontext of 3.
type UnifiedDiffOptions struct {
	// Precontext is the number of lines of context before each change in a hunk.
	Precontext int
	// Postcontext is the number of lines of context after each change in a hunk.
	Postcontext int
	// SrcHeader is the header for the source file. When non-empty it is
	// written as a "--- <SrcHeader>" line; when empty that line is omitted.
	SrcHeader string
	// DstHeader is the header for the destination file. When non-empty it is
	// written as a "+++ <DstHeader>" line; when empty that line is omitted.
	DstHeader string
}

// UnifiedDiffTextWithOptions renders diffs as unidiff text.
//
// The output starts with the optional "--- " and "+++ " file header lines
// (each written only when the corresponding option is non-empty), followed by
// every hunk produced by makeHunks for the requested pre/post context. Each
// hunk consists of an "@@ -srcStart,srcLines +dstStart,dstLines @@" range line
// and then one line per diff entry, prefixed with its type symbol. Lines are
// joined with "\n"; no trailing newline is appended.
func UnifiedDiffTextWithOptions(diffs []DiffLine, opts UnifiedDiffOptions) string {
	var out []string

	// File headers are optional and independent of each other.
	if opts.SrcHeader != "" {
		out = append(out, "--- "+opts.SrcHeader)
	}
	if opts.DstHeader != "" {
		out = append(out, "+++ "+opts.DstHeader)
	}

	for _, hunk := range makeHunks(diffs, opts.Precontext, opts.Postcontext) {
		rangeLine := fmt.Sprintf("@@ -%d,%d +%d,%d @@", hunk.SrcStart, hunk.SrcLines, hunk.DstStart, hunk.DstLines)
		out = append(out, rangeLine)

		for _, line := range hunk.Diffs {
			// An unchanged line with no text is written as a completely empty
			// line, without the type symbol prefix.
			if line.Type == Equal && len(line.Text) == 0 {
				out = append(out, "")
				continue
			}
			out = append(out, fmt.Sprintf("%s%s", typeSymbol(line.Type), line.Text))
		}
	}

	return strings.Join(out, "\n")
}

// UnifiedDiffText renders diffs as unidiff text using 3 lines of context
// before and after each change, and no file header lines.
func UnifiedDiffText(diffs []DiffLine) string {
	const contextLines = 3
	defaults := UnifiedDiffOptions{
		Precontext:  contextLines,
		Postcontext: contextLines,
	}
	return UnifiedDiffTextWithOptions(diffs, defaults)
}
62 changes: 62 additions & 0 deletions format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,65 @@ func TestDiffTextB(t *testing.T) {
})
}
}

// TestUnifiedDiffTextWithOptions is a table-driven test that compares the
// exact text returned by UnifiedDiffTextWithOptions with the expected unidiff
// output. It covers multiple hunks with one line of context, and file headers
// combined with an empty unchanged line at the end of a hunk.
func TestUnifiedDiffTextWithOptions(t *testing.T) {
	cases := []struct {
		name  string
		diffs []DiffLine
		opts  UnifiedDiffOptions
		want  string
	}{
		{
			name: "Test multiple hunks with context",
			diffs: []DiffLine{
				{Type: Equal, Text: "a"},
				{Type: Equal, Text: "b"},
				{Type: Insert, Text: "c"},
				{Type: Equal, Text: "d"},
				{Type: Equal, Text: "e"},
				{Type: Equal, Text: "f"},
				{Type: Delete, Text: "g"},
				{Type: Insert, Text: "h"},
				{Type: Equal, Text: "i"},
				{Type: Insert, Text: "j"},
				{Type: Equal, Text: "k"},
				{Type: Equal, Text: "l"},
			},
			opts: UnifiedDiffOptions{Precontext: 1, Postcontext: 1},
			want: "@@ -2,2 +2,3 @@\n b\n+c\n d\n@@ -5,4 +6,5 @@\n f\n-g\n+h\n i\n+j\n k",
		},
		{
			name: "Test source and destination file headers",
			diffs: []DiffLine{
				{Type: Equal, Text: "a"},
				{Type: Equal, Text: "b"},
				{Type: Insert, Text: "c"},
				// Empty unchanged line: expected to appear as a bare empty line.
				{Type: Equal, Text: ""},
			},
			opts: UnifiedDiffOptions{
				Precontext:  1,
				Postcontext: 1,
				SrcHeader:   "a.txt",
				DstHeader:   "b.txt",
			},
			want: "--- a.txt\n+++ b.txt\n@@ -2,2 +2,3 @@\n b\n+c\n",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := UnifiedDiffTextWithOptions(tc.diffs, tc.opts)
			if got != tc.want {
				t.Errorf("UnifiedDiffTextWithOptions() = %v, want %v", got, tc.want)
			}
		})
	}
}
Loading

0 comments on commit bd33d36

Please sign in to comment.