Add unified diff format (#2)

peter-evans · Mar 26, 2022 · bd33d36 · bd33d36
1 parent d379ee2
commit bd33d36
Show file tree

Hide file tree

Showing 7 changed files with 1,023 additions and 123 deletions.
diff --git a/README.md b/README.md
@@ -6,6 +6,78 @@
 
 Go implementation of the Patience Diff algorithm.
 
+This library generates line-oriented diffs between source and destination inputs, using the Patience Diff algorithm.
+
+## Features
+
+Supports both plain format and [Unified format](https://en.wikipedia.org/wiki/Diff#Unified_format) (unidiff).
+
+Plain format:
+```diff
+ the
+ quick
+ brown
+-chicken
++fox
+ jumps
+ over
+ the
++lazy
+ dog
+```
+
+Unified format (unidiff):
+
+```diff
+--- a.txt
++++ b.txt
+@@ -3,3 +3,3 @@
+ brown
+-chicken
++fox
+ jumps
+@@ -7,2 +7,3 @@
+ the
++lazy
+ dog
+```
+
+## Installation
+
+```sh
+go get github.com/peter-evans/patience
+```
+
+## Usage
+
+```go
+a := strings.Split(textA, "\n")
+b := strings.Split(textB, "\n")
+
+diffs := patience.Diff(a, b)
+
+// Combined diff
+diff := patience.DiffText(diffs)
+
+// Split diffs
+diffA := patience.DiffTextA(diffs)
+diffB := patience.DiffTextB(diffs)
+
+// Unified diff
+unidiff := patience.UnifiedDiffText(diffs)
+
+// Unified diff with options
+unidiffopts := patience.UnifiedDiffTextWithOptions(
+     diffs,
+     patience.UnifiedDiffOptions{
+          Precontext:  2,
+          Postcontext: 2,
+          SrcHeader:   "a.txt",
+          DstHeader:   "b.txt",
+     },
+)
+```
+
 ## About
 
 Patience Diff is an algorithm credited to [Bram Cohen](https://bramcohen.livejournal.com/73318.html) that produces diffs tending to be more human-readable than the common diff algorithm.
@@ -16,8 +88,6 @@ While the diffs generated by this algorithm are efficient, in many cases they te
 
 Patience Diff, while also relying on computing the longest common subsequence, takes a different approach. It only computes the longest common subsequence of the *unique*, *common* elements of both texts. This means that lines that are frequently non-unique, such as those containing a single brace or new line character, are ignored. The result is that distinctive lines, such as function declarations, become the anchor points of commonality between the two texts.
 
-## Example
-
 This is an example comparing Patience Diff to the common diff algorithm (Myers).
 
 Patience Diff
@@ -100,28 +170,6 @@ Common diff (Myers)
  }
 ```
 
-## Installation
-
-```sh
-go get github.com/peter-evans/patience
-```
-
-## Usage
-
-```go
-a := strings.Split(textA, "\n")
-b := strings.Split(textB, "\n")
-
-diffs := patience.Diff(a, b)
-
-// Combined diff
-diff := patience.DiffText(diffs)
-
-// Split diffs
-diffA := patience.DiffTextA(diffs)
-diffB := patience.DiffTextB(diffs)
-```
-
 ## References
 
 - [Patience Diff Advantages](https://bramcohen.livejournal.com/73318.html) by Bram Cohen

diff --git a/format.go b/format.go
@@ -63,3 +63,46 @@ func DiffTextB(diffs []DiffLine) string {
 	}
 	return strings.Join(s, "\n")
 }
+
+// UnifiedDiffOptions holds the context sizes and optional file headers used by UnifiedDiffTextWithOptions.
+type UnifiedDiffOptions struct {
+	// Precontext is the number of lines of context before each change in a hunk.
+	Precontext int
+	// Postcontext is the number of lines of context after each change in a hunk.
+	Postcontext int
+	// SrcHeader is the header for the source file, emitted as a "--- " line; the line is omitted when SrcHeader is empty.
+	SrcHeader string
+	// DstHeader is the header for the destination file, emitted as a "+++ " line; the line is omitted when DstHeader is empty.
+	DstHeader string
+}
+
+// UnifiedDiffTextWithOptions returns the diff text in unidiff format: optional "--- "/"+++ " header lines, then for each hunk an "@@ -start,lines +start,lines @@" line followed by its diff lines, all joined with "\n" (no trailing newline is appended).
+func UnifiedDiffTextWithOptions(diffs []DiffLine, opts UnifiedDiffOptions) string {
+	hunks := makeHunks(diffs, opts.Precontext, opts.Postcontext)
+	s := []string{}
+	if len(opts.SrcHeader) > 0 {
+		s = append(s, fmt.Sprintf("--- %s", opts.SrcHeader))
+	}
+	if len(opts.DstHeader) > 0 {
+		s = append(s, fmt.Sprintf("+++ %s", opts.DstHeader))
+	}
+	for _, h := range hunks {
+		s = append(s, fmt.Sprintf("@@ -%d,%d +%d,%d @@", h.SrcStart, h.SrcLines, h.DstStart, h.DstLines))
+		for _, l := range h.Diffs {
+			if l.Type == Equal && len(l.Text) == 0 { // an empty Equal line is emitted as an empty line, without a type symbol
+				s = append(s, "")
+			} else {
+				s = append(s, fmt.Sprintf("%s%s", typeSymbol(l.Type), l.Text))
+			}
+		}
+	}
+	return strings.Join(s, "\n")
+}
+
+// UnifiedDiffText returns the diff text in unidiff format with 3 lines of context before and after each change and no source/destination header lines.
+func UnifiedDiffText(diffs []DiffLine) string {
+	return UnifiedDiffTextWithOptions(
+		diffs,
+		UnifiedDiffOptions{Precontext: 3, Postcontext: 3},
+	)
+}
diff --git a/format_test.go b/format_test.go
@@ -153,3 +153,65 @@ func TestDiffTextB(t *testing.T) {
 		})
 	}
 }
+
+func TestUnifiedDiffTextWithOptions(t *testing.T) { // table-driven: each case's rendered text must equal want exactly
+	type args struct {
+		diffs []DiffLine
+		opts  UnifiedDiffOptions
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "Test multiple hunks with context",
+			args: args{
+				diffs: []DiffLine{
+					{Type: Equal, Text: "a"},
+					{Type: Equal, Text: "b"},
+					{Type: Insert, Text: "c"},
+					{Type: Equal, Text: "d"},
+					{Type: Equal, Text: "e"},
+					{Type: Equal, Text: "f"},
+					{Type: Delete, Text: "g"},
+					{Type: Insert, Text: "h"},
+					{Type: Equal, Text: "i"},
+					{Type: Insert, Text: "j"},
+					{Type: Equal, Text: "k"},
+					{Type: Equal, Text: "l"},
+				},
+				opts: UnifiedDiffOptions{
+					Precontext:  1,
+					Postcontext: 1,
+				},
+			},
+			want: "@@ -2,2 +2,3 @@\n b\n+c\n d\n@@ -5,4 +6,5 @@\n f\n-g\n+h\n i\n+j\n k",
+		},
+		{
+			name: "Test source and destination file headers",
+			args: args{
+				diffs: []DiffLine{
+					{Type: Equal, Text: "a"},
+					{Type: Equal, Text: "b"},
+					{Type: Insert, Text: "c"},
+					{Type: Equal, Text: ""}, // empty Equal line: rendered as a bare empty line, which is why want ends with "\n"
+				},
+				opts: UnifiedDiffOptions{
+					Precontext:  1,
+					Postcontext: 1,
+					SrcHeader:   "a.txt",
+					DstHeader:   "b.txt",
+				},
+			},
+			want: "--- a.txt\n+++ b.txt\n@@ -2,2 +2,3 @@\n b\n+c\n",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := UnifiedDiffTextWithOptions(tt.args.diffs, tt.args.opts); got != tt.want {
+				t.Errorf("UnifiedDiffTextWithOptions() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}