Skip to content

Commit

Permalink
Merge pull request #1 from 84codes/refactor
Browse files Browse the repository at this point in the history
Bug fixes and refactoring
  • Loading branch information
naqvis committed Jul 5, 2023
2 parents af38104 + 070f5e5 commit 6cc0d00
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 138 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Crystal LZ4 Compression

Crystal bindings to the [LZ4](https://lz4.github.io/lz4/) compression library. Bindings provided in this shard cover the [frame format](https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md) as the frame format is recommended one to use and guarantees interoperability with other implementations and language bindings.

Crystal bindings to the [LZ4](https://lz4.github.io/lz4/) compression library. The bindings provided in this shard cover the [frame format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md), as it is the recommended format to use and guarantees interoperability with other implementations and language bindings.

LZ4 is a lossless compression algorithm, providing compression speed > 500 MB/s per core (>0.15 Bytes/cycle). It features an extremely fast decoder, with speed in multiple GB/s per core (~1 Byte/cycle).

Expand Down Expand Up @@ -30,7 +29,7 @@ require "lz4"
```crystal
require "lz4"
string = File.open("file.xz") do |file|
string = File.open("file.lz4") do |file|
Compress::LZ4::Reader.open(file) do |lz4|
lz4.gets_to_end
end
Expand All @@ -46,7 +45,7 @@ require "lz4"
File.write("file.txt", "abcd")
File.open("./file.txt", "r") do |input_file|
File.open("./file.xz", "w") do |output_file|
File.open("./file.lz4", "w") do |output_file|
Compress::LZ4::Writer.open(output_file) do |lz4|
IO.copy(input_file, lz4)
end
Expand All @@ -66,3 +65,4 @@ end
## Contributors

- [Ali Naqvi](https://github.com/naqvis) - creator and maintainer
- [Carl Hörberg](https://github.com/carlhoerberg)
4 changes: 3 additions & 1 deletion shard.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
name: lz4
version: 0.1.4
version: 1.0.0

authors:
- Ali Naqvi <[email protected]>
- Carl Hörberg <[email protected]>

description: |
Crystal bindings to the LZ4 compression library.
Expand Down
116 changes: 116 additions & 0 deletions spec/lz4_spec.cr
Original file line number Diff line number Diff line change
@@ -1,4 +1,120 @@
require "./spec_helper"

# Specs for the LZ4 frame-format bindings: the one-shot `encode`/`decode`
# helpers and the streaming `Writer`/`Reader` IO wrappers.
describe Compress::LZ4 do
  it "can encode and decode" do
    text = "foobar" * 1000
    encoded = Compress::LZ4.encode(text)
    # Repetitive input is expected to come out smaller than it went in.
    encoded.size.should be < text.bytesize
    decoded = Compress::LZ4.decode(encoded)
    decoded.should eq text.to_slice
  end

  it "can compress" do
    input = IO::Memory.new("foobar" * 100000)
    output = IO::Memory.new
    Compress::LZ4::Writer.open(output) do |lz4|
      IO.copy(input, lz4)
    end
    output.bytesize.should be < input.bytesize
  end

  it "can decompress" do
    # 10 MiB of random bytes, round-tripped through Writer and Reader.
    bytes = Random::DEFAULT.random_bytes(10 * 1024**2)
    compressed = IO::Memory.new
    writer = Compress::LZ4::Writer.new(compressed)
    writer.write bytes
    writer.close

    compressed.rewind

    output = IO::Memory.new
    Compress::LZ4::Reader.open(compressed) do |lz4|
      IO.copy(lz4, output)
    end
    output.bytesize.should eq bytes.bytesize
    output.to_slice.should eq bytes
  end

  it "can decompress small parts" do
    input = IO::Memory.new("foobar" * 100000)
    output = IO::Memory.new
    Compress::LZ4::Writer.open(output) do |lz4|
      IO.copy(input, lz4)
    end
    output.rewind
    reader = Compress::LZ4::Reader.new(output)
    # Read only the first six bytes of a much larger stream.
    reader.read_string(6).should eq "foobar"
    reader.close
  end

  it "can stream large amounts" do
    src = "a" * 1024**2
    output = IO::Memory.new
    writer = Compress::LZ4::Writer.new(output)
    # Note: the writer is neither flushed nor closed before reading here.
    writer.write src.to_slice
    output.rewind
    reader = Compress::LZ4::Reader.new(output)
    dst = Bytes.new(1024**2)
    # A single `read` call is expected to fill the whole 1 MiB destination.
    read_count = reader.read(dst)
    read_count.should eq 1024**2
    reader.close
  end

  it "can rewind" do
    src = "a" * 1024**2
    output = IO::Memory.new
    writer = Compress::LZ4::Writer.new(output)
    writer.write src.to_slice
    output.rewind
    reader = Compress::LZ4::Reader.new(output)
    dst = Bytes.new(1024**2)
    read_count = reader.read(dst)
    read_count.should eq 1024**2
    # After rewinding, the same amount of data must be readable again.
    reader.rewind
    read_count = reader.read(dst)
    read_count.should eq 1024**2
    reader.close
  end

  it "can not read more than there is" do
    src = "a"
    output = IO::Memory.new
    writer = Compress::LZ4::Writer.new(output)
    writer.write src.to_slice
    writer.flush
    output.rewind
    reader = Compress::LZ4::Reader.new(output)
    # The destination is far larger than the single byte that was written.
    dst = Bytes.new(1024)
    read_count = reader.read(dst)
    read_count.should eq 1
    reader.close
  end

  it "can compress and decompress small parts" do
    # Writer and reader sit on opposite ends of a pipe.
    rp, wp = IO.pipe
    writer = Compress::LZ4::Writer.new(wp)
    reader = Compress::LZ4::Reader.new(rp)
    writer.print "foo"
    # The bytes written so far are expected to be readable once flushed.
    writer.flush
    reader.read_byte.should eq 'f'.ord
    reader.read_byte.should eq 'o'.ord
    reader.read_byte.should eq 'o'.ord
    writer.close
    # With the writer closed, the reader is expected to report end of stream.
    reader.read_byte.should be_nil
  end

  it "can rewind a reader" do
    input = IO::Memory.new("foobar" * 100000)
    output = IO::Memory.new
    Compress::LZ4::Writer.open(output) do |lz4|
      IO.copy(input, lz4)
    end
    output.rewind
    Compress::LZ4::Reader.open(output) do |lz4|
      lz4.read_byte.should eq 'f'.ord
      # Rewinding must restart decoding from the first byte.
      lz4.rewind
      lz4.read_byte.should eq 'f'.ord
    end
  end
end
17 changes: 9 additions & 8 deletions src/lz4.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
require "semantic_version"

module Compress::LZ4
VERSION = "0.1.4"
VERSION = "1.0.0"

LZ4_VERSION = SemanticVersion.parse String.new(LibLZ4.version_string)
LZ4_VERSION_MINIMUM = SemanticVersion.parse("1.9.2")
Expand All @@ -11,19 +11,20 @@ module Compress::LZ4
class LZ4Error < Exception
end

def self.decode(compressed : Slice)
buf = IO::Memory.new(compressed)
uncompressed = Reader.open(buf) do |br|
br.gets_to_end
def self.decode(compressed : Bytes) : Bytes
input = IO::Memory.new(compressed)
output = IO::Memory.new
Reader.open(input) do |br|
IO.copy(br, output)
end
uncompressed.to_slice
output.to_slice
end

# Convenience overload for `String` input: forwards the string's bytes
# (`content.to_slice`) to the `encode` overload that accepts them.
def self.encode(content : String)
encode(content.to_slice)
end

def self.encode(content : Slice)
def self.encode(content : Bytes)
buf = IO::Memory.new
Writer.open(buf) do |br|
br.write content
Expand All @@ -33,4 +34,4 @@ module Compress::LZ4
end
end

require "./**"
require "./lz4/*"
2 changes: 1 addition & 1 deletion src/lz4/lib.cr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Compress::LZ4
@[Link(ldflags: "`command -v pkg-config > /dev/null && pkg-config --libs liblz4 2> /dev/null|| printf %s '--llz4'`")]
@[Link("lz4")]
lib LibLZ4
alias ErrorCodeT = LibC::SizeT
alias Uint32T = LibC::UInt
Expand Down
122 changes: 60 additions & 62 deletions src/lz4/reader.cr
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
require "./lib"

# A read-only `IO` object to decompress data in the LZ4 frame format.
#
# Instances of this class wrap another IO object. When you read from this
# instance, it reads data from the underlying IO, decompresses it, and returns
# it to the caller.
# ## Example: decompress an lz4 file
# ```crystal
# ```
# require "lz4"

# string = File.open("file.lz4") do |file|
Expand All @@ -15,27 +17,18 @@
# pp string
# ```
class Compress::LZ4::Reader < IO
include IO::Buffered

# If `#sync_close?` is `true`, closing this IO will close the underlying IO.
property? sync_close : Bool

# Returns `true` if this reader is closed.
getter? closed = false

getter compressed_bytes = 0u64
getter uncompressed_bytes = 0u64
@context : LibLZ4::Dctx
@opts = LibLZ4::DecompressOptionsT.new(stable_dst: 0)

# Buffer size that avoids excessive round-trips between C and Crystal but doesn't waste too much
# memory on buffering. It's arbitrarily chosen.
BUF_SIZE = 64 * 1024

# Creates an instance of LZ4::Reader.
def initialize(@io : IO, @sync_close : Bool = false)
@buffer = Bytes.new(BUF_SIZE)
@chunk = Bytes.empty

def initialize(@io : IO, @sync_close = false)
ret = LibLZ4.create_decompression_context(out @context, LibLZ4::VERSION)
raise LZ4Error.new("Unable to create lz4 decoder instance: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0
raise_if_error(ret, "Failed to create decompression context")
@buffer = Bytes.new(64 * 1024)
@buffer_rem = Bytes.empty
end

# Creates a new reader from the given *io*, yields it to the given block,
Expand Down Expand Up @@ -65,69 +58,74 @@ class Compress::LZ4::Reader < IO
end

# Always raises `IO::Error` because this is a read-only `IO`.
def unbuffered_write(slice : Bytes)
def write(slice : Bytes) : Nil
raise IO::Error.new "Can't write to LZ4::Reader"
end

def unbuffered_read(slice : Bytes)
def read(slice : Bytes) : Int32
check_open

return 0 if slice.empty?

if @chunk.empty?
m = @io.read(@buffer)
return m if m == 0
@chunk = @buffer[0, m]
end

decompressed_bytes = 0
hint = 0u64 # the hint from the last decompression
loop do
in_remaining = @chunk.size.to_u64
out_remaining = slice.size.to_u64

in_ptr = @chunk.to_unsafe
out_ptr = slice.to_unsafe

ret = LibLZ4.decompress(@context, out_ptr, pointerof(out_remaining), in_ptr, pointerof(in_remaining), nil)
raise LZ4Error.new("lz4 decompression error: #{String.new(LibLZ4.get_error_name(ret))}") unless LibLZ4.is_error(ret) == 0

@chunk = @chunk[in_remaining..]
return out_remaining if ret == 0

if out_remaining == 0
# Probably ran out of data and buffer needs a refill
enc_n = @io.read(@buffer)
return 0 if enc_n == 0
@chunk = @buffer[0, enc_n]
next
end

return out_remaining
src_remaining = @buffer_rem.size.to_u64
src_remaining = Math.min(hint, src_remaining) unless hint.zero?
dst_remaining = slice.size.to_u64

hint = LibLZ4.decompress(@context, slice, pointerof(dst_remaining), @buffer_rem, pointerof(src_remaining), pointerof(@opts))
raise_if_error(hint, "Failed to decompress")

@buffer_rem += src_remaining
slice += dst_remaining
decompressed_bytes += dst_remaining
break if slice.empty? # got all we needed
break if hint.zero? # hint of how much more src data is needed
refill_buffer
break if @buffer_rem.empty?
end
0
@uncompressed_bytes &+= decompressed_bytes
decompressed_bytes
end

def unbuffered_flush
def flush
raise IO::Error.new "Can't flush LZ4::Reader"
end

# Closes this reader.
def unbuffered_close
return if @closed || @context.nil?
@closed = true
def close
if @sync_close
@io.close
@closed = true # Only really closed if io is closed
end
end

def finalize
LibLZ4.free_decompression_context(@context)
@io.close if @sync_close
end

def unbuffered_rewind
check_open

def rewind
@io.rewind
initialize(@io, @sync_close)
@buffer_rem = Bytes.empty
@uncompressed_bytes = 0u64
@compressed_bytes = 0u64
LibLZ4.reset_decompression_context(@context)
end

# Reads the next chunk of compressed data from the wrapped IO into `@buffer`
# and points `@buffer_rem` at the bytes just read. Does nothing while
# previously read input is still pending, so unconsumed data is never
# overwritten.
private def refill_buffer
  if @buffer_rem.empty?
    bytes_read = @io.read(@buffer)
    @compressed_bytes &+= bytes_read
    @buffer_rem = @buffer[0, bytes_read]
  end
end

# Raises `LZ4Error` when `LibLZ4.is_error` flags *ret* as an error code.
# The message is *msg* followed by the error name reported by the library.
# Does nothing for non-error codes.
private def raise_if_error(ret : Int, msg : String)
  return if LibLZ4.is_error(ret) == 0

  error_name = String.new(LibLZ4.get_error_name(ret))
  raise LZ4Error.new("#{msg}: #{error_name}")
end

# :nodoc:
def inspect(io : IO) : Nil
to_s(io)
# Ratio of uncompressed bytes produced to compressed bytes read so far in
# the stream. Returns 0.0 while no compressed bytes have been counted,
# instead of dividing by zero.
def compression_ratio : Float64
  if @compressed_bytes.zero?
    0.0
  else
    @uncompressed_bytes / @compressed_bytes
  end
end
end
Loading

0 comments on commit 6cc0d00

Please sign in to comment.