diff --git a/lang/perl/Changes b/lang/perl/Changes index c1551566f5f..462f90e903a 100644 --- a/lang/perl/Changes +++ b/lang/perl/Changes @@ -15,6 +15,18 @@ Revision history for Perl extension Avro are encoded in fields of type 'byte' or 'fixed'. Errors resulting from this process will be raised as Avro::BinaryEncoder::Error exceptions + - Fixed a bug with the detection of valid boolean values + which was not using anchors in a regular expression. + Valid values are (case insensitively) 'yes', 'y', 'no', + 'n', 'true', 't', 'false', 'f', 0, 1, and anything that + is accepted by JSON::PP::is_bool. References rejected by + this last function are not valid + - Fixed some issues around the binary encoding of boolean + values. In particular, Avro::BinaryEncoder now correctly + encodes all the values that Avro::Schema accepts as + valid boolean values, which before were blindly evaluated + in boolean context resulting in strings like 'false' + being serialised as if they were true 1.00 Fri Jan 17 15:00:00 2014 - Relicense under apache license 2.0 diff --git a/lang/perl/lib/Avro/BinaryEncoder.pm b/lang/perl/lib/Avro/BinaryEncoder.pm index 47eb93116f0..51399a64c8d 100644 --- a/lang/perl/lib/Avro/BinaryEncoder.pm +++ b/lang/perl/lib/Avro/BinaryEncoder.pm @@ -23,6 +23,7 @@ use Config; use Encode(); use Error::Simple; use Regexp::Common qw(number); +use JSON::PP; # For is_bool our $VERSION = '++MODULE_VERSION++'; @@ -91,7 +92,28 @@ sub encode_null { sub encode_boolean { my $class = shift; my ($schema, $data, $cb) = @_; - $cb->( $data ? \"\x1" : \"\x0" ); + + throw Avro::BinaryEncoder::Error( " is not a valid boolean value") + unless defined $data; + + if ( my $type = ref $data ) { + throw Avro::BinaryEncoder::Error("cannot encode a '$type' reference as boolean") + unless $type eq 'JSON::PP::Boolean'; + } + else { + throw Avro::BinaryEncoder::Error( "'$data' is not a valid boolean value") + unless $data eq '' # For Perl versions without builtin::is_bool + || JSON::PP::is_bool($data) + || $data =~ /^(?:true|t|false|f|yes|y|no|n|0|1)$/i; + } + + # Some values might be false but evaluate to "truthy" for Perl, + # like the string 'false'. For these known exceptions, which + # would otherwise be false, we read a false value. For everything + # else, we rely on their value evaluated in a boolean context. + my $true = $data =~ /^(?:no|n|false|f)$/i ? 0 : !!$data; + + $cb->( $true ? \"\x1" : \"\x0" ); } sub encode_int { diff --git a/lang/perl/lib/Avro/Schema.pm b/lang/perl/lib/Avro/Schema.pm index 200d2947cd7..0543a90f062 100644 --- a/lang/perl/lib/Avro/Schema.pm +++ b/lang/perl/lib/Avro/Schema.pm @@ -321,9 +321,9 @@ sub is_data_valid { return 1 unless ref $data; } if ($type eq 'boolean') { + return 1 if JSON::PP::is_bool($data); return 0 if ref $data; # sometimes risky - return 1 if $data =~ m{yes|no|y|n|t|f|true|false}i; - return 0; + return $data =~ m{^(?:yes|no|y|n|t|f|true|false|0|1)$}i; } return 0; } diff --git a/lang/perl/t/02_bin_encode.t b/lang/perl/t/02_bin_encode.t index 4a3001fe978..5be10e7b7d6 100644 --- a/lang/perl/t/02_bin_encode.t +++ b/lang/perl/t/02_bin_encode.t @@ -24,18 +24,26 @@ use Config; use Test::More; use Test::Exception; use Math::BigInt; +use JSON::PP; # For booleans use_ok 'Avro::BinaryEncoder'; -sub primitive_ok { - my ($primitive_type, $primitive_val, $expected_enc) = @_; +sub primitive { + my ($type, $data) = @_; - my $data; - my $meth = "encode_$primitive_type"; - Avro::BinaryEncoder->$meth( - undef, $primitive_val, sub { $data = ${$_[0]} } + my $encoded; + my $method = "encode_$type"; + Avro::BinaryEncoder->$method( + undef, $data, sub { $encoded = ${$_[0]} } ); - is $data, $expected_enc, "primitive $primitive_type encoded correctly"; + return $encoded; +} + +sub primitive_ok { + my ($type, $have, $want) = @_; + my $data = primitive( $type => $have ); + is primitive( $type => $have ), $want, + "primitive $type encoded @{[ $have // '' ]} correctly"; return $data; } @@ -44,8 +52,48 @@ sub primitive_ok { primitive_ok null => undef, ''; primitive_ok null => 'whatev', ''; - primitive_ok boolean => 0, "\x0"; - primitive_ok boolean => 1, "\x1"; + primitive_ok boolean => 0, "\x0"; + primitive_ok boolean => 1, "\x1"; + primitive_ok boolean => 'false', "\x0"; + primitive_ok boolean => 'true', "\x1"; + primitive_ok boolean => 'f', "\x0"; + primitive_ok boolean => 't', "\x1"; + primitive_ok boolean => 'no', "\x0"; + primitive_ok boolean => 'yes', "\x1"; + primitive_ok boolean => 'n', "\x0"; + primitive_ok boolean => 'y', "\x1"; + primitive_ok boolean => 'FALSE', "\x0"; + primitive_ok boolean => 'TRUE', "\x1"; + primitive_ok boolean => 'F', "\x0"; + primitive_ok boolean => 'T', "\x1"; + primitive_ok boolean => 'NO', "\x0"; + primitive_ok boolean => 'YES', "\x1"; + primitive_ok boolean => 'N', "\x0"; + primitive_ok boolean => 'Y', "\x1"; + primitive_ok boolean => !!0, "\x0"; # Native false + primitive_ok boolean => !!1, "\x1"; # Native true + primitive_ok boolean => $JSON::PP::false, "\x0"; + primitive_ok boolean => $JSON::PP::true, "\x1"; + + throws_ok { + primitive boolean => undef; + } "Avro::BinaryEncoder::Error", " is not a valid boolean value"; + + throws_ok { + primitive boolean => 2; + } "Avro::BinaryEncoder::Error", "'2' is not a valid boolean value"; + + throws_ok { + primitive boolean => -1; + } "Avro::BinaryEncoder::Error", "'-1' is not a valid boolean value"; + + throws_ok { + primitive boolean => 'anything truthy'; + } "Avro::BinaryEncoder::Error", "'anything truthy' is not a valid boolean value"; + + throws_ok { + primitive boolean => {}; + } "Avro::BinaryEncoder::Error", "cannot encode a 'HASH' reference as boolean"; ## - high-bit of each byte should be set except for last one ## - rest of bits are: @@ -74,33 +122,30 @@ sub primitive_ok { primitive_ok long => -Math::BigInt->new(2)**63, "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"; throws_ok { - primitive_ok int => Math::BigInt->new(2)**31; + primitive int => Math::BigInt->new(2)**31; } "Avro::BinaryEncoder::Error", "32-bit signed int overflow"; throws_ok { - primitive_ok int => -Math::BigInt->new(2)**31 - 1; + primitive int => -Math::BigInt->new(2)**31 - 1; } "Avro::BinaryEncoder::Error", "32-bit signed int underflow"; throws_ok { - primitive_ok int => Math::BigInt->new(2)**63; + primitive int => Math::BigInt->new(2)**63; } "Avro::BinaryEncoder::Error", "64-bit signed int overflow"; throws_ok { - primitive_ok long => -Math::BigInt->new(2)**63 - 1; + primitive long => -Math::BigInt->new(2)**63 - 1; } "Avro::BinaryEncoder::Error", "64-bit signed int underflow"; - for (qw(long int)) { - throws_ok { - primitive_ok $_ => "x", undef; - } 'Avro::BinaryEncoder::Error', 'numeric values only'; - }; + throws_ok { + primitive $_ => "x"; + } 'Avro::BinaryEncoder::Error', 'numeric values only' for qw(long int); + # In Unicode, there are decimals that aren't 0-9. # Make sure we handle non-ascii decimals cleanly. - for (qw(long int)) { - throws_ok { - primitive_ok $_ => "\N{U+0661}", undef; - } 'Avro::BinaryEncoder::Error', 'ascii decimals only'; - }; + throws_ok { + primitive $_ => "\N{U+0661}"; + } 'Avro::BinaryEncoder::Error', 'ascii decimals only' for qw(long int); } ## spec examples