From c2931218bb803316c5cf4b034fe8b2f66fae3f77 Mon Sep 17 00:00:00 2001
From: Szabolcs Berecz
Date: Mon, 14 May 2018 10:55:09 +0200
Subject: [PATCH] Add nom based parser

---
 Cargo.toml        |   4 ++
 benches/parser.rs |  19 ++++++
 src/header.rs     |  44 +++++++++++++-
 src/lib.rs        |   3 +
 src/parsers.rs    | 146 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/invalid.rs  |  14 +++++
 6 files changed, 228 insertions(+), 2 deletions(-)
 create mode 100644 benches/parser.rs
 create mode 100644 src/parsers.rs
 create mode 100644 tests/invalid.rs

diff --git a/Cargo.toml b/Cargo.toml
index 9dcaa0b..1713f3d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,3 +11,7 @@ readme = "README.md"
 [dependencies]
 failure = "0.1.1"
 byteorder = "1.2.1"
+
+[dependencies.nom]
+version = "~4.0.0"
+features = ["verbose-errors"]
diff --git a/benches/parser.rs b/benches/parser.rs
new file mode 100644
index 0000000..bd11c5c
--- /dev/null
+++ b/benches/parser.rs
@@ -0,0 +1,19 @@
+#![feature(test)]
+extern crate test;
+
+extern crate sleep_parser;
+
+use sleep_parser::Header;
+use test::Bencher;
+
+const HEADER: &[u8; 32] = b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
+
+#[bench]
+fn hand_rolled(b: &mut Bencher) {
+  b.iter(|| Header::from_vec(HEADER));
+}
+
+#[bench]
+fn nom(b: &mut Bencher) {
+  b.iter(|| Header::from_bytes(HEADER));
+}
diff --git a/src/header.rs b/src/header.rs
index 92b2f65..e96aea0 100644
--- a/src/header.rs
+++ b/src/header.rs
@@ -2,6 +2,8 @@ extern crate byteorder;
 
 use self::byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
 use failure::Error;
+use nom;
+use parsers;
 use std::io::Cursor;
 
 /// Algorithm used for hashing the data.
@@ -42,14 +44,14 @@ pub enum FileType {
 }
 
 /// SLEEP Protocol version.
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum ProtocolVersion {
   /// The version specified as per the paper released in 2017-09.
   V0,
 }
 
 /// Structural representation of 32 byte SLEEP headers.
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub struct Header {
   /// Type of file.
   pub file_type: FileType,
@@ -76,6 +78,11 @@ impl Header {
     }
   }
 
+  /// Parses a 32 byte buffer slice into a valid Header.
+  pub fn from_bytes(buf: &[u8]) -> Result<Header, Error> {
+    convert_nom_result(buf, parsers::header(buf))
+  }
+
   /// Parse a 32 byte buffer slice into a valid Header.
   pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> {
     ensure!(buffer.len() == 32, "buffer should be 32 bytes");
@@ -215,3 +222,36 @@ impl Header {
       && self.hash_type == HashType::BLAKE2b
   }
 }
+
+fn convert_nom_result(
+  buf: &[u8],
+  result: Result<(&[u8], Header), nom::Err<&[u8]>>,
+) -> Result<Header, Error> {
+  match result {
+    Ok((&[], h)) => Ok(h),
+    Ok((remaining, _)) => {
+      assert!(
+        buf.len() > parsers::HEADER_LENGTH,
+        "broken parser: input length is {}, but got unparsed input of length {}",
+        buf.len(),
+        remaining.len()
+      );
+      Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH))
+    }
+    Err(e @ nom::Err::Incomplete(_)) => {
+      assert!(
+        buf.len() < parsers::HEADER_LENGTH,
+        "broken parser: input length is {}, but got error: {:?}",
+        buf.len(),
+        e
+      );
+      Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH))
+    }
+    Err(nom::Err::Error(context)) => {
+      Err(format_err!("nom error: {:?}", context.into_error_kind()))
+    }
+    Err(nom::Err::Failure(context)) => {
+      Err(format_err!("nom failure: {:?}", context.into_error_kind()))
+    }
+  }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7f75cdf..b08c1c5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,8 +5,11 @@
 
 #[macro_use]
 extern crate failure;
+#[macro_use]
+extern crate nom;
 
 mod header;
+mod parsers;
 
 pub use header::*;
 
diff --git a/src/parsers.rs b/src/parsers.rs
new file mode 100644
index 0000000..a39e361
--- /dev/null
+++ b/src/parsers.rs
@@ -0,0 +1,146 @@
+#![cfg_attr(feature = "cargo-clippy", allow(clippy))]
+
+use header::*;
+use nom::{be_u16, be_u8, rest};
+use std::str;
+
+pub(crate) const HEADER_LENGTH: usize = 32;
+const VERIFY_TRAILING_ZEROS: bool = true;
+
+named!(
+  file_type<FileType>,
+  switch!(be_u8,
+    0 => value!(FileType::BitField) |
+    1 => value!(FileType::Signatures) |
+    2 => value!(FileType::Tree)
+  )
+);
+
+named!(
+  protocol_version<ProtocolVersion>,
+  switch!(be_u8,
+    0 => value!(ProtocolVersion::V0)
+  )
+);
+
+named_args!(
+  algorithm(len: u8)<HashType>,
+  switch!(map_res!(take!(len), str::from_utf8),
+    "BLAKE2b" => value!(HashType::BLAKE2b) |
+    "Ed25519" => value!(HashType::Ed25519) |
+    "" => value!(HashType::None)
+  )
+);
+
+named!(
+  pub header<Header>,
+  flat_map!(
+    take!(HEADER_LENGTH),
+    do_parse!(
+      tag!(b"\x05\x02\x57") >>
+      file_type: file_type >>
+      protocol_version: protocol_version >>
+      entry_size: be_u16 >>
+
+      algorithm_len: verify!(be_u8, |len: u8| len <= HEADER_LENGTH as u8 - 8) >>
+      algorithm: apply!(algorithm, algorithm_len) >>
+
+      verify!(rest, |bytes: &[u8]| {
+        let header_consumed = bytes.len() + algorithm_len as usize + 8 == HEADER_LENGTH;
+        let trailing_zeros = !VERIFY_TRAILING_ZEROS || bytes.iter().all(|&b| b == 0u8);
+        header_consumed && trailing_zeros
+      }) >>
+
+      (Header {
+        file_type,
+        protocol_version,
+        entry_size,
+        hash_type: algorithm,
+      })
+    )
+  )
+);
+
+#[cfg(test)]
+mod test {
+  use super::*;
+
+  use nom;
+
+  #[test]
+  fn parse_file_type() {
+    assert_eq!(
+      file_type(b"\x00"),
+      Ok((&[][..], FileType::BitField))
+    );
+    assert_eq!(
+      file_type(b"\x01"),
+      Ok((&[][..], FileType::Signatures))
+    );
+    assert_eq!(
+      file_type(b"\x02"),
+      Ok((&[][..], FileType::Tree))
+    );
+    assert!(file_type(b"\xff").is_err());
+  }
+
+  #[test]
+  fn parse_header() {
+    fn mk_header(prefix: &[u8]) -> [u8; 32] {
+      let mut h = [0u8; 32];
+      h[0..prefix.len()].clone_from_slice(prefix);
+      h
+    }
+
+    assert_eq!(
+      header(&mk_header(
+        b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b"
+      )),
+      Ok((
+        &[][..],
+        Header {
+          file_type: FileType::Signatures,
+          protocol_version: ProtocolVersion::V0,
+          entry_size: 40,
+          hash_type: HashType::BLAKE2b
+        }
+      ))
+    );
+    assert_eq!(
+      header(&mk_header(
+        b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b"
+      )).unwrap()
+        .1
+        .hash_type,
+      HashType::BLAKE2b
+    );
+    assert_eq!(
+      header(&mk_header(
+        b"\x05\x02W\x01\x00\x00\x28\x07Ed25519"
+      )).unwrap()
+        .1
+        .hash_type,
+      HashType::Ed25519
+    );
+    assert_eq!(
+      header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x00"))
+        .unwrap()
+        .1
+        .hash_type,
+      HashType::None
+    );
+    assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err());
+    assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err());
+
+    let h = b"\x05\x02W\x01\x00\x00\x28\x19BLAKE2bXXXXXXXXXXXXXXXXXX";
+    assert!(header(h).is_err());
+  }
+
+  #[test]
+  fn invalid_algorithm_len() {
+    match header(b"\x05\x02W\x00\x00\x00\x00\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") {
+      Err(nom::Err::Error(nom::Context::Code(_, nom::ErrorKind::Verify))) => (),
+      x => panic!("{:?}", x),
+    }
+  }
+}
diff --git a/tests/invalid.rs b/tests/invalid.rs
new file mode 100644
index 0000000..6cb75e4
--- /dev/null
+++ b/tests/invalid.rs
@@ -0,0 +1,14 @@
+extern crate sleep_parser;
+
+use sleep_parser::*;
+
+#[test]
+fn issue_3() {
+  // https://github.com/datrs/sleep-parser/issues/3
+
+  let data = b"\x05\x02W\x01\x00\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xfb\x03p\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xbb9\xb0\xf5\xf5";
+  assert!(Header::from_bytes(data).is_err());
+
+  let data = b"\x05\x02W\x01\x00\x00\x00\x12\x12\x12\x00\x00S\xc3\xcf\x8a2\xcc\xd1\xce9\xc4K\x9343\x00602\xb5\x07";
+  assert!(Header::from_bytes(data).is_err());
+}