Skip to content

Commit

Permalink
added a first version of a parser for #290
Browse files Browse the repository at this point in the history
  • Loading branch information
nlohmann committed Aug 15, 2016
1 parent f791c5f commit 5e67f7a
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 21 deletions.
93 changes: 83 additions & 10 deletions src/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5962,6 +5962,16 @@ class basic_json
return parser(s, cb).parse();
}

/*!
@brief deserialize from string literal
@copydoc parse(const string_t&, const parser_callback_t)
@param[in] s   pointer to the characters to parse; forwarded unchanged to
               the parser
@param[in] cb  optional parser callback, forwarded unchanged to the parser
@return result of the deserialization
*/
static basic_json parse(const typename string_t::value_type* s,
                        const parser_callback_t cb = nullptr)
{
    // build the parser on the raw character pointer, then run it
    parser literal_parser(s, cb);
    return literal_parser.parse();
}

/*!
@brief deserialize from stream
Expand Down Expand Up @@ -6001,6 +6011,75 @@ class basic_json
return parser(i, cb).parse();
}

/*!
@brief deserialize from a container with contiguous storage

This function reads from a nonempty iterator range of a container with
contiguous storage of 1-byte values. Compatible container types include
`std::vector`, `std::string`, `std::array`, `std::valarray`, and
`std::initializer_list`. Furthermore, C-style arrays can be used with
`std::begin()`/`std::end()`. User-defined containers can be used as long
as they implement random-access iterators and a contiguous storage.

@pre The iterator range is contiguous. Violating this precondition yields
undefined behavior. **This precondition is enforced with an assertion.**
@pre Each element in the range has a size of 1 byte. **This precondition
is enforced at compile-time with a static assertion.**
@pre The iterator range is nonempty. Violating this precondition yields
undefined behavior. **This precondition is enforced with an assertion.**

@warning Contiguity and non-emptiness of the range cannot be enforced at
         compile-time. If the function is called with noncompliant
         iterators, the behavior is undefined and will most likely yield
         segmentation violation.

@param[in] first begin of the range to parse (included)
@param[in] last end of the range to parse (excluded)
@param[in] cb a parser callback function of type @ref parser_callback_t
which is used to control the deserialization by filtering unwanted values
(optional)

@return result of the deserialization

@complexity Linear in the length of the input. The parser is a predictive
LL(1) parser. The complexity can be higher if the parser callback function
@a cb has a super-linear complexity.

@note A UTF-8 byte order mark is silently ignored.

@todo Example and references.

@since version 2.0.3
*/
template <class IteratorType, typename
          std::enable_if<
              std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value
              , int>::type
          = 0>
static basic_json parse(IteratorType first, IteratorType last,
                        const parser_callback_t cb = nullptr)
{
    // the element size depends only on the iterator type, so it is checked
    // at compile-time instead of once per element at run-time; this also
    // keeps the check active in builds that define NDEBUG
    static_assert(sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1,
                  "each element in the iterator range must have the size of 1 byte");

    // assertion to check that the iterator range is indeed contiguous,
    // see http://stackoverflow.com/a/35008842/266378 for more discussion;
    // the pair carries (still contiguous?, offset of the current element)
    assert(std::accumulate(first, last, std::pair<bool, int>(true, 0),
                           [&first](std::pair<bool, int> res, decltype(*first) val)
    {
        // the i-th element reached via the iterator must equal the element
        // found i positions after the address of the first element
        res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
        return res;
    }).first);

    // assertion that the iterator range is not empty
    assert(std::distance(first, last) > 0);

    return parser(first, last, cb).parse();
}

/*!
@brief deserialize from stream
Expand Down Expand Up @@ -8875,10 +8954,10 @@ class basic_json
{
public:
/// a parser reading from a string literal
/// @param[in] buff  pointer to the characters to parse; the input length is
///                  obtained with strlen(buff), so the buffer has to be
///                  null-terminated
/// @param[in] cb    optional parser callback; copied into member `callback`
parser(const typename string_t::value_type* buff, parser_callback_t cb = nullptr)
parser(const typename string_t::value_type* buff,
const parser_callback_t cb = nullptr)
: callback(cb),
m_lexer(reinterpret_cast<const typename lexer::lexer_char_t*>(buff),
strlen(buff))
m_lexer(reinterpret_cast<const typename lexer::lexer_char_t*>(buff), strlen(buff))
{}

/// a parser reading from a string container
Expand All @@ -8902,13 +8981,7 @@ class basic_json
: callback(cb),
m_lexer(reinterpret_cast<const typename lexer::lexer_char_t*>(&(*first)),
static_cast<size_t>(std::distance(first, last)))
{
int i = 0;
assert(std::accumulate(first, last, true, [&i, &first](bool res, decltype(*first) val)
{
return res and (val == *(std::next(std::addressof(*first), i++)));
}));
}
{}

/// public parser interface
basic_json parse()
Expand Down
93 changes: 83 additions & 10 deletions src/json.hpp.re2c
Original file line number Diff line number Diff line change
Expand Up @@ -5962,6 +5962,16 @@ class basic_json
return parser(s, cb).parse();
}

/*!
@brief deserialize from string literal
@copydoc parse(const string_t&, const parser_callback_t)
@param[in] s   pointer to the characters to parse; forwarded unchanged to
               the parser
@param[in] cb  optional parser callback, forwarded unchanged to the parser
@return result of the deserialization
*/
static basic_json parse(const typename string_t::value_type* s,
                        const parser_callback_t cb = nullptr)
{
    // build the parser on the raw character pointer, then run it
    parser literal_parser(s, cb);
    return literal_parser.parse();
}

/*!
@brief deserialize from stream

Expand Down Expand Up @@ -6001,6 +6011,75 @@ class basic_json
return parser(i, cb).parse();
}

/*!
@brief deserialize from a container with contiguous storage

This function reads from a nonempty iterator range of a container with
contiguous storage of 1-byte values. Compatible container types include
`std::vector`, `std::string`, `std::array`, `std::valarray`, and
`std::initializer_list`. Furthermore, C-style arrays can be used with
`std::begin()`/`std::end()`. User-defined containers can be used as long
as they implement random-access iterators and a contiguous storage.

@pre The iterator range is contiguous. Violating this precondition yields
undefined behavior. **This precondition is enforced with an assertion.**
@pre Each element in the range has a size of 1 byte. **This precondition
is enforced at compile-time with a static assertion.**
@pre The iterator range is nonempty. Violating this precondition yields
undefined behavior. **This precondition is enforced with an assertion.**

@warning Contiguity and non-emptiness of the range cannot be enforced at
         compile-time. If the function is called with noncompliant
         iterators, the behavior is undefined and will most likely yield
         segmentation violation.

@param[in] first begin of the range to parse (included)
@param[in] last end of the range to parse (excluded)
@param[in] cb a parser callback function of type @ref parser_callback_t
which is used to control the deserialization by filtering unwanted values
(optional)

@return result of the deserialization

@complexity Linear in the length of the input. The parser is a predictive
LL(1) parser. The complexity can be higher if the parser callback function
@a cb has a super-linear complexity.

@note A UTF-8 byte order mark is silently ignored.

@todo Example and references.

@since version 2.0.3
*/
template <class IteratorType, typename
          std::enable_if<
              std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value
              , int>::type
          = 0>
static basic_json parse(IteratorType first, IteratorType last,
                        const parser_callback_t cb = nullptr)
{
    // the element size depends only on the iterator type, so it is checked
    // at compile-time instead of once per element at run-time; this also
    // keeps the check active in builds that define NDEBUG
    static_assert(sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1,
                  "each element in the iterator range must have the size of 1 byte");

    // assertion to check that the iterator range is indeed contiguous,
    // see http://stackoverflow.com/a/35008842/266378 for more discussion;
    // the pair carries (still contiguous?, offset of the current element)
    assert(std::accumulate(first, last, std::pair<bool, int>(true, 0),
                           [&first](std::pair<bool, int> res, decltype(*first) val)
    {
        // the i-th element reached via the iterator must equal the element
        // found i positions after the address of the first element
        res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
        return res;
    }).first);

    // assertion that the iterator range is not empty
    assert(std::distance(first, last) > 0);

    return parser(first, last, cb).parse();
}

/*!
@brief deserialize from stream

Expand Down Expand Up @@ -8172,10 +8251,10 @@ class basic_json
{
public:
/// a parser reading from a string literal
/// @param[in] buff  pointer to the characters to parse; the input length is
///                  obtained with strlen(buff), so the buffer has to be
///                  null-terminated
/// @param[in] cb    optional parser callback; copied into member `callback`
parser(const typename string_t::value_type* buff, parser_callback_t cb = nullptr)
parser(const typename string_t::value_type* buff,
const parser_callback_t cb = nullptr)
: callback(cb),
m_lexer(reinterpret_cast<const typename lexer::lexer_char_t*>(buff),
strlen(buff))
m_lexer(reinterpret_cast<const typename lexer::lexer_char_t*>(buff), strlen(buff))
{}

/// a parser reading from a string container
Expand All @@ -8199,13 +8278,7 @@ class basic_json
: callback(cb),
m_lexer(reinterpret_cast<const typename lexer::lexer_char_t*>(&(*first)),
static_cast<size_t>(std::distance(first, last)))
{
int i = 0;
assert(std::accumulate(first, last, true, [&i, &first](bool res, decltype(*first) val)
{
return res and (val == *(std::next(std::addressof(*first), i++)));
}));
}
{}

/// public parser interface
basic_json parse()
Expand Down
50 changes: 49 additions & 1 deletion test/src/unit-deserialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ SOFTWARE.
#include "json.hpp"
using nlohmann::json;

#include <valarray>

TEST_CASE("deserialization")
{
SECTION("stream")
Expand All @@ -41,13 +43,20 @@ TEST_CASE("deserialization")
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
}

SECTION("string")
SECTION("string literal")
{
// `auto` deduces a pointer to the literal's characters, so json::parse is
// called with a raw character pointer rather than a string object
auto s = "[\"foo\",1,2,3,false,{\"one\":1}]";
json j = json::parse(s);
// the parsed value must equal the same document built from an initializer list
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
}

SECTION("string_t")
{
// same document as in the string-literal section, but held in the library's
// string type so that json::parse receives a json::string_t object
json::string_t s = "[\"foo\",1,2,3,false,{\"one\":1}]";
json j = json::parse(s);
// the parsed value must equal the same document built from an initializer list
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
}

SECTION("operator<<")
{
std::stringstream ss;
Expand All @@ -70,4 +79,43 @@ TEST_CASE("deserialization")
{
CHECK("[\"foo\",1,2,3,false,{\"one\":1}]"_json == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
}

SECTION("contiguous containers")
{
// Each subsection stores the bytes of the JSON text `true` in a different
// container and parses it with json::parse(begin, end); the expected result
// is json(true) every time. Some ranges end with an extra '\0' element,
// others do not.
SECTION("from std::vector")
{
// range includes a trailing '\0' element
std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
}

SECTION("from std::array")
{
// range includes a trailing '\0' element
std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
}

SECTION("from array")
{
// built-in array without a trailing '\0'; std::begin()/std::end() are
// used to obtain the range
uint8_t v[] = {'t', 'r', 'u', 'e'};
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
}

SECTION("from std::string")
{
// exactly the four characters of `true`; no '\0' is listed in the initializer
std::string v = {'t', 'r', 'u', 'e'};
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
}

SECTION("from std::initializer_list")
{
// range includes a trailing '\0' element
std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
}

SECTION("from std::valarray")
{
// range includes a trailing '\0' element
std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
}
}
}

0 comments on commit 5e67f7a

Please sign in to comment.