Skip to content

Commit

Permalink
Merge pull request #8 from itzmeanjan/update-sha3
Browse files Browse the repository at this point in the history
Update Dependency `sha3`
  • Loading branch information
itzmeanjan committed Sep 17, 2023
2 parents 95f5926 + da40353 commit b0f5ac2
Show file tree
Hide file tree
Showing 18 changed files with 220 additions and 170 deletions.
68 changes: 42 additions & 26 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,63 @@ CXX = g++
CXX_FLAGS = -std=c++20
WARN_FLAGS = -Wall -Wextra -pedantic
OPT_FLAGS = -O3 -march=native -mtune=native
LINK_FLAGS = -flto
I_FLAGS = -I ./include
DEP_IFLAGS = -I ./sha3/include -I ./subtle/include

all: test
SRC_DIR = include
SABER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp)
BUILD_DIR = build

tests/test_polynomial.o: tests/test_polynomial.cpp include/*.hpp
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
TEST_DIR = tests
TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp)
TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES))))
TEST_LINK_FLAGS = -lgtest -lgtest_main
TEST_BINARY = $(BUILD_DIR)/test.out

tests/test_poly_matrix.o: tests/test_poly_matrix.cpp include/*.hpp
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
BENCHMARK_DIR = benchmarks
BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp)
BENCHMARK_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(BENCHMARK_SOURCES))))
BENCHMARK_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpthread
BENCHMARK_BINARY = $(BUILD_DIR)/bench.out
PERF_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpfm -lpthread
PERF_BINARY = $(BUILD_DIR)/perf.out

tests/test_pke.o: tests/test_pke.cpp include/*.hpp
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
all: test

tests/test_kem.o: tests/test_kem.cpp include/*.hpp
$(BUILD_DIR):
mkdir -p $@

$(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR)
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@

tests/a.out: tests/test_polynomial.o tests/test_poly_matrix.o tests/test_pke.o tests/test_kem.o
$(CXX) $(OPT_FLAGS) $^ -lgtest -lgtest_main -o $@
$(TEST_BINARY): $(TEST_OBJECTS)
$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@

test: tests/a.out
test: $(TEST_BINARY)
./$<

benchmarks/bench.out: benchmarks/bench_kem.cpp include/*.hpp
# If your google-benchmark library is not built with libPFM support.
# More @ https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -lbenchmark_main -o $@
$(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BUILD_DIR)
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@

$(BENCHMARK_BINARY): $(BENCHMARK_OBJECTS)
$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(BENCHMARK_LINK_FLAGS) -o $@

benchmark: $(BENCHMARK_BINARY)
# google-benchmark must *not* be built with libPFM support for this target
./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=8 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true

benchmarks/perf.out: benchmarks/bench_kem.cpp include/*.hpp
# Must use this if your google-benchmark library is built with libPFM support.
# More @ https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -lpfm -lbenchmark_main -o $@
$(PERF_BINARY): $(BENCHMARK_OBJECTS)
$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(PERF_LINK_FLAGS) -o $@

benchmark: benchmarks/bench.out
./$< --benchmark_min_warmup_time=.5 --benchmark_time_unit=us --benchmark_counters_tabular=true
perf: $(PERF_BINARY)
# Requires google-benchmark to be built with libPFM support; see https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7
./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=8 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES

perf: benchmarks/perf.out
./$< --benchmark_min_warmup_time=.5 --benchmark_time_unit=us --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES
.PHONY: format clean

clean:
find . -name '*.out' -o -name '*.o' -o -name '*.so' -o -name '*.gch' | xargs rm -rf
rm -rf $(BUILD_DIR)

format:
find . -maxdepth 2 -name '*.cpp' -o -name '*.hpp' | xargs clang-format -i
format: $(SABER_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES)
clang-format -i $^
126 changes: 89 additions & 37 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ For testing functional correctness and conformance to the specification for this
> **Warning** Conformance to the specification is ensured by using known answer test files, generated by following instructions @ https://gist.github.com/itzmeanjan/e499eba2b8c42f150a795d9e1c3c5dea. Generated known answer test files live under [kats](./kats/) directory.
```bash
make -j $(nproc --all)
make -j
```

```bash
Expand Down Expand Up @@ -117,58 +117,110 @@ make benchmark # If you haven't built google-benchmark library with libPFM supp
make perf # Must do if you have built google-benchmark library with libPFM support.
```

### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang-16.0 )
### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang-16.0.0 )

```bash
2023-08-15T17:42:17+04:00
Running ./benchmarks/perf.out
Run on (16 X 4709.07 MHz CPU s)
2023-09-17T17:06:05+04:00
Running ./build/perf.out
Run on (16 X 4667.57 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 1280 KiB (x8)
L3 Unified 18432 KiB (x1)
Load Average: 0.74, 0.60, 0.62
***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads.
----------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second
----------------------------------------------------------------------------------------
lightsaber/keygen 24.4 us 24.4 us 28709 114.429k 40.9108k/s
lightsaber/encaps 35.3 us 35.3 us 19847 164.976k 28.3529k/s
lightsaber/decaps 37.7 us 37.7 us 18586 176.386k 26.5178k/s
saber/keygen 50.1 us 50.1 us 10000 233.818k 19.9749k/s
saber/encaps 64.7 us 64.7 us 10834 302.231k 15.4539k/s
saber/decaps 67.2 us 67.2 us 10419 313.642k 14.8837k/s
firesaber/keygen 80.7 us 80.7 us 8625 377.135k 12.394k/s
firesaber/encaps 98.8 us 98.8 us 7084 462.232k 10.1164k/s
firesaber/decaps 105 us 105 us 6698 489.383k 9.55764k/s
Load Average: 0.50, 0.55, 0.54
-----------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second
-----------------------------------------------------------------------------------------------
lightsaber/keygen_mean 19.0 us 19.0 us 8 80.3038k 52.5734k/s
lightsaber/keygen_median 19.1 us 19.0 us 8 80.2622k 52.5177k/s
lightsaber/keygen_stddev 0.182 us 0.181 us 8 110.426 500.806/s
lightsaber/keygen_cv 0.95 % 0.95 % 8 0.14% 0.95%
firesaber/keygen_mean 63.4 us 63.4 us 8 265.249k 15.7782k/s
firesaber/keygen_median 63.3 us 63.2 us 8 264.841k 15.8152k/s
firesaber/keygen_stddev 0.616 us 0.623 us 8 998.488 153.432/s
firesaber/keygen_cv 0.97 % 0.98 % 8 0.38% 0.97%
saber/encaps_mean 50.0 us 50.0 us 8 210.935k 20.0109k/s
saber/encaps_median 50.2 us 50.1 us 8 210.622k 19.9442k/s
saber/encaps_stddev 0.432 us 0.440 us 8 802.152 177.695/s
saber/encaps_cv 0.86 % 0.88 % 8 0.38% 0.89%
saber/decaps_mean 58.5 us 58.5 us 8 246.31k 17.1067k/s
saber/decaps_median 58.4 us 58.4 us 8 245.752k 17.1277k/s
saber/decaps_stddev 0.301 us 0.306 us 8 1.1059k 89.4013/s
saber/decaps_cv 0.51 % 0.52 % 8 0.45% 0.52%
firesaber/decaps_mean 93.3 us 93.2 us 8 391.935k 10.7302k/s
firesaber/decaps_median 93.8 us 93.7 us 8 391.358k 10.6707k/s
firesaber/decaps_stddev 1.05 us 1.07 us 8 1.49139k 124.328/s
firesaber/decaps_cv 1.13 % 1.15 % 8 0.38% 1.16%
saber/keygen_mean 37.3 us 37.3 us 8 156.618k 26.806k/s
saber/keygen_median 37.5 us 37.4 us 8 156.399k 26.7073k/s
saber/keygen_stddev 0.470 us 0.474 us 8 635.079 342.905/s
saber/keygen_cv 1.26 % 1.27 % 8 0.41% 1.28%
lightsaber/decaps_mean 31.8 us 31.8 us 8 133.023k 31.4892k/s
lightsaber/decaps_median 31.8 us 31.8 us 8 132.634k 31.4577k/s
lightsaber/decaps_stddev 0.233 us 0.231 us 8 667.233 229.364/s
lightsaber/decaps_cv 0.73 % 0.73 % 8 0.50% 0.73%
firesaber/encaps_mean 79.8 us 79.7 us 8 335.233k 12.5428k/s
firesaber/encaps_median 79.8 us 79.8 us 8 335.303k 12.5377k/s
firesaber/encaps_stddev 0.977 us 0.968 us 8 358.615 152.474/s
firesaber/encaps_cv 1.22 % 1.21 % 8 0.11% 1.22%
lightsaber/encaps_mean 26.6 us 26.6 us 8 112.1k 37.6088k/s
lightsaber/encaps_median 26.6 us 26.6 us 8 111.97k 37.6006k/s
lightsaber/encaps_stddev 0.291 us 0.294 us 8 405.997 418.648/s
lightsaber/encaps_cv 1.09 % 1.11 % 8 0.36% 1.11%
```

### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC-13.1 )
### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC-13.1.0 )

```bash
2023-08-15T17:40:13+04:00
Running ./benchmarks/perf.out
Run on (16 X 2500 MHz CPU s)
2023-09-17T17:04:18+04:00
Running ./build/perf.out
Run on (16 X 2989.53 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 1280 KiB (x8)
L3 Unified 18432 KiB (x1)
Load Average: 0.48, 0.54, 0.60
***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads.
----------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second
----------------------------------------------------------------------------------------
lightsaber/keygen 42.7 us 42.7 us 16448 199.181k 23.4172k/s
lightsaber/encaps 62.4 us 62.4 us 11213 291.544k 16.0302k/s
lightsaber/decaps 79.5 us 79.5 us 8827 371.144k 12.5831k/s
saber/keygen 90.5 us 90.5 us 7755 423.147k 11.0511k/s
saber/encaps 120 us 120 us 5817 560.616k 8.33287k/s
saber/decaps 146 us 146 us 4824 679.251k 6.87212k/s
firesaber/keygen 159 us 159 us 4394 742.446k 6.29437k/s
firesaber/encaps 198 us 198 us 3541 924.359k 5.06157k/s
firesaber/decaps 232 us 232 us 3010 1083.93k 4.31128k/s
Load Average: 0.44, 0.55, 0.53
-----------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second
-----------------------------------------------------------------------------------------------
lightsaber/decaps_mean 79.3 us 79.3 us 8 369.421k 12.6136k/s
lightsaber/decaps_median 79.2 us 79.2 us 8 369.461k 12.6186k/s
lightsaber/decaps_stddev 0.185 us 0.184 us 8 291.383 29.2546/s
lightsaber/decaps_cv 0.23 % 0.23 % 8 0.08% 0.23%
firesaber/decaps_mean 232 us 232 us 8 1.07915M 4.31664k/s
firesaber/decaps_median 231 us 231 us 8 1.07916M 4.32059k/s
firesaber/decaps_stddev 0.561 us 0.561 us 8 518.472 10.436/s
firesaber/decaps_cv 0.24 % 0.24 % 8 0.05% 0.24%
saber/decaps_mean 144 us 144 us 8 673.033k 6.92433k/s
saber/decaps_median 144 us 144 us 8 672.948k 6.927k/s
saber/decaps_stddev 0.256 us 0.256 us 8 546.228 12.2588/s
saber/decaps_cv 0.18 % 0.18 % 8 0.08% 0.18%
firesaber/keygen_mean 158 us 158 us 8 737.507k 6.31831k/s
firesaber/keygen_median 158 us 158 us 8 737.537k 6.31944k/s
firesaber/keygen_stddev 0.272 us 0.275 us 8 464.613 10.9647/s
firesaber/keygen_cv 0.17 % 0.17 % 8 0.06% 0.17%
firesaber/encaps_mean 197 us 197 us 8 919.664k 5.07095k/s
firesaber/encaps_median 197 us 197 us 8 919.706k 5.07029k/s
firesaber/encaps_stddev 0.143 us 0.147 us 8 845.157 3.77785/s
firesaber/encaps_cv 0.07 % 0.07 % 8 0.09% 0.07%
saber/keygen_mean 89.8 us 89.8 us 8 418.859k 11.1335k/s
saber/keygen_median 89.8 us 89.8 us 8 418.871k 11.1324k/s
saber/keygen_stddev 0.171 us 0.170 us 8 626.854 21.1389/s
saber/keygen_cv 0.19 % 0.19 % 8 0.15% 0.19%
lightsaber/keygen_mean 42.1 us 42.1 us 8 196.456k 23.7336k/s
lightsaber/keygen_median 42.1 us 42.1 us 8 196.461k 23.7364k/s
lightsaber/keygen_stddev 0.107 us 0.107 us 8 234.03 59.9809/s
lightsaber/keygen_cv 0.25 % 0.25 % 8 0.12% 0.25%
saber/encaps_mean 119 us 119 us 8 556.87k 8.36951k/s
saber/encaps_median 119 us 119 us 8 556.878k 8.37355k/s
saber/encaps_stddev 0.199 us 0.200 us 8 138.96 13.9991/s
saber/encaps_cv 0.17 % 0.17 % 8 0.02% 0.17%
lightsaber/encaps_mean 62.3 us 62.3 us 8 290.002k 16.0445k/s
lightsaber/encaps_median 62.2 us 62.2 us 8 290.035k 16.0647k/s
lightsaber/encaps_stddev 0.200 us 0.200 us 8 237.475 51.1426/s
lightsaber/encaps_cv 0.32 % 0.32 % 8 0.08% 0.32%
```

## Usage
Expand Down
46 changes: 23 additions & 23 deletions benchmarks/bench_kem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
#include <cassert>

// Benchmark Saber KEM key generation algorithm for various suggested parameters.
template<const size_t L,
const size_t EQ,
const size_t EP,
const size_t MU,
const size_t seedBytes,
const size_t noiseBytes,
const size_t keyBytes>
template<size_t L,
size_t EQ,
size_t EP,
size_t MU,
size_t seedBytes,
size_t noiseBytes,
size_t keyBytes>
void
keygen(benchmark::State& state)
{
Expand Down Expand Up @@ -52,14 +52,14 @@ keygen(benchmark::State& state)
}

// Benchmark Saber KEM encapsulation algorithm for various suggested parameters.
template<const size_t L,
const size_t EQ,
const size_t EP,
const size_t ET,
const size_t MU,
const size_t seedBytes,
const size_t noiseBytes,
const size_t keyBytes>
template<size_t L,
size_t EQ,
size_t EP,
size_t ET,
size_t MU,
size_t seedBytes,
size_t noiseBytes,
size_t keyBytes>
void
encaps(benchmark::State& state)
{
Expand Down Expand Up @@ -109,14 +109,14 @@ encaps(benchmark::State& state)
}

// Benchmark Saber KEM decapsulation algorithm for various suggested parameters.
template<const size_t L,
const size_t EQ,
const size_t EP,
const size_t ET,
const size_t MU,
const size_t seedBytes,
const size_t noiseBytes,
const size_t keyBytes>
template<size_t L,
size_t EQ,
size_t EP,
size_t ET,
size_t MU,
size_t seedBytes,
size_t noiseBytes,
size_t keyBytes>
void
decaps(benchmark::State& state)
{
Expand Down
2 changes: 1 addition & 1 deletion include/cbd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace saber_utils {
// https://github.com/KULeuven-COSIC/SABER/blob/f7f39e4db2f3e22a21e1dd635e0601caae2b4510/Reference_Implementation_KEM/cbd.c.
// A similar sampling routine can also be found in
// https://github.com/itzmeanjan/kyber/blob/8cbb09472dc5f7e5ae8bc52cbcbf6344f637d4fe/include/sampling.hpp#L88-L152.
template<const uint16_t moduli, const size_t mu>
template<uint16_t moduli, size_t mu>
inline poly::poly_t<moduli>
cbd(std::span<const uint8_t> bytes)
requires((mu == 10) || (mu == 8) || (mu == 6))
Expand Down
6 changes: 3 additions & 3 deletions include/consts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace saber_consts {

// Computes the constant polynomial h1 ∈ Rq at compile time, following section 2.3 of the spec.
template<const uint16_t moduli, const uint16_t εq, const uint16_t εp>
template<uint16_t moduli, uint16_t εq, uint16_t εp>
inline constexpr poly::poly_t<moduli>
compute_poly_h1()
requires((εq > εp) && (moduli == (1u << εq)))
Expand All @@ -22,7 +22,7 @@ compute_poly_h1()
}

// Computes the constant vector h ∈ Rq^(lx1) at compile time, following section 2.3 of the spec.
template<const size_t L, const uint16_t moduli, const uint16_t εq, const uint16_t εp>
template<size_t L, uint16_t moduli, uint16_t εq, uint16_t εp>
inline constexpr mat::poly_matrix_t<L, 1, moduli>
compute_polyvec_h()
{
Expand All @@ -37,7 +37,7 @@ compute_polyvec_h()
}

// Computes the constant polynomial h2 ∈ Rq at compile time, following section 2.3 of the spec.
template<const uint16_t moduli, const uint16_t εq, const uint16_t εp, const uint16_t εt>
template<uint16_t moduli, uint16_t εq, uint16_t εp, uint16_t εt>
inline constexpr poly::poly_t<moduli>
compute_poly_h2()
requires(((εq > εp) && (εp > εt)) && (moduli == (1u << εq)))
Expand Down
Loading

0 comments on commit b0f5ac2

Please sign in to comment.