Merge pull request #8 from itzmeanjan/update-sha3

Update Dependency `sha3`
itzmeanjan · Sep 17, 2023 · b0f5ac2 · b0f5ac2
2 parents 95f5926 + da40353
commit b0f5ac2
Show file tree

Hide file tree

Showing 18 changed files with 220 additions and 170 deletions.
diff --git a/Makefile b/Makefile
@@ -2,47 +2,63 @@ CXX = g++
 CXX_FLAGS = -std=c++20
 WARN_FLAGS = -Wall -Wextra -pedantic
 OPT_FLAGS = -O3 -march=native -mtune=native
+LINK_FLAGS = -flto
 I_FLAGS = -I ./include
 DEP_IFLAGS = -I ./sha3/include -I ./subtle/include
 
-all: test
+SRC_DIR = include
+SABER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp)
+BUILD_DIR = build
 
-tests/test_polynomial.o: tests/test_polynomial.cpp include/*.hpp
-	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
+TEST_DIR = tests
+TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp)
+TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES))))
+TEST_LINK_FLAGS = -lgtest -lgtest_main
+TEST_BINARY = $(BUILD_DIR)/test.out
 
-tests/test_poly_matrix.o: tests/test_poly_matrix.cpp include/*.hpp
-	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
+BENCHMARK_DIR = benchmarks
+BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp)
+BENCHMARK_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(BENCHMARK_SOURCES))))
+BENCHMARK_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpthread
+BENCHMARK_BINARY = $(BUILD_DIR)/bench.out
+PERF_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpfm -lpthread
+PERF_BINARY = $(BUILD_DIR)/perf.out
 
-tests/test_pke.o: tests/test_pke.cpp include/*.hpp
-	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
+all: test
 
-tests/test_kem.o: tests/test_kem.cpp include/*.hpp
+$(BUILD_DIR):
+	mkdir -p $@
+
+$(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(SABER_SOURCES) | $(BUILD_DIR) # rebuild on header edits; build dir is order-only so its mtime never forces a rebuild
 	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
 
-tests/a.out: tests/test_polynomial.o tests/test_poly_matrix.o tests/test_pke.o tests/test_kem.o
-	$(CXX) $(OPT_FLAGS) $^ -lgtest -lgtest_main -o $@
+$(TEST_BINARY): $(TEST_OBJECTS)
+	$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@
 
-test: tests/a.out
+test: $(TEST_BINARY)
 	./$<
 
-benchmarks/bench.out: benchmarks/bench_kem.cpp include/*.hpp
-	# If your google-benchmark library is not built with libPFM support.
-	# More @ https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7
-	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -lbenchmark_main -o $@
+$(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(SABER_SOURCES) | $(BUILD_DIR) # rebuild on header edits; build dir is order-only so its mtime never forces a rebuild
+	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
+
+$(BENCHMARK_BINARY): $(BENCHMARK_OBJECTS)
+	$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(BENCHMARK_LINK_FLAGS) -o $@
+
+benchmark: $(BENCHMARK_BINARY)
+	# Must *not* build google-benchmark with libPFM
+	./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=8 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true
 
-benchmarks/perf.out: benchmarks/bench_kem.cpp include/*.hpp
-	# Must use this if your google-benchmark library is built with libPFM support.
-	# More @ https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7
-	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -lpfm -lbenchmark_main -o $@
+$(PERF_BINARY): $(BENCHMARK_OBJECTS)
+	$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(PERF_LINK_FLAGS) -o $@
 
-benchmark: benchmarks/bench.out
-	./$< --benchmark_min_warmup_time=.5 --benchmark_time_unit=us --benchmark_counters_tabular=true
+perf: $(PERF_BINARY)
+	# Must build google-benchmark with libPFM, follow https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7
+	./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=8 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES
 
-perf: benchmarks/perf.out
-	./$< --benchmark_min_warmup_time=.5 --benchmark_time_unit=us --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES
+.PHONY: all test benchmark perf format clean # every command-style target; none of these names a file the build produces
 
 clean:
-	find . -name '*.out' -o -name '*.o' -o -name '*.so' -o -name '*.gch' | xargs rm -rf
+	rm -rf $(BUILD_DIR)
 
-format:
-	find . -maxdepth 2 -name '*.cpp' -o -name '*.hpp' | xargs clang-format -i
+format: $(SABER_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES)
+	clang-format -i $^
diff --git a/README.md b/README.md
@@ -68,7 +68,7 @@ For testing functional correctness and conformance to the specification for this
 > **Warning** Conformance to the specification is ensured by using known answer test files, generated by following instructions @ https://gist.github.com/itzmeanjan/e499eba2b8c42f150a795d9e1c3c5dea. Generated known answer test files live under [kats](./kats/) directory.
 
 ```bash
-make -j $(nproc --all)
+make -j
 ```
 
 ```bash
@@ -117,58 +117,110 @@ make benchmark  # If you haven't built google-benchmark library with libPFM supp
 make perf       # Must do if you have built google-benchmark library with libPFM support.
 ```
 
-### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang-16.0 )
+### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang-16.0.0 )
 
 ```bash
-2023-08-15T17:42:17+04:00
-Running ./benchmarks/perf.out
-Run on (16 X 4709.07 MHz CPU s)
+2023-09-17T17:06:05+04:00
+Running ./build/perf.out
+Run on (16 X 4667.57 MHz CPU s)
 CPU Caches:
   L1 Data 48 KiB (x8)
   L1 Instruction 32 KiB (x8)
   L2 Unified 1280 KiB (x8)
   L3 Unified 18432 KiB (x1)
-Load Average: 0.74, 0.60, 0.62
-***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads.
-----------------------------------------------------------------------------------------
-Benchmark                  Time             CPU   Iterations     CYCLES items_per_second
-----------------------------------------------------------------------------------------
-lightsaber/keygen       24.4 us         24.4 us        28709   114.429k       40.9108k/s
-lightsaber/encaps       35.3 us         35.3 us        19847   164.976k       28.3529k/s
-lightsaber/decaps       37.7 us         37.7 us        18586   176.386k       26.5178k/s
-saber/keygen            50.1 us         50.1 us        10000   233.818k       19.9749k/s
-saber/encaps            64.7 us         64.7 us        10834   302.231k       15.4539k/s
-saber/decaps            67.2 us         67.2 us        10419   313.642k       14.8837k/s
-firesaber/keygen        80.7 us         80.7 us         8625   377.135k        12.394k/s
-firesaber/encaps        98.8 us         98.8 us         7084   462.232k       10.1164k/s
-firesaber/decaps         105 us          105 us         6698   489.383k       9.55764k/s
+Load Average: 0.50, 0.55, 0.54
+-----------------------------------------------------------------------------------------------
+Benchmark                         Time             CPU   Iterations     CYCLES items_per_second
+-----------------------------------------------------------------------------------------------
+lightsaber/keygen_mean         19.0 us         19.0 us            8   80.3038k       52.5734k/s
+lightsaber/keygen_median       19.1 us         19.0 us            8   80.2622k       52.5177k/s
+lightsaber/keygen_stddev      0.182 us        0.181 us            8    110.426        500.806/s
+lightsaber/keygen_cv           0.95 %          0.95 %             8      0.14%            0.95%
+firesaber/keygen_mean          63.4 us         63.4 us            8   265.249k       15.7782k/s
+firesaber/keygen_median        63.3 us         63.2 us            8   264.841k       15.8152k/s
+firesaber/keygen_stddev       0.616 us        0.623 us            8    998.488        153.432/s
+firesaber/keygen_cv            0.97 %          0.98 %             8      0.38%            0.97%
+saber/encaps_mean              50.0 us         50.0 us            8   210.935k       20.0109k/s
+saber/encaps_median            50.2 us         50.1 us            8   210.622k       19.9442k/s
+saber/encaps_stddev           0.432 us        0.440 us            8    802.152        177.695/s
+saber/encaps_cv                0.86 %          0.88 %             8      0.38%            0.89%
+saber/decaps_mean              58.5 us         58.5 us            8    246.31k       17.1067k/s
+saber/decaps_median            58.4 us         58.4 us            8   245.752k       17.1277k/s
+saber/decaps_stddev           0.301 us        0.306 us            8    1.1059k        89.4013/s
+saber/decaps_cv                0.51 %          0.52 %             8      0.45%            0.52%
+firesaber/decaps_mean          93.3 us         93.2 us            8   391.935k       10.7302k/s
+firesaber/decaps_median        93.8 us         93.7 us            8   391.358k       10.6707k/s
+firesaber/decaps_stddev        1.05 us         1.07 us            8   1.49139k        124.328/s
+firesaber/decaps_cv            1.13 %          1.15 %             8      0.38%            1.16%
+saber/keygen_mean              37.3 us         37.3 us            8   156.618k        26.806k/s
+saber/keygen_median            37.5 us         37.4 us            8   156.399k       26.7073k/s
+saber/keygen_stddev           0.470 us        0.474 us            8    635.079        342.905/s
+saber/keygen_cv                1.26 %          1.27 %             8      0.41%            1.28%
+lightsaber/decaps_mean         31.8 us         31.8 us            8   133.023k       31.4892k/s
+lightsaber/decaps_median       31.8 us         31.8 us            8   132.634k       31.4577k/s
+lightsaber/decaps_stddev      0.233 us        0.231 us            8    667.233        229.364/s
+lightsaber/decaps_cv           0.73 %          0.73 %             8      0.50%            0.73%
+firesaber/encaps_mean          79.8 us         79.7 us            8   335.233k       12.5428k/s
+firesaber/encaps_median        79.8 us         79.8 us            8   335.303k       12.5377k/s
+firesaber/encaps_stddev       0.977 us        0.968 us            8    358.615        152.474/s
+firesaber/encaps_cv            1.22 %          1.21 %             8      0.11%            1.22%
+lightsaber/encaps_mean         26.6 us         26.6 us            8     112.1k       37.6088k/s
+lightsaber/encaps_median       26.6 us         26.6 us            8    111.97k       37.6006k/s
+lightsaber/encaps_stddev      0.291 us        0.294 us            8    405.997        418.648/s
+lightsaber/encaps_cv           1.09 %          1.11 %             8      0.36%            1.11%
 ```
 
-### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC-13.1 )
+### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC-13.1.0 )
 
 ```bash
-2023-08-15T17:40:13+04:00
-Running ./benchmarks/perf.out
-Run on (16 X 2500 MHz CPU s)
+2023-09-17T17:04:18+04:00
+Running ./build/perf.out
+Run on (16 X 2989.53 MHz CPU s)
 CPU Caches:
   L1 Data 48 KiB (x8)
   L1 Instruction 32 KiB (x8)
   L2 Unified 1280 KiB (x8)
   L3 Unified 18432 KiB (x1)
-Load Average: 0.48, 0.54, 0.60
-***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads.
-----------------------------------------------------------------------------------------
-Benchmark                  Time             CPU   Iterations     CYCLES items_per_second
-----------------------------------------------------------------------------------------
-lightsaber/keygen       42.7 us         42.7 us        16448   199.181k       23.4172k/s
-lightsaber/encaps       62.4 us         62.4 us        11213   291.544k       16.0302k/s
-lightsaber/decaps       79.5 us         79.5 us         8827   371.144k       12.5831k/s
-saber/keygen            90.5 us         90.5 us         7755   423.147k       11.0511k/s
-saber/encaps             120 us          120 us         5817   560.616k       8.33287k/s
-saber/decaps             146 us          146 us         4824   679.251k       6.87212k/s
-firesaber/keygen         159 us          159 us         4394   742.446k       6.29437k/s
-firesaber/encaps         198 us          198 us         3541   924.359k       5.06157k/s
-firesaber/decaps         232 us          232 us         3010   1083.93k       4.31128k/s
+Load Average: 0.44, 0.55, 0.53
+-----------------------------------------------------------------------------------------------
+Benchmark                         Time             CPU   Iterations     CYCLES items_per_second
+-----------------------------------------------------------------------------------------------
+lightsaber/decaps_mean         79.3 us         79.3 us            8   369.421k       12.6136k/s
+lightsaber/decaps_median       79.2 us         79.2 us            8   369.461k       12.6186k/s
+lightsaber/decaps_stddev      0.185 us        0.184 us            8    291.383        29.2546/s
+lightsaber/decaps_cv           0.23 %          0.23 %             8      0.08%            0.23%
+firesaber/decaps_mean           232 us          232 us            8   1.07915M       4.31664k/s
+firesaber/decaps_median         231 us          231 us            8   1.07916M       4.32059k/s
+firesaber/decaps_stddev       0.561 us        0.561 us            8    518.472         10.436/s
+firesaber/decaps_cv            0.24 %          0.24 %             8      0.05%            0.24%
+saber/decaps_mean               144 us          144 us            8   673.033k       6.92433k/s
+saber/decaps_median             144 us          144 us            8   672.948k         6.927k/s
+saber/decaps_stddev           0.256 us        0.256 us            8    546.228        12.2588/s
+saber/decaps_cv                0.18 %          0.18 %             8      0.08%            0.18%
+firesaber/keygen_mean           158 us          158 us            8   737.507k       6.31831k/s
+firesaber/keygen_median         158 us          158 us            8   737.537k       6.31944k/s
+firesaber/keygen_stddev       0.272 us        0.275 us            8    464.613        10.9647/s
+firesaber/keygen_cv            0.17 %          0.17 %             8      0.06%            0.17%
+firesaber/encaps_mean           197 us          197 us            8   919.664k       5.07095k/s
+firesaber/encaps_median         197 us          197 us            8   919.706k       5.07029k/s
+firesaber/encaps_stddev       0.143 us        0.147 us            8    845.157        3.77785/s
+firesaber/encaps_cv            0.07 %          0.07 %             8      0.09%            0.07%
+saber/keygen_mean              89.8 us         89.8 us            8   418.859k       11.1335k/s
+saber/keygen_median            89.8 us         89.8 us            8   418.871k       11.1324k/s
+saber/keygen_stddev           0.171 us        0.170 us            8    626.854        21.1389/s
+saber/keygen_cv                0.19 %          0.19 %             8      0.15%            0.19%
+lightsaber/keygen_mean         42.1 us         42.1 us            8   196.456k       23.7336k/s
+lightsaber/keygen_median       42.1 us         42.1 us            8   196.461k       23.7364k/s
+lightsaber/keygen_stddev      0.107 us        0.107 us            8     234.03        59.9809/s
+lightsaber/keygen_cv           0.25 %          0.25 %             8      0.12%            0.25%
+saber/encaps_mean               119 us          119 us            8    556.87k       8.36951k/s
+saber/encaps_median             119 us          119 us            8   556.878k       8.37355k/s
+saber/encaps_stddev           0.199 us        0.200 us            8     138.96        13.9991/s
+saber/encaps_cv                0.17 %          0.17 %             8      0.02%            0.17%
+lightsaber/encaps_mean         62.3 us         62.3 us            8   290.002k       16.0445k/s
+lightsaber/encaps_median       62.2 us         62.2 us            8   290.035k       16.0647k/s
+lightsaber/encaps_stddev      0.200 us        0.200 us            8    237.475        51.1426/s
+lightsaber/encaps_cv           0.32 %          0.32 %             8      0.08%            0.32%
 ```
 
 ## Usage

diff --git a/benchmarks/bench_kem.cpp b/benchmarks/bench_kem.cpp
@@ -5,13 +5,13 @@
 #include <cassert>
 
 // Benchmark Saber KEM key generation algorithm for various suggested parameters.
-template<const size_t L,
-         const size_t EQ,
-         const size_t EP,
-         const size_t MU,
-         const size_t seedBytes,
-         const size_t noiseBytes,
-         const size_t keyBytes>
+template<size_t L,
+         size_t EQ,
+         size_t EP,
+         size_t MU,
+         size_t seedBytes,
+         size_t noiseBytes,
+         size_t keyBytes>
 void
 keygen(benchmark::State& state)
 {
@@ -52,14 +52,14 @@ keygen(benchmark::State& state)
 }
 
 // Benchmark Saber KEM encapsulation algorithm for various suggested parameters.
-template<const size_t L,
-         const size_t EQ,
-         const size_t EP,
-         const size_t ET,
-         const size_t MU,
-         const size_t seedBytes,
-         const size_t noiseBytes,
-         const size_t keyBytes>
+template<size_t L,
+         size_t EQ,
+         size_t EP,
+         size_t ET,
+         size_t MU,
+         size_t seedBytes,
+         size_t noiseBytes,
+         size_t keyBytes>
 void
 encaps(benchmark::State& state)
 {
@@ -109,14 +109,14 @@ encaps(benchmark::State& state)
 }
 
 // Benchmark Saber KEM decapsulation algorithm for various suggested parameters.
-template<const size_t L,
-         const size_t EQ,
-         const size_t EP,
-         const size_t ET,
-         const size_t MU,
-         const size_t seedBytes,
-         const size_t noiseBytes,
-         const size_t keyBytes>
+template<size_t L,
+         size_t EQ,
+         size_t EP,
+         size_t ET,
+         size_t MU,
+         size_t seedBytes,
+         size_t noiseBytes,
+         size_t keyBytes>
 void
 decaps(benchmark::State& state)
 {

diff --git a/include/cbd.hpp b/include/cbd.hpp
@@ -13,7 +13,7 @@ namespace saber_utils {
 // https://github.com/KULeuven-COSIC/SABER/blob/f7f39e4db2f3e22a21e1dd635e0601caae2b4510/Reference_Implementation_KEM/cbd.c.
 // Similar sort of sampling routine can also be found in
 // https://github.com/itzmeanjan/kyber/blob/8cbb09472dc5f7e5ae8bc52cbcbf6344f637d4fe/include/sampling.hpp#L88-L152.
-template<const uint16_t moduli, const size_t mu>
+template<uint16_t moduli, size_t mu>
 inline poly::poly_t<moduli>
 cbd(std::span<const uint8_t> bytes)
   requires((mu == 10) || (mu == 8) || (mu == 6))

diff --git a/include/consts.hpp b/include/consts.hpp
@@ -5,7 +5,7 @@
 namespace saber_consts {
 
 // Compile-time compute constant polynomial h1 ∈ Rq, following section 2.3 of spec.
-template<const uint16_t moduli, const uint16_t εq, const uint16_t εp>
+template<uint16_t moduli, uint16_t εq, uint16_t εp>
 inline constexpr poly::poly_t<moduli>
 compute_poly_h1()
   requires((εq > εp) && (moduli == (1u << εq)))
@@ -22,7 +22,7 @@ compute_poly_h1()
 }
 
 // Compile-time compute constant vector h ∈ Rq^(lx1), following section 2.3 of spec.
-template<const size_t L, const uint16_t moduli, const uint16_t εq, const uint16_t εp>
+template<size_t L, uint16_t moduli, uint16_t εq, uint16_t εp>
 inline constexpr mat::poly_matrix_t<L, 1, moduli>
 compute_polyvec_h()
 {
@@ -37,7 +37,7 @@ compute_polyvec_h()
 }
 
 // Compile-time compute constant polynomial h2 ∈ Rq, following section 2.3 of spec.
-template<const uint16_t moduli, const uint16_t εq, const uint16_t εp, const uint16_t εt>
+template<uint16_t moduli, uint16_t εq, uint16_t εp, uint16_t εt>
 inline constexpr poly::poly_t<moduli>
 compute_poly_h2()
   requires(((εq > εp) && (εp > εt)) && (moduli == (1u << εq)))