Merge pull request #6 from itzmeanjan/known-answer-tests

Add testing using Known Answer Tests
itzmeanjan · Aug 2, 2023 · 3628f5a · 3628f5a
2 parents 51e0604 + ad0cc64
commit 3628f5a
Show file tree

Hide file tree

Showing 17 changed files with 3,352 additions and 120 deletions.
diff --git a/Makefile b/Makefile
@@ -36,10 +36,10 @@ benchmarks/perf.out: benchmarks/bench_kem.cpp include/*.hpp
 	$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -lpfm -lbenchmark_main -o $@
 
 benchmark: benchmarks/bench.out
-	./$< --benchmark_time_unit=us --benchmark_counters_tabular=true
+	./$< --benchmark_min_warmup_time=.5 --benchmark_time_unit=us --benchmark_counters_tabular=true
 
 perf: benchmarks/perf.out
-	./$< --benchmark_time_unit=us --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES
+	./$< --benchmark_min_warmup_time=.5 --benchmark_time_unit=us --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES
 
 clean:
 	find . -name '*.out' -o -name '*.o' -o -name '*.so' -o -name '*.gch' | xargs rm -rf

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # saber
 Saber: Post-Quantum Key Encapsulation Mechanism
 
-> **Warning** This header-only library implementation of Saber KEM is attempted to be constant-time though it's not yet audited. If you consider using it in production environment, be careful !
+> **Warning** This header-only library implementation of Saber KEM is written to run in constant time, though it has not yet been audited. If you consider using it in a production environment, be careful!
 
 ## Overview
 
@@ -29,7 +29,7 @@ $ g++ --version
 g++ (Ubuntu 12.2.0-17ubuntu1) 12.2.0
 
 $ clang++ --version
-Ubuntu clang version 15.0.7
+Ubuntu clang version 16.0.0 (1~exp5ubuntu3)
 Target: x86_64-pc-linux-gnu
 Thread model: posix
 InstalledDir: /usr/bin
@@ -48,7 +48,9 @@ cmake version 3.25.1
 
 - For testing functional correctness of Saber KEM and its components, you need to globally install `google-test` headers and library. Follow [this](https://github.com/google/googletest/tree/main/googletest#standalone-cmake-project) guide.
 - For benchmarking Saber KEM algorithms, you need to globally install `google-benchmark` headers and library. Follow [this](https://github.com/google/benchmark#installation) guide.
-- If you are on a machine running GNU/Linux kernel and you want to obtain CPU cycle count for KEM algorithms, you should consider building `google-benchmark` library with libPFM support, following [this](https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7) step-by-step guide. Find more about libPFM @ https://perfmon2.sourceforge.net.
+
+> **Note** If you are on a machine running GNU/Linux and you want to obtain CPU cycle counts for the KEM algorithms, you should consider building the `google-benchmark` library with libPFM support, following [this](https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7) step-by-step guide. Find more about libPFM at https://perfmon2.sourceforge.net.
+
 - Saber KEM has two dependencies ( i.e. `sha3` and `subtle` ), managed by git submodule. After cloning this repository, you must run following command inside root of this repository, so that you can test/ benchmark/ use it.
 
 ```bash
@@ -61,24 +63,30 @@ popd
 
 ## Testing
 
-For testing functional correctness of Saber KEM algorithms and its components, issue following command.
+To test the functional correctness of this Saber KEM implementation and its conformance to the specification, issue the following command.
 
-> **Warning** Tests ensuring conformance to Saber specification and reference implementation are still being worked on. Meaning I don't **yet** guarantee that this implementation is fully conformant with the Saber specification.
+> **Warning** Conformance to the specification is ensured by using known answer test files, generated by following the instructions at https://gist.github.com/itzmeanjan/e499eba2b8c42f150a795d9e1c3c5dea. The generated known answer test files live under the [kats](./kats/) directory.
 
 ```bash
 make -j $(nproc --all)
 ```
 
 ```bash
-[==========] Running 8 tests from 1 test suite.
+[==========] Running 11 tests from 1 test suite.
 [----------] Global test environment set-up.
-[----------] 8 tests from SaberKEM
+[----------] 11 tests from SaberKEM
 [ RUN      ] SaberKEM.LightSaberKeyEncapsulationMechanism
 [       OK ] SaberKEM.LightSaberKeyEncapsulationMechanism (0 ms)
 [ RUN      ] SaberKEM.SaberKeyEncapsulationMechanism
 [       OK ] SaberKEM.SaberKeyEncapsulationMechanism (0 ms)
 [ RUN      ] SaberKEM.FireSaberKeyEncapsulationMechanism
 [       OK ] SaberKEM.FireSaberKeyEncapsulationMechanism (0 ms)
+[ RUN      ] SaberKEM.LightSaberKnownAnswerTests
+[       OK ] SaberKEM.LightSaberKnownAnswerTests (19 ms)
+[ RUN      ] SaberKEM.SaberKnownAnswerTests
+[       OK ] SaberKEM.SaberKnownAnswerTests (36 ms)
+[ RUN      ] SaberKEM.FireSaberKnownAnswerTests
+[       OK ] SaberKEM.FireSaberKnownAnswerTests (59 ms)
 [ RUN      ] SaberKEM.LightSaberPublicKeyEncryption
 [       OK ] SaberKEM.LightSaberPublicKeyEncryption (0 ms)
 [ RUN      ] SaberKEM.SaberPublicKeyEncryption
@@ -89,11 +97,11 @@ make -j $(nproc --all)
 [       OK ] SaberKEM.PolynomialMatrixConversion (0 ms)
 [ RUN      ] SaberKEM.PolynomialConversion
 [       OK ] SaberKEM.PolynomialConversion (0 ms)
-[----------] 8 tests from SaberKEM (1 ms total)
+[----------] 11 tests from SaberKEM (116 ms total)
 
 [----------] Global test environment tear-down
-[==========] 8 tests from 1 test suite ran. (1 ms total)
-[  PASSED  ] 8 tests.
+[==========] 11 tests from 1 test suite ran. (116 ms total)
+[  PASSED  ] 11 tests.
 ```
 
 ## Benchmarking
@@ -109,58 +117,58 @@ make benchmark  # If you haven't built google-benchmark library with libPFM supp
 make perf       # Must do if you have built google-benchmark library with libPFM support.
 ```
 
-### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang )
+### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang-16.0 )
 
 ```bash
-2023-07-23T11:10:44+04:00
+2023-08-02T20:43:41+04:00
 Running ./benchmarks/perf.out
-Run on (16 X 4640.91 MHz CPU s)
+Run on (16 X 1038.99 MHz CPU s)
 CPU Caches:
   L1 Data 48 KiB (x8)
   L1 Instruction 32 KiB (x8)
   L2 Unified 1280 KiB (x8)
   L3 Unified 18432 KiB (x1)
-Load Average: 0.41, 0.21, 0.13
+Load Average: 0.73, 0.57, 0.31
 ***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads.
 ----------------------------------------------------------------------------------------
 Benchmark                  Time             CPU   Iterations     CYCLES items_per_second
 ----------------------------------------------------------------------------------------
-lightsaber/keygen       16.1 us         16.1 us        43501   75.1952k       62.2169k/s
-lightsaber/encaps       23.7 us         23.7 us        29582   110.801k       42.2284k/s
-lightsaber/decaps       28.6 us         28.6 us        24472   133.839k         34.93k/s
-saber/keygen            35.7 us         35.7 us        19598   166.743k       28.0203k/s
-saber/encaps            45.9 us         45.9 us        15285   211.105k       21.7743k/s
-saber/decaps            53.6 us         53.6 us        13217   245.731k       18.6595k/s
-firesaber/keygen        60.7 us         60.7 us        11512   275.481k       16.4672k/s
-firesaber/encaps        75.4 us         75.3 us         9178   339.179k       13.2716k/s
-firesaber/decaps        86.1 us         86.1 us         8132   389.051k       11.6189k/s
+lightsaber/keygen       15.9 us         15.9 us        44209    74.161k       62.9147k/s
+lightsaber/encaps       23.3 us         23.3 us        30047   108.713k       42.9518k/s
+lightsaber/decaps       28.6 us         28.6 us        24575   133.342k       35.0179k/s
+saber/keygen            33.0 us         33.0 us        21126   154.265k       30.2664k/s
+saber/encaps            45.5 us         45.6 us        15320   212.825k       21.9538k/s
+saber/decaps            52.2 us         52.2 us        13547   241.843k       19.1717k/s
+firesaber/keygen        60.4 us         60.5 us        11621   281.552k       16.5417k/s
+firesaber/encaps        71.3 us         71.3 us         9854   332.447k       14.0283k/s
+firesaber/decaps        83.9 us         83.9 us         8374    391.38k       11.9164k/s
 ```
 
-### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC )
+### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC-12.2 )
 
 ```bash
-2023-07-23T11:33:42+04:00
+2023-08-02T20:45:14+04:00
 Running ./benchmarks/perf.out
-Run on (16 X 3889.79 MHz CPU s)
+Run on (16 X 4661.29 MHz CPU s)
 CPU Caches:
   L1 Data 48 KiB (x8)
   L1 Instruction 32 KiB (x8)
   L2 Unified 1280 KiB (x8)
   L3 Unified 18432 KiB (x1)
-Load Average: 0.74, 0.47, 0.29
+Load Average: 0.46, 0.54, 0.32
 ***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads.
 ----------------------------------------------------------------------------------------
 Benchmark                  Time             CPU   Iterations     CYCLES items_per_second
 ----------------------------------------------------------------------------------------
-lightsaber/keygen       32.3 us         32.3 us        21672   150.556k       30.9795k/s
-lightsaber/encaps       47.9 us         47.9 us        14647   223.555k       20.8894k/s
-lightsaber/decaps       61.0 us         61.0 us        11451   284.928k       16.3899k/s
-saber/keygen            70.0 us         70.0 us         9999   326.935k       14.2863k/s
-saber/encaps            92.7 us         92.8 us         7400   433.104k       10.7814k/s
-saber/decaps             112 us          112 us         6228   523.627k       8.92189k/s
-firesaber/keygen         123 us          123 us         5669   575.465k       8.11279k/s
-firesaber/encaps         153 us          153 us         4566   715.346k       6.52661k/s
-firesaber/decaps         180 us          180 us         3895   838.883k       5.56453k/s
+lightsaber/keygen       32.9 us         32.9 us        21197   153.732k       30.4171k/s
+lightsaber/encaps       48.5 us         48.5 us        14478   226.064k       20.6292k/s
+lightsaber/decaps       61.6 us         61.6 us        11349   288.259k       16.2315k/s
+saber/keygen            69.6 us         69.6 us        10106   324.865k       14.3671k/s
+saber/encaps            92.6 us         92.6 us         7594   431.568k       10.8002k/s
+saber/decaps             112 us          112 us         6177   521.694k       8.96309k/s
+firesaber/keygen         123 us          123 us         5709   571.835k       8.15302k/s
+firesaber/encaps         153 us          153 us         4603   711.865k       6.55279k/s
+firesaber/decaps         179 us          179 us         3922   835.373k       5.60172k/s
 ```
 
 ## Usage

diff --git a/include/cbd.hpp b/include/cbd.hpp
@@ -0,0 +1,103 @@
+#pragma once
+#include "polynomial.hpp"
+#include "utils.hpp"
+
+// Utility functions for Saber KEM
+namespace saber_utils {
+
+// Centered Binomial Distribution sampler: deterministically maps `bytes` ( output of a
+// pseudo-random function (PRF), of which (poly::N * mu) / 8 bytes are consumed ) to a
+// polynomial of poly::N coefficients, each coefficient being the difference between the
+// Hamming weights of two adjacent (mu / 2)-bit groups. This function is used for
+// generating secret vector `s` from SHAKE128 output of seed value `seedS`. Length of
+// `bytes` is NOT checked here. Inspiration: https://github.com/KULeuven-COSIC/SABER/blob/f7f39e4db2f3e22a21e1dd635e0601caae2b4510/Reference_Implementation_KEM/cbd.c.
+// Similar sort of sampling routine can also be found in
+// https://github.com/itzmeanjan/kyber/blob/8cbb09472dc5f7e5ae8bc52cbcbf6344f637d4fe/include/sampling.hpp#L88-L152.
+template<const uint16_t moduli, const size_t mu>
+inline poly::poly_t<moduli>
+cbd(std::span<const uint8_t> bytes)
+  requires((mu == 10) || (mu == 8) || (mu == 6)) // exactly one `if constexpr` branch below is instantiated
+{
+  constexpr size_t poly_blen = (poly::N * mu) / 8; // total number of bytes read from `bytes`
+  constexpr size_t muby2 = mu / 2;                 // bit width of one group whose Hamming weight is taken
+
+  poly::poly_t<moduli> res; // filled four coefficients per loop iteration below
+
+  if constexpr (muby2 == 5) {
+    constexpr uint64_t mask = 0b0000100001000010000100001000010000100001ul; // lowest bit of each of eight 5-bit groups
+    constexpr uint64_t mask5 = (1ul << muby2) - 1;                          // selects one 5-bit group
+
+    size_t boff = 0; // byte offset into `bytes`
+    size_t coff = 0; // coefficient offset into `res`
+
+    while (boff < poly_blen) {
+      const uint64_t word = from_le_bytes<uint64_t>(bytes.subspan(boff, 5)); // 5 bytes = 40 bits = eight 5-bit groups; helper defined elsewhere, presumably little-endian load
+      const uint64_t hw = ((word >> 0) & mask) + ((word >> 1) & mask) +
+                          ((word >> 2) & mask) + ((word >> 3) & mask) +
+                          ((word >> 4) & mask); // per-group Hamming weights, each <= 5 so no carry into the neighbouring group
+
+      res[coff + 0] = static_cast<uint16_t>((hw >> 0) & mask5) -
+                      static_cast<uint16_t>((hw >> 5) & mask5); // difference is an int in [-5, 5]; its reduction depends on poly_t's element type ( not shown here )
+      res[coff + 1] = static_cast<uint16_t>((hw >> 10) & mask5) -
+                      static_cast<uint16_t>((hw >> 15) & mask5);
+      res[coff + 2] = static_cast<uint16_t>((hw >> 20) & mask5) -
+                      static_cast<uint16_t>((hw >> 25) & mask5);
+      res[coff + 3] = static_cast<uint16_t>((hw >> 30) & mask5) -
+                      static_cast<uint16_t>((hw >> 35) & mask5);
+
+      boff += 5; // consumed 40 bits
+      coff += 4; // produced 4 coefficients
+    }
+  } else if constexpr (muby2 == 4) {
+    constexpr uint32_t mask = 0b00010001000100010001000100010001u; // lowest bit of each of eight 4-bit groups
+    constexpr uint32_t mask4 = (1u << muby2) - 1;                  // selects one 4-bit group
+
+    size_t boff = 0; // byte offset into `bytes`
+    size_t coff = 0; // coefficient offset into `res`
+
+    while (boff < poly_blen) {
+      const uint32_t word = from_le_bytes<uint32_t>(bytes.subspan(boff, 4)); // 4 bytes = 32 bits = eight 4-bit groups
+      const uint32_t hw = ((word >> 0) & mask) + ((word >> 1) & mask) +
+                          ((word >> 2) & mask) + ((word >> 3) & mask); // per-group Hamming weights, each <= 4
+
+      res[coff + 0] = static_cast<uint16_t>((hw >> 0) & mask4) -
+                      static_cast<uint16_t>((hw >> 4) & mask4); // difference is an int in [-4, 4]
+      res[coff + 1] = static_cast<uint16_t>((hw >> 8) & mask4) -
+                      static_cast<uint16_t>((hw >> 12) & mask4);
+      res[coff + 2] = static_cast<uint16_t>((hw >> 16) & mask4) -
+                      static_cast<uint16_t>((hw >> 20) & mask4);
+      res[coff + 3] = static_cast<uint16_t>((hw >> 24) & mask4) -
+                      static_cast<uint16_t>((hw >> 28) & mask4);
+
+      boff += 4; // consumed 32 bits
+      coff += 4; // produced 4 coefficients
+    }
+  } else if constexpr (muby2 == 3) {
+    constexpr uint32_t mask = 0b001001001001001001001001u; // lowest bit of each of eight 3-bit groups
+    constexpr uint32_t mask3 = (1u << muby2) - 1;          // selects one 3-bit group
+
+    size_t boff = 0; // byte offset into `bytes`
+    size_t coff = 0; // coefficient offset into `res`
+
+    while (boff < poly_blen) {
+      const uint32_t word = from_le_bytes<uint32_t>(bytes.subspan(boff, 3)); // only 3 bytes = 24 bits = eight 3-bit groups are passed to the helper
+      const uint32_t hw = (word & mask) + ((word >> 1) & mask) + ((word >> 2) & mask); // per-group Hamming weights, each <= 3
+
+      res[coff + 0] = static_cast<uint16_t>((hw >> 0) & mask3) -
+                      static_cast<uint16_t>((hw >> 3) & mask3); // difference is an int in [-3, 3]
+      res[coff + 1] = static_cast<uint16_t>((hw >> 6) & mask3) -
+                      static_cast<uint16_t>((hw >> 9) & mask3);
+      res[coff + 2] = static_cast<uint16_t>((hw >> 12) & mask3) -
+                      static_cast<uint16_t>((hw >> 15) & mask3);
+      res[coff + 3] = static_cast<uint16_t>((hw >> 18) & mask3) -
+                      static_cast<uint16_t>((hw >> 21) & mask3);
+
+      boff += 3; // consumed 24 bits
+      coff += 4; // produced 4 coefficients
+    }
+  }
+
+  return res;
+}
+
+}
diff --git a/include/kem.hpp b/include/kem.hpp
@@ -38,13 +38,13 @@ keygen(
   constexpr size_t pke_pklen = saber_utils::pke_pklen<L, EP, seedBytes>();
   constexpr size_t pke_sklen = saber_utils::pke_sklen<L, EQ>();
 
-  auto sk_z = skey.template subspan<0, keyBytes>();
-  constexpr size_t off0 = keyBytes;
-  auto sk_hpk = skey.template subspan<off0, sha3_256::DIGEST_LEN>();
-  constexpr size_t off1 = off0 + sha3_256::DIGEST_LEN;
-  auto sk_pk = skey.template subspan<off1, pke_pklen>();
-  constexpr size_t off2 = off1 + pke_pklen;
-  auto sk_sk = skey.template subspan<off2, pke_sklen>();
+  auto sk_sk = skey.template subspan<0, pke_sklen>();
+  constexpr size_t off0 = sk_sk.size();
+  auto sk_pk = skey.template subspan<off0, pke_pklen>();
+  constexpr size_t off1 = off0 + sk_pk.size();
+  auto sk_hpk = skey.template subspan<off1, sha3_256::DIGEST_LEN>();
+  constexpr size_t off2 = off1 + sk_hpk.size();
+  auto sk_z = skey.template subspan<off2, keyBytes>();
 
   // step 1
   saber_pke::keygen<L, EQ, EP, MU>(seedA, seedS, pkey, sk_sk);
@@ -151,13 +151,13 @@ decaps(std::span<const uint8_t, saber_utils::kem_ctlen<L, EP, ET>()> ctxt,
   constexpr size_t pke_sklen = saber_utils::pke_sklen<L, EQ>();
 
   // step 1
-  auto z = skey.template subspan<0, keyBytes>();
-  constexpr size_t off0 = keyBytes;
-  auto hash_pk = skey.template subspan<off0, sha3_256::DIGEST_LEN>();
-  constexpr size_t off1 = off0 + sha3_256::DIGEST_LEN;
-  auto pk = skey.template subspan<off1, pke_pklen>();
-  constexpr size_t off2 = off1 + pke_pklen;
-  auto sk = skey.template subspan<off2, pke_sklen>();
+  auto sk = skey.template subspan<0, pke_sklen>();
+  constexpr size_t off0 = pke_sklen;
+  auto pk = skey.template subspan<off0, pke_pklen>();
+  constexpr size_t off1 = off0 + pke_pklen;
+  auto hash_pk = skey.template subspan<off1, sha3_256::DIGEST_LEN>();
+  constexpr size_t off2 = off1 + sha3_256::DIGEST_LEN;
+  auto z = skey.template subspan<off2, keyBytes>();
 
   std::array<uint8_t, sha3_256::DIGEST_LEN> m;
   std::array<uint8_t, sha3_512::DIGEST_LEN> rk;

diff --git a/include/params.hpp b/include/params.hpp
@@ -1,8 +1,11 @@
 #pragma once
+#include <algorithm>
+#include <array>
 #include <bit>
 #include <cstddef>
 #include <cstdint>
 #include <type_traits>
+#include <vector>
 
 // Compile-time executable checks and parameters for Saber KEM implementation.
 namespace saber_params {
@@ -37,6 +40,20 @@ is_even(T val)
   return !static_cast<bool>(val & 1);
 }
 
+// Given a power of 2 integer moduli, figure, in compile-time, if log2 of it is one of the
+// bit widths { 13, 10, 6, 5, 4, 3, 1 }, i.e. supported in polynomial serialization/
+// deserialization routines. Returns FALSE otherwise, so that callers can refuse to compile.
+template<const uint16_t moduli>
+inline constexpr bool
+validate_poly_serialization_args()
+{
+  constexpr uint16_t lg2_moduli = saber_params::log2(moduli); // saber_params::log2 is not shown in this hunk
+
+  std::array<uint16_t, 7> bit_widths = { 13, 10, 6, 5, 4, 3, 1 }; // accepted values of log2(moduli)
+  auto elm = std::find(bit_widths.begin(), bit_widths.end(), lg2_moduli); // linear search over the seven entries
+  return elm != bit_widths.end(); // found => this moduli is supported
+}
+
 // Compile-time executable check for validating template arguments passed to Saber PKE
 // key generation routine.
 inline constexpr bool