From 529a00031d37b584c3b03f9375ec9c4fef40e779 Mon Sep 17 00:00:00 2001 From: Shanshan Date: Thu, 6 Oct 2022 20:34:24 -0700 Subject: [PATCH 1/5] krum: change from optimized variant (geometric median) to original scoring (squared distances) --- python/fedml/core/security/defense/krum_defense.py | 2 +- python/fedml/core/security/defense/three_sigma_krum_defense.py | 2 +- python/fedml/core/security/readme.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/fedml/core/security/defense/krum_defense.py b/python/fedml/core/security/defense/krum_defense.py index 976a05bf90..03714f7c4e 100755 --- a/python/fedml/core/security/defense/krum_defense.py +++ b/python/fedml/core/security/defense/krum_defense.py @@ -63,7 +63,7 @@ def _compute_krum_score(self, vec_grad_list): dists.append( utils.compute_euclidean_distance( vec_grad_list[i], vec_grad_list[j] - ).item() + ).item() ** 2 ) dists.sort() # ascending score = dists[0 : num_client - self.byzantine_client_num - 2] diff --git a/python/fedml/core/security/defense/three_sigma_krum_defense.py b/python/fedml/core/security/defense/three_sigma_krum_defense.py index ae64eeacb1..a4b6a4dfd1 100644 --- a/python/fedml/core/security/defense/three_sigma_krum_defense.py +++ b/python/fedml/core/security/defense/three_sigma_krum_defense.py @@ -138,7 +138,7 @@ def _compute_krum_score(self, vec_grad_list): compute_euclidean_distance( torch.Tensor(vec_grad_list[i]), torch.Tensor(vec_grad_list[j]), - ).item() + ).item() ** 2 ) dists.sort() # ascending score = dists[0 : math.floor(num_client / 2)] diff --git a/python/fedml/core/security/readme.md b/python/fedml/core/security/readme.md index 727b8fbdd7..0b6a33c76a 100644 --- a/python/fedml/core/security/readme.md +++ b/python/fedml/core/security/readme.md @@ -42,7 +42,7 @@ https://arxiv.org/pdf/2006.09365.pdf 3. GeometricMedianDefense: "Distributed statistical machine learning in adversarial settings: Byzantine gradient descent. " https://dl.acm.org/doi/pdf/10.1145/3154503 4. (NeurIPS 2017) KrumDefense: "Machine Learning with Adversaries: Byzantine Tolerant Gradient Descent" -https://arxiv.org/pdf/1703.02757.pdf +https://papers.nips.cc/paper/2017/file/f4b9ec30ad9f68f89b29639786cb62ef-Paper.pdf 5. (ICLR 2021) MultiKrumDefense: "Distributed momentum for byzantine-resilient stochastic gradient descent" https://infoscience.epfl.ch/record/287261 6. NormDiffClippingDefense: "Can You Really Backdoor Federated Learning?"
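For reference, the original Krum scoring rule that this patch restores can be sketched as below. This is a minimal standalone sketch using NumPy rather than the FedML utilities; the function name krum_scores and its signature are illustrative assumptions, not part of the FedML API. Each update is scored by the sum of squared Euclidean distances to its n - byzantine_client_num - 2 nearest neighbors, and the update with the smallest score is selected.

import numpy as np

def krum_scores(vec_grad_list, byzantine_client_num):
    # Score each update by the sum of SQUARED Euclidean distances to its
    # n - byzantine_client_num - 2 nearest neighbors (original Krum scoring).
    n = len(vec_grad_list)
    scores = []
    for i in range(n):
        dists = sorted(
            float(np.linalg.norm(vec_grad_list[i] - vec_grad_list[j])) ** 2
            for j in range(n)
            if j != i
        )
        scores.append(sum(dists[: n - byzantine_client_num - 2]))
    return scores  # the update with the smallest score is selected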
From 709df159284f8d7c1d8fbcec3620c7fcf17cdd08 Mon Sep 17 00:00:00 2001 From: Shanshan Date: Fri, 7 Oct 2022 17:59:11 -0700 Subject: [PATCH 2/5] add lr_cifar10 model --- python/fedml/model/linear/lr_cifar10.py | 13 +++++++++++++ python/fedml/model/model_hub.py | 6 ++++-- 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 python/fedml/model/linear/lr_cifar10.py diff --git a/python/fedml/model/linear/lr_cifar10.py b/python/fedml/model/linear/lr_cifar10.py new file mode 100644 index 0000000000..762b9c2c3a --- /dev/null +++ b/python/fedml/model/linear/lr_cifar10.py @@ -0,0 +1,13 @@ +import torch + + +class LogisticRegression_Cifar10(torch.nn.Module): + def __init__(self, input_dim, output_dim): + super(LogisticRegression_Cifar10, self).__init__() + self.linear = torch.nn.Linear(input_dim, output_dim) + + def forward(self, x): + # Flatten images into vectors + x = x.view(x.size(0), -1) + outputs = torch.sigmoid(self.linear(x)) + return outputs diff --git a/python/fedml/model/model_hub.py b/python/fedml/model/model_hub.py index 67388d4db7..4906596fb2 100644 --- a/python/fedml/model/model_hub.py +++ b/python/fedml/model/model_hub.py @@ -1,7 +1,5 @@ import logging - import torch.nn as nn - from fedml.model.cv.cnn import CNN_DropOut from fedml.model.cv.darts import genotypes from fedml.model.cv.darts.model import NetworkCIFAR @@ -14,6 +12,7 @@ from fedml.model.cv.resnet56 import resnet_client, resnet_server from fedml.model.cv.resnet_gn import resnet18 from fedml.model.linear.lr import LogisticRegression +from fedml.model.linear.lr_cifar10 import LogisticRegression_Cifar10 from fedml.model.nlp.rnn import RNN_OriginalFedAvg, RNN_StackOverFlow, RNN_FedShakespeare @@ -24,6 +23,9 @@ def create(args, output_dim): if model_name == "lr" and args.dataset == "mnist": logging.info("LogisticRegression + MNIST") model = LogisticRegression(28 * 28, output_dim) + elif model_name == "lr" and args.dataset == "cifar10": + logging.info("LogisticRegression + CIFAR10") + model = LogisticRegression_Cifar10(32 * 32 * 3, output_dim) elif model_name == "cnn" and args.dataset == "mnist": logging.info("CNN + MNIST") model = CNN_DropOut(False) From 0a3379e93dbc1dde6d98d756894f5144bbcd7001 Mon Sep 17 00:00:00 2001 From: Nicole456 <592056267@qq.com> Date: Sun, 9 Oct 2022 17:02:07 +0800 Subject: [PATCH 3/5] add dp budget accountant --- .../fedml/core/dp/budget_accountant/README.md | 8 + .../core/dp/budget_accountant/__init__.py | 27 + .../{accountant.py => advanced_accountant.py} | 4 +- ...nt_test.py => advanced_accountant_test.py} | 12 +- .../core/dp/budget_accountant/dp_event.py | 221 +++ .../dp/budget_accountant/dp_event_builder.py | 63 + .../dp_event_builder_test.py | 70 + .../mechanism_calibration.py | 219 +++ .../mechanism_calibration_test.py | 198 +++ .../core/dp/budget_accountant/pld/__init__.py | 3 + .../dp/budget_accountant/pld/accountant.py | 248 ++++ .../budget_accountant/pld/accountant_test.py | 223 +++ .../core/dp/budget_accountant/pld/common.py | 308 ++++ .../dp/budget_accountant/pld/common_test.py | 222 +++ .../core/dp/budget_accountant/pld/pld_pmf.py | 567 ++++++++ .../dp/budget_accountant/pld/pld_pmf_test.py | 426 ++++++ .../pld/pld_privacy_accountant.py | 108 ++ .../pld/pld_privacy_accountant_test.py | 111 ++ .../pld/privacy_loss_distribution.py | 1255 +++++++++++++++++ ...privacy_loss_distribution_basic_example.py | 52 + .../pld/privacy_loss_distribution_test.py | 1173 +++++++++++++++
.../{ => pld}/privacy_loss_mechanism.py | 2 +- .../pld/privacy_loss_mechanism_test.py | 977 +++++++++++++ .../dp/budget_accountant/pld/test_util.py | 60 + .../budget_accountant/pld/test_util_test.py | 104 ++ .../core/dp/budget_accountant/pld_pmf.py | 517 ------- .../budget_accountant/privacy_accountant.py | 118 ++ .../privacy_accountant_test.py | 88 ++ .../privacy_loss_distribution.py | 1253 ---------------- .../core/dp/budget_accountant/rdp/__init__.py | 18 + .../rdp/rdp_privacy_accountant.py | 901 ++++++++++++ .../rdp/rdp_privacy_accountant_test.py | 712 ++++++++++ 32 files changed, 8490 insertions(+), 1779 deletions(-) create mode 100644 python/fedml/core/dp/budget_accountant/README.md rename python/fedml/core/dp/budget_accountant/{accountant.py => advanced_accountant.py} (98%) rename python/fedml/core/dp/budget_accountant/{accountant_test.py => advanced_accountant_test.py} (94%) create mode 100644 python/fedml/core/dp/budget_accountant/dp_event.py create mode 100644 python/fedml/core/dp/budget_accountant/dp_event_builder.py create mode 100644 python/fedml/core/dp/budget_accountant/dp_event_builder_test.py create mode 100644 python/fedml/core/dp/budget_accountant/mechanism_calibration.py create mode 100644 python/fedml/core/dp/budget_accountant/mechanism_calibration_test.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/__init__.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/accountant.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/accountant_test.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/common.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/common_test.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/pld_pmf.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/pld_pmf_test.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/pld_privacy_accountant.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/pld_privacy_accountant_test.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_basic_example.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_test.py rename python/fedml/core/dp/budget_accountant/{ => pld}/privacy_loss_mechanism.py (99%) create mode 100644 python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism_test.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/test_util.py create mode 100644 python/fedml/core/dp/budget_accountant/pld/test_util_test.py delete mode 100644 python/fedml/core/dp/budget_accountant/pld_pmf.py create mode 100644 python/fedml/core/dp/budget_accountant/privacy_accountant.py create mode 100644 python/fedml/core/dp/budget_accountant/privacy_accountant_test.py delete mode 100644 python/fedml/core/dp/budget_accountant/privacy_loss_distribution.py create mode 100644 python/fedml/core/dp/budget_accountant/rdp/__init__.py create mode 100644 python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant.py create mode 100644 python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant_test.py diff --git a/python/fedml/core/dp/budget_accountant/README.md b/python/fedml/core/dp/budget_accountant/README.md new file mode 100644 index 0000000000..a92ad5694b --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/README.md @@ -0,0 +1,8 @@ +# Privacy Budget Accountant + +This directory contains tools for tracking differential 
privacy budgets, available as part of the [Google differential privacy library](https://github.com/google/differential-privacy). + +The set of DpEvent classes allows you to describe complex differentially private mechanisms such as Laplace and Gaussian, subsampling mechanisms, and their compositions. The PrivacyAccountant classes can ingest DpEvents and return the (ε, δ) of the composite mechanism. Privacy Loss Distributions (PLDs) and RDP accounting are currently supported. + + + diff --git a/python/fedml/core/dp/budget_accountant/__init__.py b/python/fedml/core/dp/budget_accountant/__init__.py index e69de29bb2..601881b121 100644 --- a/python/fedml/core/dp/budget_accountant/__init__.py +++ b/python/fedml/core/dp/budget_accountant/__init__.py @@ -0,0 +1,27 @@ +"""DP Accounting package.""" + +from fedml.core.dp.budget_accountant import pld +from fedml.core.dp.budget_accountant import rdp + +from fedml.core.dp.budget_accountant.dp_event import ComposedDpEvent +from fedml.core.dp.budget_accountant.dp_event import DpEvent +from fedml.core.dp.budget_accountant.dp_event import GaussianDpEvent +from fedml.core.dp.budget_accountant.dp_event import LaplaceDpEvent +from fedml.core.dp.budget_accountant.dp_event import NonPrivateDpEvent +from fedml.core.dp.budget_accountant.dp_event import NoOpDpEvent +from fedml.core.dp.budget_accountant.dp_event import PoissonSampledDpEvent +from fedml.core.dp.budget_accountant.dp_event import SampledWithoutReplacementDpEvent +from fedml.core.dp.budget_accountant.dp_event import SampledWithReplacementDpEvent +from fedml.core.dp.budget_accountant.dp_event import SelfComposedDpEvent +from fedml.core.dp.budget_accountant.dp_event import SingleEpochTreeAggregationDpEvent +from fedml.core.dp.budget_accountant.dp_event import UnsupportedDpEvent + +from fedml.core.dp.budget_accountant.dp_event_builder import DpEventBuilder + +from fedml.core.dp.budget_accountant.mechanism_calibration import calibrate_dp_mechanism +from fedml.core.dp.budget_accountant.mechanism_calibration import ExplicitBracketInterval +from fedml.core.dp.budget_accountant.mechanism_calibration import LowerEndpointAndGuess + +from fedml.core.dp.budget_accountant.privacy_accountant import NeighboringRelation +from fedml.core.dp.budget_accountant.privacy_accountant import PrivacyAccountant +from fedml.core.dp.budget_accountant.privacy_accountant import UnsupportedEventError diff --git a/python/fedml/core/dp/budget_accountant/accountant.py b/python/fedml/core/dp/budget_accountant/advanced_accountant.py similarity index 98% rename from python/fedml/core/dp/budget_accountant/accountant.py rename to python/fedml/core/dp/budget_accountant/advanced_accountant.py index 0e67690feb..c271637578 100644 --- a/python/fedml/core/dp/budget_accountant/accountant.py +++ b/python/fedml/core/dp/budget_accountant/advanced_accountant.py @@ -5,8 +5,8 @@ from scipy import special from fedml.core.dp.budget_accountant import common -from fedml.core.dp.budget_accountant import privacy_loss_distribution -from fedml.core.dp.budget_accountant import privacy_loss_mechanism +from fedml.core.dp.budget_accountant.pld import privacy_loss_distribution +from fedml.core.dp.budget_accountant.pld import privacy_loss_mechanism def get_smallest_parameter( diff --git a/python/fedml/core/dp/budget_accountant/accountant_test.py b/python/fedml/core/dp/budget_accountant/advanced_accountant_test.py similarity index 94% rename from python/fedml/core/dp/budget_accountant/accountant_test.py rename to
python/fedml/core/dp/budget_accountant/advanced_accountant_test.py index 4dab0f4d8f..ed3d55b9bc 100644 --- a/python/fedml/core/dp/budget_accountant/accountant_test.py +++ b/python/fedml/core/dp/budget_accountant/advanced_accountant_test.py @@ -3,7 +3,7 @@ import unittest from absl.testing import parameterized -from fedml.core.dp.budget_accountant import accountant +from fedml.core.dp.budget_accountant import advanced_accountant from fedml.core.dp.budget_accountant import common @@ -48,7 +48,7 @@ def test_get_smallest_laplace_noise(self, epsilon, delta, num_queries, epsilon, delta) self.assertAlmostEqual( expected_parameter, - accountant.get_smallest_laplace_noise( + advanced_accountant.get_smallest_laplace_noise( privacy_parameters, num_queries, sensitivity=sensitivity), delta=0.1) @@ -84,7 +84,7 @@ def test_get_smallest_discrete_laplace_noise(self, epsilon, delta, epsilon, delta) self.assertAlmostEqual( expected_parameter, - accountant.get_smallest_discrete_laplace_noise( + advanced_accountant.get_smallest_discrete_laplace_noise( privacy_parameters, num_queries, sensitivity=sensitivity), delta=1e-3) @@ -111,7 +111,7 @@ def test_get_smallest_gaussian_noise(self, epsilon, delta, num_queries, epsilon, delta) self.assertAlmostEqual( expected_std, - accountant.get_smallest_gaussian_noise( + advanced_accountant.get_smallest_gaussian_noise( privacy_parameters, num_queries, sensitivity=sensitivity)) @parameterized.named_parameters( @@ -159,7 +159,7 @@ def test_advanced_composition(self, epsilon, delta, num_queries, total_delta, expected_total_epsilon): privacy_parameters = common.DifferentialPrivacyParameters( epsilon, delta) - total_epsilon = accountant.advanced_composition(privacy_parameters, + total_epsilon = advanced_accountant.advanced_composition(privacy_parameters, num_queries, total_delta) if expected_total_epsilon is None: self.assertIsNone(total_epsilon) @@ -211,7 +211,7 @@ def test_get_smallest_epsilon_from_advanced_composition( self, total_epsilon, total_delta, num_queries, delta, expected_epsilon): total_privacy_parameters = common.DifferentialPrivacyParameters( total_epsilon, total_delta) - epsilon = accountant.get_smallest_epsilon_from_advanced_composition( + epsilon = advanced_accountant.get_smallest_epsilon_from_advanced_composition( total_privacy_parameters, num_queries, delta) if expected_epsilon is None: self.assertIsNone(epsilon) diff --git a/python/fedml/core/dp/budget_accountant/dp_event.py b/python/fedml/core/dp/budget_accountant/dp_event.py new file mode 100644 index 0000000000..636c229b8f --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/dp_event.py @@ -0,0 +1,221 @@ +"""Standard DpEvent classes. + +A `DpEvent` represents the (hyper)parameters of a differentially +private query, amplification mechanism, or composition, that are necessary +and sufficient for privacy accounting. Various independent implementations of DP +algorithms that are functionally equivalent from an accounting perspective may +correspond to the same `DpEvent`. Similarly, various independent implementations +of accounting algorithms may consume the same `DpEvent`. + +All `DpEvents` processed together are assumed to take place on a single dataset +of records. `DpEvents` fall into roughly three categories: + - `DpEvents` that release an output, and incur a privacy cost, + e.g., `GaussianDpEvent`. + - `DpEvents` that select a subset (or subsets) of the dataset, and run nested + `DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`. 
+ - `DpEvents` that represent (possibly sequentially) applying (multiple) + mechanisms to the dataset (or currently active subset). Currently, this is + only `ComposedDpEvent` and `SelfComposedDpEvent`. + +Each `DpEvent` should completely document the mathematical behavior and +assumptions of the mechanism it represents so that the writer of an accountant +class can implement the accounting correctly without knowing any other +implementation details of the algorithm that produced it. + +New mechanism types should be given a corresponding `DpEvent` class, although +not all accountants will be required to support them. In general, +`PrivacyAccountant` implementations are not required to be aware of all +`DpEvent` classes, but they should support the following basic events and handle +them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and +`SelfComposedDpEvent`. They should return False from `supports(event)` for +`UnsupportedDpEvent` or any other event type they have not been designed to +handle. + +To ensure that a `PrivacyAccountant` does not accidentally start to return +incorrect results, the following should be enforced: + * `DpEvent` classes and their parameters should never be removed, barring some + extended, onerous deprecation process. + * New parameters cannot be added to existing mechanisms unless they are + optional. That is, old composed `DpEvent` objects that do not include them + must remain valid. + * The meaning of existing mechanisms or parameters must not change. That is, + existing mechanisms should not have their implementations change in ways that + alter their privacy properties; new `DpEvent` classes should be added + instead. + * `PrivacyAccountant` implementations are expected to return `False` from + `supports(event)` when processing unknown mechanisms. +""" + +from typing import List, Union + +import attr + + +class DpEvent(object): + """Represents application of a private mechanism. + + A `DpEvent` describes a differentially private mechanism sufficiently for + computing the associated privacy losses, both in isolation and in combination + with other `DpEvent`s. + """ + + +@attr.s(frozen=True) +class NoOpDpEvent(DpEvent): + """Represents application of an operation with no privacy impact. + + A `NoOpDpEvent` is generally never required, but it can be useful as a + placeholder where a `DpEvent` is expected, such as in tests or some live + accounting pipelines. + """ + + +@attr.s(frozen=True) +class NonPrivateDpEvent(DpEvent): + """Represents application of a non-private operation. + + This `DpEvent` should be used when an operation is performed that does not + satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite + epsilon/delta when encountering a `NonPrivateDpEvent`. + """ + + +@attr.s(frozen=True) +class UnsupportedDpEvent(DpEvent): + """Represents application of an as-yet unsupported operation. + + This `DpEvent` should be used when an operation is performed that does not yet + have any associated DP description, or if the description is temporarily + inaccessible, for example, during development. All `PrivacyAccountant`s should + return `False` from `supports(event)` for `UnsupportedDpEvent`. + """ + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class GaussianDpEvent(DpEvent): + """Represents an application of the Gaussian mechanism. + + For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z.
+ If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is + defined as s / C. + """ + noise_multiplier: float + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class LaplaceDpEvent(DpEvent): + """Represents an application of the Laplace mechanism. + + For values v_i and noise z sampled coordinate-wise from the Laplace + distribution L(0, s), this mechanism returns sum_i v_i + z. + The probability density function of the Laplace distribution L(0, s) with + parameter s is given as exp(-|x|/s) * (0.5/s) at x for any real value x. + If the L_1 norms of the values are bounded ||v_i||_1 <= C, the noise_multiplier + is defined as s / C. + """ + noise_multiplier: float + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SelfComposedDpEvent(DpEvent): + """Represents repeated application of a mechanism. + + The repeated applications may be adaptive, where the query producing each + event depends on the results of prior queries. + + This is equivalent to `ComposedDpEvent` that contains a list of length `count` + of identical copies of `event`. + """ + event: DpEvent + count: int + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class ComposedDpEvent(DpEvent): + """Represents application of a series of composed mechanisms. + + The composition may be adaptive, where the query producing each event depends + on the results of prior queries. + """ + events: List[DpEvent] + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class PoissonSampledDpEvent(DpEvent): + """Represents an application of Poisson subsampling. + + Each record in the dataset is included in the sample independently with + probability `sampling_probability`. Then the `DpEvent` `event` is applied + to the sample of records. + """ + sampling_probability: float + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SampledWithReplacementDpEvent(DpEvent): + """Represents sampling a fixed-size batch of records with replacement. + + A sample of `sample_size` (possibly repeated) records is drawn uniformly at + random from the set of possible samples of a source dataset of size + `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of + records. + """ + source_dataset_size: int + sample_size: int + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SampledWithoutReplacementDpEvent(DpEvent): + """Represents sampling a fixed-size batch of records without replacement. + + A sample of `sample_size` unique records is drawn uniformly at random from the + set of possible samples of a source dataset of size `source_dataset_size`. + Then the `DpEvent` `event` is applied to the sample of records. + """ + source_dataset_size: int + sample_size: int + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SingleEpochTreeAggregationDpEvent(DpEvent): + """Represents aggregation for a single epoch using one or more trees. + + Multiple tree-aggregation steps can occur, but it is required that each + record occurs at most once *across all trees*. See appendix D of + "Practical and Private (Deep) Learning without Sampling or Shuffling" + https://arxiv.org/abs/2103.00039. + + To represent the common case where the same record can occur in multiple + trees (but still at most once per tree), wrap this with `SelfComposedDpEvent` + or `ComposedDpEvent` and use a scalar for `step_counts`. + + Attributes: + noise_multiplier: The ratio of the noise per node to the sensitivity.
+ step_counts: The number of steps in each tree. May be a scalar for a single + tree. + """ + noise_multiplier: float + step_counts: Union[int, List[int]] + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class RepeatAndSelectDpEvent(DpEvent): + """Represents repeatedly running the mechanism and selecting the best output. + + The total number of runs is randomized and drawn from a distribution + with the given parameters: Poisson (shape=infinity), Geometric (shape=1), + Logarithmic (shape=0), or Truncated Negative binomial (0 < shape < infinity). + + Attributes: + event: The `DpEvent` that is being repeated. + mean: The mean number of repetitions. + shape: The shape of the distribution of the number of repetitions. + """ + event: DpEvent + mean: float + shape: float diff --git a/python/fedml/core/dp/budget_accountant/dp_event_builder.py b/python/fedml/core/dp/budget_accountant/dp_event_builder.py new file mode 100644 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/dp_event_builder.py @@ -0,0 +1,63 @@ +"""Builder class for ComposedDpEvent.""" + +from fedml.core.dp.budget_accountant import dp_event + + +class DpEventBuilder(object): + """Constructs a `DpEvent` representing the composition of a series of events. + + Two common uses of the `DpEventBuilder` are 1) producing and tracking a + ledger of `DpEvent`s during sequential accounting with a `PrivacyAccountant`, + and 2) building up the description of a composite mechanism for subsequent + batch accounting. + """ + + def __init__(self): + # A list of (event, count) pairs. + self._event_counts = [] + self._composed_event = None + + def compose(self, event: dp_event.DpEvent, count: int = 1): + """Composes a new event into the event represented by the builder. + + Args: + event: The new event to compose. + count: The number of times to compose the event. + """ + if not isinstance(event, dp_event.DpEvent): + raise TypeError(f'`event` must be a subclass of `DpEvent`. ' + f'Found {type(event)}.') + if count < 1: + raise ValueError(f'`count` must be positive. Found {count}.') + if isinstance(event, dp_event.NoOpDpEvent): + return + if self._event_counts and self._event_counts[-1][0] == event: + self._event_counts[-1] = (event, self._event_counts[-1][1] + count) + else: + self._event_counts.append((event, count)) + self._composed_event = None + + def build(self) -> dp_event.DpEvent: + """Builds and returns the composed DpEvent represented by the builder.""" + if not self._composed_event: + events = [] + for event, count in self._event_counts: + if count == 1: + events.append(event) + else: + events.append(dp_event.SelfComposedDpEvent(event, count)) + if not events: + self._composed_event = dp_event.NoOpDpEvent() + elif len(events) == 1: + self._composed_event = events[0] + else: + self._composed_event = dp_event.ComposedDpEvent(events) + + return self._composed_event diff --git a/python/fedml/core/dp/budget_accountant/dp_event_builder_test.py b/python/fedml/core/dp/budget_accountant/dp_event_builder_test.py new file mode 100644 index 0000000000..8d4e2601b5 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/dp_event_builder_test.py @@ -0,0 +1,70 @@ +"""Tests for DpEventBuilder.""" + +from absl.testing import absltest +from fedml.core.dp.budget_accountant import dp_event +from fedml.core.dp.budget_accountant import dp_event_builder + +_gaussian_event = dp_event.GaussianDpEvent(1.0) +_laplace_event = dp_event.LaplaceDpEvent(1.0) +_poisson_event = dp_event.PoissonSampledDpEvent(0.1, _gaussian_event) +_self_composed_event = dp_event.SelfComposedDpEvent(_gaussian_event, 3) + + +class DpEventBuilderTest(absltest.TestCase): + + def test_no_op(self): + builder = dp_event_builder.DpEventBuilder() + self.assertEqual(dp_event.NoOpDpEvent(), builder.build()) + + def test_single_gaussian(self): + builder = dp_event_builder.DpEventBuilder() + builder.compose(_gaussian_event) + self.assertEqual(_gaussian_event, builder.build()) + + def test_single_laplace(self): + builder = dp_event_builder.DpEventBuilder() + builder.compose(_laplace_event) + self.assertEqual(_laplace_event, builder.build()) + + def test_compose_no_op(self): + builder = dp_event_builder.DpEventBuilder() + builder.compose(dp_event.NoOpDpEvent()) + builder.compose(_gaussian_event) + builder.compose(dp_event.NoOpDpEvent()) + self.assertEqual(_gaussian_event, builder.build()) + + def test_compose_self(self): + builder = dp_event_builder.DpEventBuilder() + builder.compose(_gaussian_event) + builder.compose(_gaussian_event, 2) + self.assertEqual(_self_composed_event, builder.build()) + + def test_compose_heterogenous(self): + builder = dp_event_builder.DpEventBuilder() + builder.compose(_poisson_event) + builder.compose(_gaussian_event) + builder.compose(_gaussian_event, 2) + builder.compose(_poisson_event) + expected_event = dp_event.ComposedDpEvent( + [_poisson_event, _self_composed_event, _poisson_event]) + self.assertEqual(expected_event, builder.build()) + + def test_compose_composed(self): + builder = dp_event_builder.DpEventBuilder() + composed_event = dp_event.ComposedDpEvent( + [_gaussian_event, _poisson_event, _self_composed_event]) + builder.compose(_gaussian_event) + builder.compose(composed_event) + builder.compose(composed_event, 2) + builder.compose(_poisson_event) + builder.compose(_poisson_event) + expected_event = dp_event.ComposedDpEvent([
_gaussian_event, + dp_event.SelfComposedDpEvent(composed_event, 3), + dp_event.SelfComposedDpEvent(_poisson_event, 2) + ]) + self.assertEqual(expected_event, builder.build()) + + +if __name__ == '__main__': + absltest.main() diff --git a/python/fedml/core/dp/budget_accountant/mechanism_calibration.py b/python/fedml/core/dp/budget_accountant/mechanism_calibration.py new file mode 100644 index 0000000000..9b93c7a4d0 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/mechanism_calibration.py @@ -0,0 +1,219 @@ +"""Library for calibration of differentially private mechanisms. + +Algorithms to optimize some quantity while remaining within a specified privacy +budget. +""" + +from typing import Callable, Optional, Union + +import attr +from scipy import optimize + +from fedml.core.dp.budget_accountant import dp_event +from fedml.core.dp.budget_accountant import privacy_accountant + + +class BracketInterval(object): + pass + + +@attr.define(frozen=True) +class ExplicitBracketInterval(BracketInterval): + endpoint_1: float + endpoint_2: float + + +@attr.define(frozen=True) +class LowerEndpointAndGuess(BracketInterval): + lower_endpoint: float + initial_guess: float + + +class NoBracketIntervalFoundError(Exception): + """Error raised when explicit bracket interval cannot be found.""" + + +class NoOptimumFoundError(Exception): + """Error raised when root finding algorithm fails.""" + + +class NonEmptyAccountantError(Exception): + """Error raised when result of make_fresh_accountant has nonempty ledger.""" + + +def _search_for_explicit_bracket_interval( + bracket_interval: LowerEndpointAndGuess, + epsilon_gap: Callable[[float], float]) -> ExplicitBracketInterval: + """Explores an exponentially increasing interval to find an explicit bracket. + + Args: + bracket_interval: A LowerEndpointAndGuess which will be expanded to find + an explicit interval. + epsilon_gap: Function computing the epsilon at the provided value minus + the target epsilon. It is assumed that this function is monotonic with + respect to its parameter; otherwise the search could fail. + + Returns: + A valid ExplicitBracketInterval. + + Raises: + NoBracketIntervalFoundError: if no valid bracketing interval is found + within a factor of 2**30 of the initial guess. + """ + lower, upper = attr.astuple(bracket_interval) + if lower >= upper: + raise ValueError( + f'bracket_interval.lower_endpoint ({bracket_interval.lower_endpoint}) ' + f'must be less than bracket_interval.initial_guess ' + f'({bracket_interval.initial_guess}).') + + lower_value = epsilon_gap(lower) + upper_value = epsilon_gap(upper) + + gap = upper - lower + num_tries = 0 + + while lower_value * upper_value > 0: + num_tries += 1 + if num_tries > 30: + raise NoBracketIntervalFoundError( + 'Unable to find bracketing interval within 2**30 of initial guess. ' + 'Consider providing an ExplicitBracketInterval.') + + gap *= 2 # Loop invariant: gap = initial_gap * (2 ** num_tries). + lower, upper = upper, upper + gap + lower_value, upper_value = upper_value, epsilon_gap(upper) + + return ExplicitBracketInterval(lower, upper) + + +def calibrate_dp_mechanism( + make_fresh_accountant: Callable[[], privacy_accountant.PrivacyAccountant], + make_event_from_param: Union[Callable[[float], dp_event.DpEvent], + Callable[[int], dp_event.DpEvent]], + target_epsilon: float, + target_delta: float, + bracket_interval: BracketInterval, + discrete: bool = False, + tol: Optional[float] = None ) -> Union[float, int]: + """Searches for optimal mechanism parameter value within privacy budget.
+ + The procedure searches over the space of parameters by creating, for each + sample value, a DpEvent representing the mechanism generated from that value, + and a freshly initialized PrivacyAccountant. Then the accountant is applied to + the event to determine its epsilon at the target delta. Brent's method is used + to determine the value of the parameter at which the target epsilon is + achieved. + + Args: + make_fresh_accountant: A callable with no parameters that returns an + initialized PrivacyAccountant. The accountants that are returned across + multiple calls are assumed to be initialized identically. It is an error + for the initialized accountant's `ledger` property to return anything + besides `NoOpDpEvent`. + make_event_from_param: A callable that takes a parameter value as an + argument and creates a `DpEvent` representing the mechanism defined using + that value. + target_epsilon: The target epsilon value. + target_delta: The target delta value. + bracket_interval: A BracketInterval used to determine the upper and lower + endpoints of the interval within which Brent's method will search. + discrete: A bool determining whether the parameter is continuous or discrete + valued. If True, the parameter is assumed to take only integer values. + Concretely, `discrete=True` has three effects. 1) ints, not floats are + passed to `make_event_from_param`. 2) The minimum optimization tolerance + is 0.5. 3) An integer is returned. + tol: The tolerance, in parameter space. If the maximum (or minimum) value of + the parameter that meets the privacy requirements is x*, + calibrate_dp_mechanism is guaranteed to return a value x such that |x - + x*| <= tol. If `None`, tol is set to 1e-6 for continuous parameters or 0.5 + for discrete parameters. + + Returns: + A value of the parameter within tol of the optimum subject to the privacy + constraint. If discrete=True, the returned value will be an integer. + Otherwise it will be a float. + + Raises: + NoBracketIntervalFoundError: if bracket_interval is LowerEndpointAndGuess + and no upper bound can be found within a factor of 2**30 of the original + guess. + NoOptimumFoundError: if scipy.optimize.brentq fails to find an optimum. + NonEmptyAccountantError: if make_fresh_accountant returns an accountant with + nonempty ledger. + """ + + if not callable(make_fresh_accountant): + raise TypeError(f'make_fresh_accountant must be callable. ' + f'Found {type(make_fresh_accountant)}.') + + if not callable(make_event_from_param): + raise TypeError(f'make_event_from_param must be callable. ' + f'Found {type(make_event_from_param)}.') + + if target_epsilon < 0: + raise ValueError(f'target_epsilon must be nonnegative. Found ' + f'{target_epsilon}.') + + if not 0 <= target_delta <= 1: + raise ValueError(f'target_delta must be in range [0, 1]. Found ' + f'{target_delta}.') + + if tol is None: + tol = 0.5 if discrete else 1e-6 + elif discrete: + tol = max(tol, 0.5) + elif tol <= 0: + raise ValueError(f'tol must be positive.
Found {tol}.') + + def epsilon_gap(x: float) -> float: + if discrete: + x = round(x) + event = make_event_from_param(x) + accountant = make_fresh_accountant() + if not isinstance(accountant.ledger, dp_event.NoOpDpEvent): + raise NonEmptyAccountantError() + accountant.compose(event) + return accountant.get_epsilon(target_delta) - target_epsilon + + if isinstance(bracket_interval, LowerEndpointAndGuess): + bracket_interval = _search_for_explicit_bracket_interval( + bracket_interval, epsilon_gap) + elif not isinstance(bracket_interval, ExplicitBracketInterval): + raise TypeError(f'Unrecognized bracket_interval type: ' + f'{type(bracket_interval)}') + + value_1 = epsilon_gap(bracket_interval.endpoint_1) + value_2 = epsilon_gap(bracket_interval.endpoint_2) + if value_1 * value_2 > 0: + raise ValueError( + f'Bracket endpoints do not bracket target_epsilon={target_epsilon}: ' + f'endpoint 1 {bracket_interval.endpoint_1} with epsilon=' + f'{value_1 + target_epsilon}, and endpoint 2 ' + f'{bracket_interval.endpoint_2} with epsilon={value_2 + target_epsilon}' + ) + + root, result = optimize.brentq(epsilon_gap, bracket_interval.endpoint_1, + bracket_interval.endpoint_2, xtol=tol, + full_output=True) + + if not result.converged: + raise NoOptimumFoundError( + 'Unable to find root with scipy.optimize.brentq.') + + if epsilon_gap(root) > 0: + # Ensure that gap is not positive, guaranteeing returned parameter gives no + # less privacy than was requested. + if epsilon_gap(root + tol) < 0: + root += tol + elif epsilon_gap(root - tol) < 0: + root -= tol + else: + raise NoOptimumFoundError( + f'Unable to find valid value near root {root} returned by brentq.') + + if discrete: + root = round(root) + + return root diff --git a/python/fedml/core/dp/budget_accountant/mechanism_calibration_test.py b/python/fedml/core/dp/budget_accountant/mechanism_calibration_test.py new file mode 100644 index 0000000000..37e2946780 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/mechanism_calibration_test.py @@ -0,0 +1,198 @@ +"""Tests for mechanism_calibration.""" + +from absl.testing import absltest +from absl.testing import parameterized +import attr + +import numpy as np + +from fedml.core.dp.budget_accountant import dp_event +from fedml.core.dp.budget_accountant import mechanism_calibration +from fedml.core.dp.budget_accountant import privacy_accountant + + +@attr.define +class MockEvent(dp_event.DpEvent): + param: float + + +class MockAccountant(privacy_accountant.PrivacyAccountant): + + def __init__(self, value_to_epsilon): + super().__init__( + privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE) + self._value = 0.0 + self._value_to_epsilon = value_to_epsilon + + def supports(self, event: dp_event.DpEvent) -> bool: + return True + + def _compose(self, event: dp_event.DpEvent, count: int = 1): + self._value = event.param + + def get_epsilon(self, target_delta: float) -> float: + return self._value_to_epsilon(self._value) + + +class MechanismCalibrationTest(parameterized.TestCase): + + @parameterized.parameters( + {'eps_fn': lambda x: x, 'expected': 2.0}, + {'eps_fn': lambda x: 4 - x, 'expected': 2.0}, + {'eps_fn': np.square, 'expected': np.sqrt(2)}, + {'eps_fn': np.cbrt, 'expected': 8.0}, + {'eps_fn': lambda x: (x - 5) ** 3 + 2, 'expected': 5}, + {'eps_fn': lambda x: np.cos(x / 3) + 2, 'expected': 3 * np.pi / 2}, + {'eps_fn': lambda x: np.sin(x - 5) + (x + 3) / 4, 'expected': 5}, + {'eps_fn': lambda x: (13 - x) / 4 - np.sin(x - 5), 'expected': 5}, + ) + def test_basic_inversion(self, eps_fn, expected): + 
value = mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(eps_fn), MockEvent, 2, 0, + mechanism_calibration.ExplicitBracketInterval(0, 10), tol=1e-12) + + self.assertIsInstance(value, float) + self.assertAlmostEqual(value, expected) + + accountant = MockAccountant(eps_fn) + accountant.compose(MockEvent(value)) + epsilon = accountant.get_epsilon(0) + self.assertLessEqual(epsilon, 2) + + @parameterized.parameters( + {'eps_fn': lambda x: -1 if x < 0 else 1}, + {'eps_fn': lambda x: 1 if x < 0 else -1}, + {'eps_fn': lambda x: x - 1 if x < 0 else x + 1}, + {'eps_fn': lambda x: -2 - x if x < 0 else 2 - x}, + {'eps_fn': lambda x: x + 2 if x < 0 else x - 2}, + {'eps_fn': lambda x: 1 - x if x < 0 else -1 - x}, + ) + def test_discontinuous(self, eps_fn): + value = mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(eps_fn), MockEvent, 0, 0, + mechanism_calibration.ExplicitBracketInterval(-1, 1), tol=1e-12) + + self.assertIsInstance(value, float) + self.assertAlmostEqual(value, 0) + + accountant = MockAccountant(eps_fn) + accountant.compose(MockEvent(value)) + epsilon = accountant.get_epsilon(0) + self.assertLessEqual(epsilon, 0) + + @parameterized.parameters( + {'eps_fn': lambda x: x - 2, 'expected_eps': 0}, + {'eps_fn': lambda x: x - 2.1, 'expected_eps': -0.1}, + {'eps_fn': lambda x: x - 2.9, 'expected_eps': -0.9}, + {'eps_fn': lambda x: 2 - x, 'expected_eps': 0}, + {'eps_fn': lambda x: 1.9 - x, 'expected_eps': -0.1}, + {'eps_fn': lambda x: 1.1 - x, 'expected_eps': -0.9}, + ) + def test_discrete(self, eps_fn, expected_eps): + value = mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(eps_fn), MockEvent, 0, 0, + mechanism_calibration.ExplicitBracketInterval(0, 5), discrete=True) + + self.assertIsInstance(value, int) + self.assertEqual(value, 2) + + accountant = MockAccountant(eps_fn) + accountant.compose(MockEvent(value)) + epsilon = accountant.get_epsilon(0) + self.assertAlmostEqual(epsilon, expected_eps) + + @parameterized.parameters( + {'epsilon_gap': lambda x: x, 'lower': -1, 'guess': -0.5}, + {'epsilon_gap': lambda x: -x, 'lower': -1, 'guess': -0.5}, + {'epsilon_gap': lambda x: np.exp(x) - 2, 'lower': 0, 'guess': 0.1}, + {'epsilon_gap': lambda x: 1 - np.sqrt(x), 'lower': 0, 'guess': 0.1}, + {'epsilon_gap': lambda x: np.log(x) - 20, 'lower': 1, 'guess': 2}, + ) + def test_search_for_explicit_bracket_interval( + self, epsilon_gap, lower, guess): + lower_value = epsilon_gap(lower) + interval = mechanism_calibration._search_for_explicit_bracket_interval( + mechanism_calibration.LowerEndpointAndGuess(lower, guess), epsilon_gap) + upper_value = epsilon_gap(interval.endpoint_2) + self.assertLessEqual(lower_value * upper_value, 0) + + def test_raises_unknown_bracket_interval_type(self): + class UnknownBracketInterval(mechanism_calibration.BracketInterval): + pass + + with self.assertRaisesRegex(TypeError, 'Unrecognized bracket_interval'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 1.0, 0, + UnknownBracketInterval()) + + def test_raises_mfa_not_callable(self): + with self.assertRaisesRegex(TypeError, 'callable'): + mechanism_calibration.calibrate_dp_mechanism( + 'not a callable', MockEvent, 1.0, 0, + mechanism_calibration.ExplicitBracketInterval(0, 5)) + + def test_raises_mefv_not_callable(self): + with self.assertRaisesRegex(TypeError, 'callable'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), 'not a callable', 1.0, 0, + 
mechanism_calibration.ExplicitBracketInterval(0, 5)) + + def test_raises_target_epsilon_negative(self): + with self.assertRaisesRegex(ValueError, 'nonnegative'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, -1.0, 0, + mechanism_calibration.ExplicitBracketInterval(0, 5)) + + def test_raises_target_delta_out_of_range(self): + with self.assertRaisesRegex(ValueError, 'in range'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 0.0, -0.1, + mechanism_calibration.ExplicitBracketInterval(0, 5)) + + with self.assertRaisesRegex(ValueError, 'in range'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 0.0, 1.1, + mechanism_calibration.ExplicitBracketInterval(0, 5)) + + def test_bad_bracket_interval(self): + with self.assertRaisesRegex(ValueError, 'Bracket endpoints'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 1.0, 0.0, + mechanism_calibration.ExplicitBracketInterval(2, 5)) + + with self.assertRaisesRegex(ValueError, 'Bracket endpoints'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 1.0, 0.0, + mechanism_calibration.ExplicitBracketInterval(-2, 0)) + + with self.assertRaisesRegex(ValueError, 'must be less than'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 1.0, 0.0, + mechanism_calibration.LowerEndpointAndGuess(2, 0)) + + def test_negative_tol(self): + with self.assertRaisesRegex(ValueError, 'tol'): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 1.0, 0.0, + mechanism_calibration.LowerEndpointAndGuess(0, 1), tol=-1) + + def test_no_bracket_interval_found(self): + with self.assertRaises(mechanism_calibration.NoBracketIntervalFoundError): + mechanism_calibration.calibrate_dp_mechanism( + lambda: MockAccountant(lambda x: x), MockEvent, 1.0e10, 0.0, + mechanism_calibration.LowerEndpointAndGuess(0, 1)) + + def test_nonempty_accountant(self): + def make_fresh_accountant(): + accountant = MockAccountant(lambda x: x) + accountant.compose(MockEvent(1.0)) + return accountant + + with self.assertRaises(mechanism_calibration.NonEmptyAccountantError): + mechanism_calibration.calibrate_dp_mechanism( + make_fresh_accountant, MockEvent, 0.5, 0.0, + mechanism_calibration.ExplicitBracketInterval(0, 1)) + + +if __name__ == '__main__': + absltest.main() diff --git a/python/fedml/core/dp/budget_accountant/pld/__init__.py b/python/fedml/core/dp/budget_accountant/pld/__init__.py new file mode 100644 index 0000000000..d777011e3b --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/__init__.py @@ -0,0 +1,3 @@ +"""PLD Accounting package.""" + +from fedml.core.dp.budget_accountant.pld.pld_privacy_accountant import PLDAccountant diff --git a/python/fedml/core/dp/budget_accountant/pld/accountant.py b/python/fedml/core/dp/budget_accountant/pld/accountant.py new file mode 100644 index 0000000000..e7761dbe18 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/accountant.py @@ -0,0 +1,248 @@ +"""Helper functions for privacy accounting across queries.""" + +import math +import typing +from scipy import special + +from fedml.core.dp.budget_accountant.pld import common +from fedml.core.dp.budget_accountant.pld import privacy_loss_distribution +from fedml.core.dp.budget_accountant.pld import privacy_loss_mechanism + + +def get_smallest_parameter( privacy_parameters: common.DifferentialPrivacyParameters, num_queries: int,
privacy_loss_distribution_constructor: typing.Callable[ [float], privacy_loss_distribution.PrivacyLossDistribution], search_parameters: common.BinarySearchParameters ) -> typing.Union[float, None]: + """Finds smallest parameter for which the mechanism satisfies desired privacy. + + This function computes the smallest "parameter" for which the corresponding + mechanism, when run a specified number of times, satisfies a given privacy + level. It is assumed that, when the parameter increases, the mechanism becomes + more private. + + Args: + privacy_parameters: The desired privacy guarantee. + num_queries: Number of times the mechanism will be invoked. + privacy_loss_distribution_constructor: A function that takes in a parameter + and returns the privacy loss distribution for the corresponding mechanism + for the given parameter. + search_parameters: Parameters used for binary search. + + Returns: + Smallest parameter for which the corresponding mechanism with that + parameter, when applied the given number of times, satisfies the desired + privacy guarantee. When no parameter in the given range satisfies this, + returns None. + """ + + def get_delta_for_parameter(parameter): + pld_single_query = privacy_loss_distribution_constructor(parameter) + pld_all_queries = pld_single_query.self_compose(num_queries) + return pld_all_queries.get_delta_for_epsilon(privacy_parameters.epsilon) + + return common.inverse_monotone_function(get_delta_for_parameter, + privacy_parameters.delta, + search_parameters) + + +def get_smallest_laplace_noise( + privacy_parameters: common.DifferentialPrivacyParameters, + num_queries: int, + sensitivity: float = 1) -> float: + """Finds smallest Laplace noise for which the mechanism satisfies desired privacy. + + Args: + privacy_parameters: The desired privacy guarantee. + num_queries: Number of times the mechanism will be invoked. + sensitivity: The l1 sensitivity of each query. + + Returns: + Smallest parameter for which the Laplace mechanism with this parameter, when + applied the given number of times, satisfies the desired privacy guarantee. + """ + + def privacy_loss_distribution_constructor(parameter): + # Setting value_discretization_interval equal to + # 0.01 * epsilon / num_queries ensures that the resulting parameter is not + # (epsilon', delta)-DP for epsilon' less than 0.99 * epsilon / num_queries. + # This is a heuristic for getting a reasonable pessimistic estimate for the + # noise parameter. + return privacy_loss_distribution.from_laplace_mechanism( + parameter, + sensitivity=sensitivity, + value_discretization_interval=0.01 * privacy_parameters.epsilon / + num_queries) + + # Laplace mechanism with parameter sensitivity * num_queries / epsilon is + # epsilon-DP (for num_queries queries). + search_parameters = common.BinarySearchParameters( + 0, num_queries * sensitivity / privacy_parameters.epsilon) + + parameter = get_smallest_parameter(privacy_parameters, num_queries, + privacy_loss_distribution_constructor, + search_parameters) + if parameter is None: + parameter = num_queries * sensitivity / privacy_parameters.epsilon + return parameter + + +def get_smallest_discrete_laplace_noise( + privacy_parameters: common.DifferentialPrivacyParameters, + num_queries: int, + sensitivity: int = 1) -> float: + """Finds smallest discrete Laplace noise for which the mechanism satisfies desired privacy. + + Note that from the way the discrete Laplace distribution is defined, the amount of + noise decreases as the parameter increases.
(In other words, the mechanism + becomes less private as the parameter increases.) As a result, the output will + be the largest parameter (instead of smallest as in Laplace). + + Args: + privacy_parameters: The desired privacy guarantee. + num_queries: Number of times the mechanism will be invoked. + sensitivity: The l1 sensitivity of each query. + + Returns: + Largest parameter for which the discrete Laplace mechanism with this + parameter, when applied the given number of times, satisfies the desired + privacy guarantee. + """ + + # Search for inverse of the parameter instead of the parameter itself. + def privacy_loss_distribution_constructor(inverse_parameter): + parameter = 1 / inverse_parameter + # Setting value_discretization_interval equal to parameter because the + # privacy loss of the discrete Laplace mechanism is always divisible by the + # parameter. + return privacy_loss_distribution.from_discrete_laplace_mechanism( + parameter, + sensitivity=sensitivity, + value_discretization_interval=parameter) + + # discrete Laplace mechanism with parameter + # epsilon / (sensitivity * num_queries) is epsilon-DP (for num_queries + # queries). + search_parameters = common.BinarySearchParameters( + 0, num_queries * sensitivity / privacy_parameters.epsilon) + + inverse_parameter = get_smallest_parameter( + privacy_parameters, num_queries, privacy_loss_distribution_constructor, + search_parameters) + if inverse_parameter is None: + parameter = privacy_parameters.epsilon / (num_queries * sensitivity) + else: + parameter = 1 / inverse_parameter + return parameter + + +def get_smallest_gaussian_noise( + privacy_parameters: common.DifferentialPrivacyParameters, + num_queries: int, + sensitivity: float = 1) -> float: + """Finds smallest Gaussian noise for which the mechanism satisfies desired privacy. + + Args: + privacy_parameters: The desired privacy guarantee. + num_queries: Number of times the mechanism will be invoked. + sensitivity: The l2 sensitivity of each query. + + Returns: + Smallest standard deviation for which the Gaussian mechanism with this std, + when applied the given number of times, satisfies the desired privacy + guarantee. + """ + # The l2 sensitivity grows as the square root of the number of queries + return privacy_loss_mechanism.GaussianPrivacyLoss.from_privacy_guarantee( + privacy_parameters, + sensitivity=sensitivity * math.sqrt(num_queries)).standard_deviation + + +def advanced_composition( + privacy_parameters: common.DifferentialPrivacyParameters, + num_queries: int, total_delta: float) -> typing.Optional[float]: + """Computes total DP parameters after applying an algorithm with given privacy parameters multiple times. + + Uses the optimal advanced composition theorem, Theorem 3.3 from the paper + Kairouz, Oh, and Viswanath, "The Composition Theorem for Differential Privacy", + to compute the total DP parameters when an algorithm + with given privacy parameters is applied a given number of times. + + Note that we can compute this alternatively from PrivacyLossDistribution + by invoking from_privacy_parameters and applying the given number of + compositions. When setting value_discretization_interval appropriately, these + two approaches should coincide, but using the advanced composition theorem + directly is less computationally intensive. + + Args: + privacy_parameters: The privacy guarantee of a single query. + num_queries: Number of times the algorithm is invoked.
+ total_delta: The target value of total delta of the privacy parameters for + the multiple runs of the algorithm. + + Returns: + total_epsilon such that, when applying the algorithm the given number of + times, the result is still (total_epsilon, total_delta)-DP. + + None when the total_delta is less than 1 - (1 - delta)^num_queries, for + which no guarantee of (total_epsilon, total_delta)-DP is possible for any + value of total_epsilon. + """ + epsilon = privacy_parameters.epsilon + delta = privacy_parameters.delta + k = num_queries + + # The calculation follows Theorem 3.3 of https://arxiv.org/pdf/1311.0776.pdf + for i in range(k // 2, -1, -1): + delta_i = 0 + for l in range(i): + delta_i += special.binom(k, l) * ( + math.exp(epsilon * (k - l)) - math.exp(epsilon * (k - 2 * i + l))) + delta_i /= ((1 + math.exp(epsilon))**k) + if 1 - ((1 - delta) ** k) * (1 - delta_i) <= total_delta: + return epsilon * (k - 2 * i) + return None + + +def get_smallest_epsilon_from_advanced_composition( + total_privacy_parameters: common.DifferentialPrivacyParameters, + num_queries: int, delta: float = 0) -> typing.Optional[float]: + """Computes the DP parameter for an algorithm so that applying it a given number of times remains DP with the given total parameters. + + Uses the optimal advanced composition theorem, Theorem 3.3 from the paper + Kairouz, Oh, and Viswanath, "The Composition Theorem for Differential Privacy", + to compute the DP parameter for an algorithm so that, when it is applied a + given number of times, the result remains DP with the given privacy parameters. + + Args: + total_privacy_parameters: The desired privacy guarantee after applying the + algorithm a given number of times. + num_queries: Number of times the algorithm is invoked. + delta: The value of DP parameter delta for the algorithm. + + Returns: + epsilon such that if an algorithm is (epsilon, delta)-DP, then applying it + the given number of times remains DP with total_privacy_parameters. + + None when total_privacy_parameters.delta is less than + 1 - (1 - delta)^num_queries, for which no guarantee of + total_privacy_parameters DP is possible for any value of epsilon.
+ """ + if 1 - ((1 - delta) ** num_queries) > total_privacy_parameters.delta: + return None + + search_parameters = common.BinarySearchParameters( + total_privacy_parameters.epsilon / num_queries, + total_privacy_parameters.epsilon) + + def get_total_epsilon_for_epsilon(epsilon): + privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta) + return advanced_composition(privacy_parameters, num_queries, + total_privacy_parameters.delta) + + return common.inverse_monotone_function( + get_total_epsilon_for_epsilon, + total_privacy_parameters.epsilon, + search_parameters, + increasing=True) diff --git a/python/fedml/core/dp/budget_accountant/pld/accountant_test.py b/python/fedml/core/dp/budget_accountant/pld/accountant_test.py new file mode 100644 index 0000000000..c416e8fbd4 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/accountant_test.py @@ -0,0 +1,223 @@ +"""Tests for accountant.""" + +import unittest +from absl.testing import parameterized + +from dp_accounting.pld import accountant +from dp_accounting.pld import common + + +class AccountantTest(parameterized.TestCase): + + @parameterized.named_parameters( + { + 'testcase_name': 'basic_composition', + 'sensitivity': 21, + 'epsilon': 3, + 'delta': 0, + 'num_queries': 10, + 'expected_parameter': 70, + }, + { + 'testcase_name': 'positive_delta', + 'sensitivity': 1, + 'epsilon': 1, + 'delta': 0.0001, + 'num_queries': 20, + 'expected_parameter': 13.6, + }, + { + 'testcase_name': 'positive_delta_varying_sensitivity', + 'sensitivity': 0.5, + 'epsilon': 1, + 'delta': 0.0001, + 'num_queries': 20, + 'expected_parameter': 6.8, + }, + { + 'testcase_name': 'large_num_composition', + 'sensitivity': 1, + 'epsilon': 1, + 'delta': 0.0001, + 'num_queries': 500, + 'expected_parameter': 71.2, + },) + def test_get_smallest_laplace_noise(self, epsilon, delta, num_queries, + sensitivity, expected_parameter): + privacy_parameters = common.DifferentialPrivacyParameters( + epsilon, delta) + self.assertAlmostEqual( + expected_parameter, + accountant.get_smallest_laplace_noise( + privacy_parameters, num_queries, sensitivity=sensitivity), + delta=0.1) + + @parameterized.named_parameters( + { + 'testcase_name': 'basic_composition', + 'sensitivity': 2, + 'epsilon': 3, + 'delta': 0, + 'num_queries': 5, + 'expected_parameter': 0.3, + }, + { + 'testcase_name': 'positive_delta', + 'sensitivity': 1, + 'epsilon': 1, + 'delta': 0.0001, + 'num_queries': 20, + 'expected_parameter': 0.073, + }, + { + 'testcase_name': 'positive_delta_varying_sensitivity', + 'sensitivity': 5, + 'epsilon': 1, + 'delta': 0.0001, + 'num_queries': 20, + 'expected_parameter': 0.014, + },) + def test_get_smallest_discrete_laplace_noise(self, epsilon, delta, + num_queries, sensitivity, + expected_parameter): + privacy_parameters = common.DifferentialPrivacyParameters( + epsilon, delta) + self.assertAlmostEqual( + expected_parameter, + accountant.get_smallest_discrete_laplace_noise( + privacy_parameters, num_queries, sensitivity=sensitivity), + delta=1e-3) + + @parameterized.named_parameters( + { + 'testcase_name': 'base', + 'sensitivity': 1, + 'epsilon': 1, + 'delta': 0.78760074, + 'num_queries': 1, + 'expected_std': 1/3, + }, + { + 'testcase_name': 'varying_sensitivity_and_num_queries', + 'sensitivity': 6, + 'epsilon': 1, + 'delta': 0.78760074, + 'num_queries': 25, + 'expected_std': 10, + }) + def test_get_smallest_gaussian_noise(self, epsilon, delta, num_queries, + sensitivity, expected_std): + privacy_parameters = common.DifferentialPrivacyParameters( + epsilon, delta) + 
self.assertAlmostEqual( + expected_std, + accountant.get_smallest_gaussian_noise( + privacy_parameters, num_queries, sensitivity=sensitivity)) + + @parameterized.named_parameters( + { + 'testcase_name': 'basic_composition', + 'epsilon': 1, + 'delta': 0, + 'total_delta': 0, + 'num_queries': 30, + 'expected_total_epsilon': 30, + }, + { + 'testcase_name': 'advantage_over_basic1', + 'epsilon': 1, + 'delta': 0.001, + 'total_delta': 0.06, + 'num_queries': 30, + 'expected_total_epsilon': 22, + }, + { + 'testcase_name': 'advantage_over_basic2', + 'epsilon': 1, + 'delta': 0.001, + 'total_delta': 0.1, + 'num_queries': 30, + 'expected_total_epsilon': 20, + }, + { + 'testcase_name': 'total_delta_too_small', + 'epsilon': 1, + 'delta': 0.2, + 'total_delta': 0.1, + 'num_queries': 1, + 'expected_total_epsilon': None, + }, + { + 'testcase_name': 'total_delta_too_small2', + 'epsilon': 1, + 'delta': 0.01, + 'total_delta': 0.26, + 'num_queries': 30, + 'expected_total_epsilon': None, + }) + def test_advanced_composition(self, epsilon, delta, num_queries, total_delta, + expected_total_epsilon): + privacy_parameters = common.DifferentialPrivacyParameters( + epsilon, delta) + total_epsilon = accountant.advanced_composition(privacy_parameters, + num_queries, total_delta) + if expected_total_epsilon is None: + self.assertIsNone(total_epsilon) + else: + self.assertAlmostEqual(expected_total_epsilon, total_epsilon) + + @parameterized.named_parameters( + { + 'testcase_name': 'basic_composition', + 'total_epsilon': 30, + 'total_delta': 0, + 'delta': 0, + 'num_queries': 30, + 'expected_epsilon': 1, + }, + { + 'testcase_name': 'advantage_over_basic', + 'total_epsilon': 22, + 'total_delta': 0.06, + 'delta': 0.001, + 'num_queries': 30, + 'expected_epsilon': 1, + }, + { + 'testcase_name': 'advantage_over_basic2', + 'total_epsilon': 5, + 'total_delta': 0.01, + 'delta': 0, + 'num_queries': 50, + 'expected_epsilon': 0.25, + }, + { + 'testcase_name': 'total_delta_too_small', + 'total_epsilon': 1, + 'total_delta': 0.1, + 'delta': 0.2, + 'num_queries': 1, + 'expected_epsilon': None, + }, + { + 'testcase_name': 'total_delta_too_small2', + 'total_epsilon': 30, + 'total_delta': 0.26, + 'delta': 0.01, + 'num_queries': 30, + 'expected_epsilon': None, + }) + def test_get_smallest_epsilon_from_advanced_composition( + self, total_epsilon, total_delta, num_queries, delta, expected_epsilon): + total_privacy_parameters = common.DifferentialPrivacyParameters( + total_epsilon, total_delta) + epsilon = accountant.get_smallest_epsilon_from_advanced_composition( + total_privacy_parameters, num_queries, delta) + if expected_epsilon is None: + self.assertIsNone(epsilon) + else: + self.assertAlmostEqual(expected_epsilon, epsilon, places=6) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/fedml/core/dp/budget_accountant/pld/common.py b/python/fedml/core/dp/budget_accountant/pld/common.py new file mode 100644 index 0000000000..e7fa666aaa --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/common.py @@ -0,0 +1,308 @@ +"""Common classes and functions for the accounting library.""" + +import dataclasses +import math +from typing import Callable, List, Mapping, Optional, Tuple, Union + +import numpy as np +from scipy import fft +from scipy import signal + +ArrayLike = Union[np.ndarray, List[float]] + + +@dataclasses.dataclass +class DifferentialPrivacyParameters(object): + """Representation of the differential privacy parameters of a mechanism. 
+ + Attributes: + epsilon: the epsilon in (epsilon, delta)-differential privacy. + delta: the delta in (epsilon, delta)-differential privacy. + """ + epsilon: float + delta: float = 0 + + def __post_init__(self): + if self.epsilon < 0: + raise ValueError(f'epsilon should be positive: {self.epsilon}') + if self.delta < 0 or self.delta > 1: + raise ValueError(f'delta should be between 0 and 1: {self.delta}') + + +@dataclasses.dataclass +class BinarySearchParameters(object): + """Parameters used for binary search. + + Attributes: + upper_bound: An upper bound on the binary search range. + lower_bound: A lower bound on the binary search range. + initial_guess: An initial guess to start the search with. Must be positive. + When this guess is close to the true value, it can help make the binary + search faster. + tolerance: An acceptable error on the returned value. + discrete: Whether the search is over integers. + """ + lower_bound: float + upper_bound: float + initial_guess: Optional[float] = None + tolerance: float = 1e-7 + discrete: bool = False + + +def inverse_monotone_function(func: Callable[[float], float], + value: float, + search_parameters: BinarySearchParameters, + increasing: bool = False) -> Optional[float]: + """Inverse a monotone function. + + Args: + func: The function to be inversed. + value: The desired value of the function. + search_parameters: Parameters used for binary search. + increasing: Whether the function is monotonically increasing. + + Returns: + x such that func(x) is no more than value, when such x exists. It is + guaranteed that the returned x is within search_parameters.tolerance of the + smallest (for monotonically decreasing func) or the largest (for + monotonically increasing func) such x. When no such x exists within the + given range, returns None. + """ + lower_x = search_parameters.lower_bound + upper_x = search_parameters.upper_bound + initial_guess_x = search_parameters.initial_guess + + if increasing: + check = lambda func_value, target_value: func_value <= target_value + if lower_x != -math.inf and func(lower_x) > value: + return None + else: + check = lambda func_value, target_value: func_value > target_value + if upper_x != math.inf and func(upper_x) > value: + return None + + if initial_guess_x is not None: + while initial_guess_x < upper_x and check(func(initial_guess_x), value): + lower_x = initial_guess_x + initial_guess_x *= 2 + upper_x = min(upper_x, initial_guess_x) + + if search_parameters.discrete: + tolerance = 1 + else: + tolerance = search_parameters.tolerance + + while upper_x - lower_x > tolerance: + if search_parameters.discrete: + mid_x = (upper_x + lower_x) // 2 + else: + mid_x = (upper_x + lower_x) / 2 + + if check(func(mid_x), value): + lower_x = mid_x + else: + upper_x = mid_x + + if increasing: + return lower_x + else: + return upper_x + + +def dictionary_to_list( + input_dictionary: Mapping[int, float]) -> Tuple[int, List[float]]: + """Converts an integer-keyed dictionary into an list. + + Args: + input_dictionary: A dictionary whose keys are integers. + + Returns: + A tuple of an integer offset and a list result_list. The offset is the + minimum value of the input dictionary. result_list has length equal to the + difference between the maximum and minimum values of the input dictionary. + result_list[i] is equal to dictionary[offset + i] and is zero if offset + i + is not a key in the input dictionary. 
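+
+  Example (illustrative): dictionary_to_list({3: 0.4, 5: 0.6}) returns
+  (3, [0.4, 0, 0.6]), i.e. offset 3 and a list covering keys 3 through 5.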
+ """ + offset = min(input_dictionary) + max_val = max(input_dictionary) + result_list = [input_dictionary.get(i, 0) for i in range(offset, max_val + 1)] + return (offset, result_list) + + +def list_to_dictionary(input_list: List[float], + offset: int, + tail_mass_truncation: float = 0) -> Mapping[int, float]: + """Converts a list into an integer-keyed dictionary, with a specified offset. + + Args: + input_list: An input list. + offset: The offset in the key of the output dictionary + tail_mass_truncation: an upper bound on the tails of the input list that + might be truncated. + + Returns: + A dictionary whose value at key is equal to input_list[key - offset]. If + input_list[key - offset] is less than or equal to zero, it is not included + in the dictionary. + """ + lower_truncation_index = 0 + lower_truncation_mass = 0 + while lower_truncation_index < len(input_list): + lower_truncation_mass += input_list[lower_truncation_index] + if lower_truncation_mass > tail_mass_truncation / 2: + break + lower_truncation_index += 1 + + upper_truncation_index = len(input_list) - 1 + upper_truncation_mass = 0 + while upper_truncation_index >= 0: + upper_truncation_mass += input_list[upper_truncation_index] + if upper_truncation_mass > tail_mass_truncation / 2: + break + upper_truncation_index -= 1 + + result_dictionary = {} + for i in range(lower_truncation_index, upper_truncation_index + 1): + if input_list[i] > 0: + result_dictionary[i + offset] = input_list[i] + return result_dictionary + + +def convolve_dictionary(dictionary1: Mapping[int, float], + dictionary2: Mapping[int, float], + tail_mass_truncation: float = 0) -> Mapping[int, float]: + """Computes a convolution of two dictionaries. + + Args: + dictionary1: The first dictionary whose keys are integers. + dictionary2: The second dictionary whose keys are integers. + tail_mass_truncation: an upper bound on the tails of the output that might + be truncated. + + Returns: + The dictionary where for each key its corresponding value is the sum, over + all key1, key2 such that key1 + key2 = key, of dictionary1[key1] times + dictionary2[key2] + """ + + # Convert the dictionaries to lists. + min1, list1 = dictionary_to_list(dictionary1) + min2, list2 = dictionary_to_list(dictionary2) + + # Compute the convolution of the two lists. + result_list = signal.fftconvolve(list1, list2) + + # Convert the list back to a dictionary and return + return list_to_dictionary( + result_list, min1 + min2, tail_mass_truncation=tail_mass_truncation) + + +def compute_self_convolve_bounds( + input_list: List[float], + num_times: int, + tail_mass_truncation: float = 0, + orders: Optional[List[float]] = None) -> Tuple[int, int]: + """Computes truncation bounds for convolution using Chernoff bound. + + Args: + input_list: The input list to be convolved. + num_times: The number of times the list is to be convolved with itself. + tail_mass_truncation: an upper bound on the tails of the output that might + be truncated. + orders: a list of orders on which the Chernoff bound is applied. + + Returns: + A pair of upper and lower bounds for which the mass of the result of + convolution outside of this range is at most tail_mass_truncation. + """ + upper_bound = (len(input_list) - 1) * num_times + lower_bound = 0 + + if tail_mass_truncation == 0: + return lower_bound, upper_bound + + if orders is None: + # Set orders so whose absolute values are not too large; otherwise, we may + # run into numerical issues. 
+ orders = ( + np.concatenate((np.arange(-20, 0), np.arange(1, 21))) / len(input_list)) + + # Compute log of the moment generating function at the specified orders. + log_mgfs = np.log([ + np.dot(np.exp(np.arange(len(input_list)) * order), input_list) + for order in orders + ]) + + for order, log_mgf_value in zip(orders, log_mgfs): + # Use Chernoff bound to update the upper/lower bound. See equation (5) in + # the supplementary material. + bound = (num_times * log_mgf_value + + math.log(2 / tail_mass_truncation)) / order + if order > 0: + upper_bound = min(upper_bound, math.ceil(bound)) + if order < 0: + lower_bound = max(lower_bound, math.floor(bound)) + + return lower_bound, upper_bound + + +def self_convolve(input_list: ArrayLike, + num_times: int, + tail_mass_truncation: float = 0) -> Tuple[int, List[float]]: + """Computes a convolution of the input list with itself num_times times. + + Args: + input_list: The input list to be convolved. + num_times: The number of times the list is to be convolved with itself. + tail_mass_truncation: an upper bound on the tails of the output that might + be truncated. + + Returns: + A pair of truncation_lower_bound, output_list, where the i-th entry of + output_list is approximately the sum, over all i_1, i_2, ..., i_num_times + such that i_1 + i_2 + ... + i_num_times = i + truncation_lower_bound, + of input_list[i_1] * input_list[i_2] * ... * input_list[i_num_times]. + """ + truncation_lower_bound, truncation_upper_bound = compute_self_convolve_bounds( + input_list, num_times, tail_mass_truncation) + + # Use FFT to compute the convolution + fast_len = fft.next_fast_len(truncation_upper_bound - truncation_lower_bound + + 1) + truncated_convolution_output = np.real( + fft.ifft(fft.fft(input_list, fast_len)**num_times)) + + # Discrete Fourier Transform wraps around modulo fast_len. Extract the output + # values in the range of interest. + output_list = [ + truncated_convolution_output[i % fast_len] + for i in range(truncation_lower_bound, truncation_upper_bound + 1) + ] + + return truncation_lower_bound, output_list + + +def self_convolve_dictionary( + input_dictionary: Mapping[int, float], + num_times: int, + tail_mass_truncation: float = 0) -> Mapping[int, float]: + """Computes a convolution of the input dictionary with itself num_times times. + + Args: + input_dictionary: The input dictionary whose keys are integers. + num_times: The number of times the dictionary is to be convolved with + itself. + tail_mass_truncation: an upper bound on the tails of the output that might + be truncated. + + Returns: + The dictionary where for each key its corresponding value is the sum, over + all key1, key2, ..., key_num_times such that key1 + key2 + ... + + key_num_times = key, of input_dictionary[key1] * input_dictionary[key2] * + ... 
* input_dictionary[key_num_times] + """ + min_val, input_list = dictionary_to_list(input_dictionary) + min_val_convolution, output_list = self_convolve( + input_list, num_times, tail_mass_truncation=tail_mass_truncation) + return list_to_dictionary(output_list, + min_val * num_times + min_val_convolution) diff --git a/python/fedml/core/dp/budget_accountant/pld/common_test.py b/python/fedml/core/dp/budget_accountant/pld/common_test.py new file mode 100644 index 0000000000..23d292c350 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/common_test.py @@ -0,0 +1,222 @@ +"""Tests for common.""" + +import math +import unittest +from absl.testing import parameterized + +from dp_accounting.pld import common +from dp_accounting.pld import test_util + + +class DifferentialPrivacyParametersTest(parameterized.TestCase): + + @parameterized.parameters((-0.1, 0.1), (1, -0.1), (1, 1.1)) + def test_epsilon_delta_value_errors(self, epsilon, delta): + with self.assertRaises(ValueError): + common.DifferentialPrivacyParameters(epsilon, delta) + + +class CommonTest(parameterized.TestCase): + + @parameterized.named_parameters( + { + 'testcase_name': 'no_initial_guess', + 'func': (lambda x: -x), + 'value': -4.5, + 'lower_x': 0, + 'upper_x': 10, + 'initial_guess_x': None, + 'expected_x': 4.5, + 'increasing': False, + }, { + 'testcase_name': 'with_initial_guess', + 'func': (lambda x: -x), + 'value': -5, + 'lower_x': 0, + 'upper_x': 10, + 'initial_guess_x': 2, + 'expected_x': 5, + 'increasing': False, + }, { + 'testcase_name': 'out_of_range', + 'func': (lambda x: -x), + 'value': -5, + 'lower_x': 0, + 'upper_x': 4, + 'initial_guess_x': None, + 'expected_x': None, + 'increasing': False, + }, { + 'testcase_name': 'infinite_upper_bound', + 'func': (lambda x: -1 / (1 / x)), + 'value': -5, + 'lower_x': 0, + 'upper_x': math.inf, + 'initial_guess_x': 2, + 'expected_x': 5, + 'increasing': False, + }, { + 'testcase_name': 'increasing_no_initial_guess', + 'func': (lambda x: x**2), + 'value': 25, + 'lower_x': 0, + 'upper_x': 10, + 'initial_guess_x': None, + 'expected_x': 5, + 'increasing': True, + }, { + 'testcase_name': 'increasing_with_initial_guess', + 'func': (lambda x: x**2), + 'value': 25, + 'lower_x': 0, + 'upper_x': 10, + 'initial_guess_x': 2, + 'expected_x': 5, + 'increasing': True, + }, { + 'testcase_name': 'increasing_out_of_range', + 'func': (lambda x: x**2), + 'value': 5, + 'lower_x': 6, + 'upper_x': 10, + 'initial_guess_x': None, + 'expected_x': None, + 'increasing': True, + }, { + 'testcase_name': 'discrete', + 'func': (lambda x: -x), + 'value': -4.5, + 'lower_x': 0, + 'upper_x': 10, + 'initial_guess_x': None, + 'expected_x': 5, + 'increasing': False, + 'discrete': True, + }) + def test_inverse_monotone_function(self, + func, + value, + lower_x, + upper_x, + initial_guess_x, + expected_x, + increasing, + discrete=False): + search_parameters = common.BinarySearchParameters( + lower_x, upper_x, initial_guess=initial_guess_x, discrete=discrete) + x = common.inverse_monotone_function( + func, value, search_parameters, increasing=increasing) + if expected_x is None: + self.assertIsNone(x) + else: + self.assertAlmostEqual(expected_x, x) + + +class DictListConversionTest(parameterized.TestCase): + + @parameterized.named_parameters( + { + 'testcase_name': 'truncate_both_sides', + 'input_list': [0.2, 0.5, 0.3], + 'offset': 1, + 'tail_mass_truncation': 0.6, + 'expected_result': { + 2: 0.5 + }, + }, { + 'testcase_name': 'truncate_lower_only', + 'input_list': [0.2, 0.5, 0.3], + 'offset': 1, + 
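+          # Illustrative arithmetic for this case: tail_mass_truncation=0.4
+          # allows dropping at most 0.2 of mass per tail; the leading 0.2
+          # fits that budget and is removed, while the trailing 0.3 exceeds
+          # it and is kept, leaving {2: 0.5, 3: 0.3}.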
'tail_mass_truncation': 0.4, + 'expected_result': { + 2: 0.5, + 3: 0.3 + }, + }, { + 'testcase_name': 'truncate_upper_only', + 'input_list': [0.4, 0.5, 0.1], + 'offset': 1, + 'tail_mass_truncation': 0.3, + 'expected_result': { + 1: 0.4, + 2: 0.5 + }, + }, { + 'testcase_name': 'truncate_all', + 'input_list': [0.4, 0.5, 0.1], + 'offset': 1, + 'tail_mass_truncation': 3, + 'expected_result': {}, + }) + def test_list_to_dict_truncation(self, input_list, offset, + tail_mass_truncation, expected_result): + result = common.list_to_dictionary( + input_list, offset, tail_mass_truncation=tail_mass_truncation) + test_util.assert_dictionary_almost_equal(self, expected_result, result) + + +class ConvolveTest(parameterized.TestCase): + + def test_convolve_dictionary(self): + dictionary1 = {1: 2, 3: 4} + dictionary2 = {2: 3, 4: 6} + expected_result = {3: 6, 5: 24, 7: 24} + result = common.convolve_dictionary(dictionary1, dictionary2) + test_util.assert_dictionary_almost_equal(self, expected_result, result) + + def test_convolve_dictionary_with_truncation(self): + dictionary1 = {1: 0.4, 2: 0.6} + dictionary2 = {1: 0.7, 3: 0.3} + expected_result = {3: 0.42, 4: 0.12} + result = common.convolve_dictionary(dictionary1, dictionary2, 0.57) + test_util.assert_dictionary_almost_equal(self, expected_result, result) + + def test_self_convolve_dictionary(self): + inp_dictionary = {1: 2, 3: 5, 4: 6} + expected_result = { + 3: 8, + 5: 60, + 6: 72, + 7: 150, + 8: 360, + 9: 341, + 10: 450, + 11: 540, + 12: 216 + } + result = common.self_convolve_dictionary(inp_dictionary, 3) + test_util.assert_dictionary_almost_equal(self, expected_result, result) + + @parameterized.parameters(([3, 5, 7], 2, [9, 30, 67, 70, 49]), + ([1, 3, 4], 3, [1, 9, 39, 99, 156, 144, 64])) + def test_self_convolve_basic(self, input_list, num_times, expected_result): + min_val, result_list = common.self_convolve(input_list, num_times) + self.assertEqual(0, min_val) + self.assertSequenceAlmostEqual(expected_result, result_list) + + @parameterized.parameters(([0.1, 0.4, 0.5], 3, [-1], 0.5, 2, 6), + ([0.2, 0.6, 0.2], 3, [1], 0.7, 0, 5)) + def test_compute_self_convolve_bounds(self, input_list, num_times, orders, + tail_mass_truncation, + expected_lower_bound, + expected_upper_bound): + lower_bound, upper_bound = common.compute_self_convolve_bounds( + input_list, num_times, tail_mass_truncation, orders=orders) + self.assertEqual(expected_lower_bound, lower_bound) + self.assertEqual(expected_upper_bound, upper_bound) + + @parameterized.parameters( + ([0.1, 0.4, 0.5], 3, 0.5, 2, [0.063, 0.184, 0.315, 0.301, 0.137]), + ([0.2, 0.6, 0.2], 3, 0.7, 1, [0.08, 0.24, 0.36, 0.24, 0.08])) + def test_compute_self_convolve_with_truncation(self, input_list, num_times, + tail_mass_truncation, + expected_min_val, + expected_result_list): + min_val, result_list = common.self_convolve( + input_list, num_times, tail_mass_truncation=tail_mass_truncation) + self.assertEqual(min_val, expected_min_val) + self.assertSequenceAlmostEqual(expected_result_list, result_list) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/fedml/core/dp/budget_accountant/pld/pld_pmf.py b/python/fedml/core/dp/budget_accountant/pld/pld_pmf.py new file mode 100644 index 0000000000..d0dbafc757 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/pld_pmf.py @@ -0,0 +1,567 @@ +"""Probability mass function for privacy loss distributions. + +This file implements work the privacy loss distribution (PLD) probability mass +functions (PMF)and its basic functionalities. 
Please refer to the +supplementary material below for more details: +../../common_docs/Privacy_Loss_Distributions.pdf +""" + +import abc +import itertools +import math +import numbers +from typing import Iterable, List, Mapping, Sequence, Tuple, Union +import numpy as np +from scipy import signal + +from dp_accounting.pld import common + +ArrayLike = Union[np.ndarray, List[float]] +_MAX_PMF_SPARSE_SIZE = 1000 + + +def _get_delta_for_epsilon_vectorized(infinity_mass: float, + losses: Sequence[float], + probs: Sequence[float], + epsilons: Sequence[float]) -> np.ndarray: + """Computes the epsilon-hockey stick divergence. + + Args: + infinity_mass: the probability of the infinite loss. + losses: privacy losses, assumed to be sorted in ascending order. + probs: probabilities corresponding to losses. + epsilons: epsilons in the epsilon-hockey stick divergence, assumed to be + sorted in ascending order. + + Returns: + The list of epsilon-hockey stick divergences for epsilons. + """ + are_epsilons_sorted = np.all(np.diff(epsilons) >= 0) + if not are_epsilons_sorted: + raise ValueError( + 'Epsilons in get_delta_for_epsilon must be sorted in ascending order') + + # For each epsilon: + # delta = sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+ (for more + # details look for PLDPmf class docstring below). + # It can be rewritten as + # delta(epsilon_i) = inf_mass + + # sum((1-exp(eps - loss_j))*prob_j if loss_j >= epsilon_i) = + # inf_mass + sum(prob_j-exp(eps)*prob_j/exp(loss_j) if loss_j >= epsilon_i) = + # inf_mass + sum(prob_j, loss_j >= epsilon_i) - + # exp(eps)*sum(prob_j/exp(loss_j), loss_j >= epsilon_i). + # + # Denote sums in the last formula as mu_upper_mass, mu_lower_mass. We can + # compute them gradually, which makes computation efficiently for multiple + # epsilons. + deltas = np.zeros_like(epsilons, dtype=np.float64) + mu_upper_mass, mu_lower_mass = infinity_mass, 0 + i = len(probs) - 1 + j = len(epsilons) - 1 + + while j >= 0: + if np.isposinf(epsilons[j]): + deltas[j] = infinity_mass + j -= 1 + elif i < 0 or losses[i] <= epsilons[j]: + deltas[j] = mu_upper_mass - np.exp(epsilons[j]) * mu_lower_mass + j -= 1 + else: + mu_upper_mass += probs[i] + mu_lower_mass += probs[i] * np.exp(-losses[i]) + i -= 1 + + return deltas + + +def _get_epsilon_for_delta(infinity_mass: float, + reversed_losses: Iterable[float], + probs: Iterable[float], delta: float) -> float: + """Computes epsilon for which hockey stick divergence is at most delta. + + Args: + infinity_mass: the probability of the infinite loss. + reversed_losses: privacy losses, assumed to be sorted in descending order. + probs: probabilities corresponding to losses. + delta: the target epsilon-hockey stick divergence.. + + Returns: + The smallest epsilon such that the epsilon-hockey stick divergence is at + most delta. When no such finite epsilon exists, return math.inf. + """ + if infinity_mass > delta: + return math.inf + + mass_upper, mass_lower = infinity_mass, 0 + + for loss, prob in zip(reversed_losses, probs): + if (mass_upper > delta and mass_lower > 0 and math.log( + (mass_upper - delta) / mass_lower) >= loss): + # Epsilon is greater than or equal to loss. + break + + mass_upper += prob + mass_lower += math.exp(-loss) * prob + + if mass_upper >= delta and mass_lower == 0: + # This only occurs when loss is very large, which results in exp(-loss) + # being treated as zero. 
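+      # (Illustrative sketch of the reasoning: with mass_lower underflown to
+      # 0, the closed form log((mass_upper - delta) / mass_lower) below
+      # would be infinite, but epsilon never needs to exceed the largest
+      # finite privacy loss, so the current loss, clamped at 0, is returned
+      # instead.)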
+ return max(0, loss) + + if mass_upper <= mass_lower + delta: + return 0 + return math.log((mass_upper - delta) / mass_lower) + + +def _truncate_tails(probs: ArrayLike, tail_mass_truncation: float, + pessimistic_estimate: bool) -> Tuple[int, ArrayLike, float]: + """Truncates an array from both sides by not more than tail_mass_truncation. + + It truncates the maximum prefix and suffix from probs, each of which have + sum <= tail_mass_truncation/2. + + Args: + probs: array to truncate. + tail_mass_truncation: an upper bound on the tails of the probability mass of + the PMF that might be truncated. + pessimistic_estimate: if true then the left truncated sum is added to 0th + element of the truncated array and the right truncated returned as it goes + to infinity. If false then the right truncated sum is added to the last of + the truncated array and the left truncated sum is discarded. + + Returns: + Tuple of (size of truncated prefix, truncated array, mass that goes to + infinity). + """ + if tail_mass_truncation == 0: + return 0, probs, 0 + + def _find_prefix_to_truncate(arr: np.ndarray, threshold: float) -> int: + # Find the max size of array prefix, with the sum of elements less than + # threshold. + s = 0 + for i, val in enumerate(arr): + s += val + if s > threshold: + return i + return len(arr) + + left_idx = _find_prefix_to_truncate(probs, tail_mass_truncation / 2) + right_idx = len(probs) - _find_prefix_to_truncate( + np.flip(probs), tail_mass_truncation / 2) + # Be sure that left_idx <= right_idx. left_idx > right_idx might be when + # tail_mass_truncation is too large or if probs has too small mass + # (i.e. if a few truncations were operated on it already). + right_idx = max(right_idx, left_idx) + + left_mass = np.sum(probs[:left_idx]) + right_mass = np.sum(probs[right_idx:]) + + truncated_probs = probs[left_idx:right_idx] + if pessimistic_estimate: + # put truncated the left mass to the 0th element. + truncated_probs[0] += left_mass + return left_idx, truncated_probs, right_mass + # This is rounding to left case. Put truncated the right mass to the last + # element. + truncated_probs[-1] += right_mass + return left_idx, truncated_probs, 0 + + +class PLDPmf(abc.ABC): + """Base class for probability mass functions for privacy loss distributions. + + The privacy loss distribution (PLD) of two discrete distributions, the upper + distribution mu_upper and the lower distribution mu_lower, is defined as a + distribution on real numbers generated by first sampling an outcome o + according to mu_upper and then outputting the privacy loss + ln(mu_upper(o) / mu_lower(o)) where mu_lower(o) and mu_upper(o) are the + probability masses of o in mu_lower and mu_upper respectively. This class + allows one to create and manipulate privacy loss distributions. + + PLD allows one to (approximately) compute the epsilon-hockey stick divergence + between mu_upper and mu_lower, which is defined as + sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+. This quantity in turn + governs the parameter delta of (eps, delta)-differential privacy of the + corresponding protocol. (See Observation 1 in the supplementary material.) + + The above definitions extend to continuous distributions. 
The PLD of two + continuous distributions mu_upper and mu_lower is defined as a distribution on + real numbers generated by first sampling an outcome o according to mu_upper + and then outputting the privacy loss ln(f_{mu_upper}(o) / f_{mu_lower}(o)) + where f_{mu_lower}(o) and f_{mu_upper}(o) are the probability density + functions at o in mu_lower and mu_upper respectively. Moreover, for continuous + distributions the epsilon-hockey stick divergence is defined as + integral [f_{mu_upper}(o) - e^{epsilon} * f_{mu_lower}(o)]_+ do. + """ + + def __init__(self, discretization: float, infinity_mass: float, + pessimistic_estimate: bool): + self._discretization = discretization + self._infinity_mass = infinity_mass + self._pessimistic_estimate = pessimistic_estimate + + @property + @abc.abstractmethod + def size(self) -> int: + """Returns number of points in discretization.""" + + @abc.abstractmethod + def compose(self, + other: 'PLDPmf', + tail_mass_truncation: float = 0) -> 'PLDPmf': + """Computes a PMF resulting from composing two PMFs. + + Args: + other: the privacy loss distribution PMF to be composed. The two must have + the same discretization and pessimistic_estimate. + tail_mass_truncation: an upper bound on the tails of the probability mass + of the PMF that might be truncated. + + Returns: + A PMF which is the result of convolving (composing) the two. + """ + + @abc.abstractmethod + def self_compose(self, + num_times: int, + tail_mass_truncation: float = 0) -> 'PLDPmf': + """Computes PMF resulting from repeated composing the PMF with itself. + + Args: + num_times: the number of times to compose this PMF with itself. + tail_mass_truncation: an upper bound on the tails of the probability mass + of the PMF that might be truncated. + + Returns: + A privacy loss distribution PMF which is the result of the composition. + """ + + @abc.abstractmethod + def get_delta_for_epsilon( + self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]: + """Computes the epsilon-hockey stick divergence.""" + + @abc.abstractmethod + def get_epsilon_for_delta(self, delta: float) -> float: + """Computes epsilon for which hockey stick divergence is at most delta.""" + + @abc.abstractmethod + def to_dense_pmf(self) -> 'DensePLDPmf': + """Returns the dense PMF with data from 'self'.""" + + @abc.abstractmethod + def get_delta_for_epsilon_for_composed_pld(self, other: 'PLDPmf', + epsilon: float) -> float: + """Computes delta for 'epsilon' for the composiion of 'self' and 'other'.""" + + def validate_composable(self, other: 'PLDPmf'): + """Checks whether 'self' and 'other' can be composed.""" + if not isinstance(self, type(other)): + raise ValueError(f'Only PMFs of the same type can be composed:' + f'{type(self).__name__} != {type(other).__name__}.') + # pylint: disable=protected-access + if self._discretization != other._discretization: + raise ValueError(f'Discretization intervals are different: ' + f'{self._discretization} != ' + f'{other._discretization}.') + if self._pessimistic_estimate != other._pessimistic_estimate: + raise ValueError(f'Estimation types are different: ' + f'{self._pessimistic_estimate} != ' + f'{other._pessimistic_estimate}.') # pylint: disable=protected-access + # pylint: enable=protected-access + + +class DensePLDPmf(PLDPmf): + """Class for dense probability mass function. + + It represents a discrete probability distribution on a grid of privacy losses. + The grid contains numbers multiple of 'discretization', starting from + lower_loss * discretization. 
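+
+  Example (illustrative): with discretization 0.1, lower_loss -1 and probs
+  [0.2, 0.3, 0.0, 0.4], probs[i] is the mass at privacy loss (i - 1) * 0.1,
+  i.e. at -0.1, 0.0, 0.1 and 0.2 respectively.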
+ """ + + def __init__(self, discretization: float, lower_loss: int, probs: np.ndarray, + infinity_mass: float, pessimistic_estimate: bool): + super().__init__(discretization, infinity_mass, pessimistic_estimate) + self._lower_loss = lower_loss + self._probs = probs + + @property + def size(self) -> int: + return len(self._probs) + + def compose(self, + other: 'DensePLDPmf', + tail_mass_truncation: float = 0) -> 'DensePLDPmf': + """Computes a PMF resulting from composing two PMFs. See base class.""" + self.validate_composable(other) + + # pylint: disable=protected-access + lower_loss = self._lower_loss + other._lower_loss + probs = signal.fftconvolve(self._probs, other._probs) + infinity_mass = 1 - (1 - self._infinity_mass) * (1 - other._infinity_mass) + offset, probs, right_tail = _truncate_tails(probs, tail_mass_truncation, + self._pessimistic_estimate) + # pylint: enable=protected-access + return DensePLDPmf(self._discretization, lower_loss + offset, probs, + infinity_mass + right_tail, self._pessimistic_estimate) + + def self_compose(self, + num_times: int, + tail_mass_truncation: float = 1e-15) -> 'DensePLDPmf': + """See base class.""" + if num_times <= 0: + raise ValueError(f'num_times should be >= 1, num_times={num_times}') + lower_loss = self._lower_loss * num_times + truncation_lower_bound, probs = common.self_convolve( + self._probs, num_times, tail_mass_truncation) + lower_loss += truncation_lower_bound + probs = np.array(probs) + inf_prob = 1 - (1 - self._infinity_mass)**num_times + offset, probs, right_tail = _truncate_tails(probs, tail_mass_truncation, + self._pessimistic_estimate) + return DensePLDPmf(self._discretization, lower_loss + offset, probs, + inf_prob + right_tail, self._pessimistic_estimate) + + def get_delta_for_epsilon( + self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]: + """Computes the epsilon-hockey stick divergence.""" + losses = (np.arange(self.size) + self._lower_loss) * self._discretization + + is_scalar = isinstance(epsilon, numbers.Number) + if is_scalar: + epsilon = [epsilon] + + delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, + self._probs, epsilon) + if is_scalar: + delta = delta[0] + return delta + + def get_epsilon_for_delta(self, delta: float) -> float: + """Computes epsilon for which hockey stick divergence is at most delta.""" + upper_loss = (self._lower_loss + len(self._probs) - + 1) * self._discretization + reversed_losses = itertools.count(upper_loss, -self._discretization) + + return _get_epsilon_for_delta(self._infinity_mass, reversed_losses, + np.flip(self._probs), delta) + + def to_dense_pmf(self) -> 'DensePLDPmf': + return self + + def get_delta_for_epsilon_for_composed_pld(self, other: PLDPmf, + epsilon: float) -> float: + other = other.to_dense_pmf() + self.validate_composable(other) + discretization = self._discretization + # pylint: disable=protected-access + self_loss = lambda index: (index + self._lower_loss) * discretization + other_loss = lambda index: (index + other._lower_loss) * discretization + + self_probs, other_probs = self._probs, other._probs + len_self, len_other = len(self_probs), len(other_probs) + delta = 1 - (1 - self._infinity_mass) * (1 - other._infinity_mass) + # pylint: enable=protected-access + + # Compute the hockey stick divergence using equation (2) in the + # supplementary material. upper_mass represents summation in equation (3) + # and lower_mass represents the summation in equation (4). 
+ + if self_loss(len_self - 1) + other_loss(len_other - 1) <= epsilon: + return delta + + i, j = 0, len_other - 1 + upper_mass = lower_mass = 0 + + # This is summation by i,j, such that self_loss(i) + other_loss(j) >= + # epsilon, and self_loss(i) + other_loss(j-1)< epsilon, as in the + # equation(2). + + # If i is todo small then increase it. + while self_loss(i) + other_loss(j) < epsilon: + i += 1 + + # Else if j is too large then decrease it. + while j >= 0 and self_loss(i) + other_loss(j - 1) >= epsilon: + upper_mass += other_probs[j] + lower_mass += other_probs[j] * np.exp(-other_loss(j)) + j -= 1 + + # Invariant: + # self_loss(i) + other_loss(j-1) < epsilon <= self_loss(i) + other_loss(j) + # Sum over all i, keeping this invariant. + for i in range(i, len_self): + if j >= 0: + upper_mass += other_probs[j] + lower_mass += other_probs[j] * np.exp(-other_loss(j)) + j -= 1 + delta += self_probs[i] * ( + upper_mass - np.exp(epsilon - self_loss(i)) * lower_mass) + + return delta + + +class SparsePLDPmf(PLDPmf): + """Class for sparse probability mass function. + + It represents a discrete probability distribution on a grid of 1d losses with + a dictionary. The grid contains numbers multiples of 'discretization'. + """ + + def __init__(self, loss_probs: Mapping[int, float], discretization: float, + infinity_mass: float, pessimistic_estimate: bool): + super().__init__(discretization, infinity_mass, pessimistic_estimate) + self._loss_probs = loss_probs + + @property + def size(self) -> int: + return len(self._loss_probs) + + def compose(self, + other: 'SparsePLDPmf', + tail_mass_truncation: float = 0) -> 'SparsePLDPmf': + """Computes a PMF resulting from composing two PMFs. See base class.""" + self.validate_composable(other) + # Assumed small number of points, so simple quadratic algorithm is fine. + convolution = {} + # pylint: disable=protected-access + for key1, value1 in self._loss_probs.items(): + for key2, value2 in other._loss_probs.items(): + key = key1 + key2 + convolution[key] = convolution.get(key, 0.0) + value1 * value2 + infinity_mass = 1 - (1 - self._infinity_mass) * (1 - other._infinity_mass) + # pylint: enable=protected-access + # Do truncation. + sorted_losses = sorted(convolution.keys()) + probs = [convolution[loss] for loss in sorted_losses] + offset, probs, right_mass = _truncate_tails(probs, tail_mass_truncation, + self._pessimistic_estimate) + sorted_losses = sorted_losses[offset:offset + len(probs)] + truncated_convolution = dict(zip(sorted_losses, probs)) + return SparsePLDPmf(truncated_convolution, self._discretization, + infinity_mass + right_mass, self._pessimistic_estimate) + + def self_compose(self, + num_times: int, + tail_mass_truncation: float = 1e-15) -> 'PLDPmf': + """See base class.""" + if num_times <= 0: + raise ValueError(f'num_times should be >= 1, num_times={num_times}') + if num_times == 1: + return self + + # Compute a rough upper bound overestimate, since from some power, the PMF + # becomes dense and start growing linearly further. But in this case we + # should definitely go to dense. + max_result_size = self.size**num_times + + if max_result_size > _MAX_PMF_SPARSE_SIZE: + # The size of composed PMF is too large for sparse. Convert to dense. + return self.to_dense_pmf().self_compose(num_times, tail_mass_truncation) + + result = self + for i in range(2, num_times + 1): + # To truncate only on the last composition. 
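+      # (Illustrative reasoning: truncating at every intermediate step would
+      # let the discarded mass accumulate across iterations; deferring the
+      # truncation to the final compose keeps the total discarded mass
+      # within tail_mass_truncation.)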
+ mass_truncation = 0 if i != num_times else tail_mass_truncation + result = result.compose(self, mass_truncation) + + return result + + def _get_losses_probs(self) -> Tuple[List[float], List[float]]: + """Returns losses, sorted ascendingly and respective probabilities.""" + losses = sorted(list(self._loss_probs.keys())) + probs = [self._loss_probs[loss] for loss in losses] + losses = [loss * self._discretization for loss in losses] + return losses, probs + + def get_delta_for_epsilon( + self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]: + """Computes the epsilon-hockey stick divergence.""" + losses, probs = self._get_losses_probs() + is_scalar = isinstance(epsilon, numbers.Number) + if is_scalar: + epsilon = [epsilon] + + delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, + probs, epsilon) + if is_scalar: + delta = delta[0] + return delta + + def get_epsilon_for_delta(self, delta: float) -> float: + """Computes epsilon for which hockey stick divergence is at most delta.""" + losses, probs = self._get_losses_probs() + return _get_epsilon_for_delta(self._infinity_mass, losses[::-1], + probs[::-1], delta) + + def get_delta_for_epsilon_for_composed_pld(self, other: PLDPmf, + epsilon: float) -> float: + # If 'self' is sparse, then it is small, so it is not so expensive to + # convert to dense. Let us convert it for simplicity for dense. + return self.to_dense_pmf().get_delta_for_epsilon_for_composed_pld( + other, epsilon) + + def to_dense_pmf(self) -> DensePLDPmf: + """"Converts to dense PMF.""" + lower_loss, probs = common.dictionary_to_list(self._loss_probs) + return DensePLDPmf(self._discretization, lower_loss, np.array(probs), + self._infinity_mass, self._pessimistic_estimate) + + +def create_pmf(loss_probs: Mapping[int, float], discretization: float, + infinity_mass: float, pessimistic_estimate: bool) -> PLDPmf: + """Creates PLDPmfs. + + It returns SparsePLDPmf if the size of loss_probs less than + MAX_PMF_SPARSE_SIZE, otherwise DensePLDPmf. + + Args: + loss_probs: probability mass function of the discretized privacy loss + distribution. + discretization: the interval length for which the values of the privacy loss + distribution are discretized. + infinity_mass: infinity_mass for privacy loss distribution. + pessimistic_estimate: whether the rounding is done in such a way that the + resulting epsilon-hockey stick divergence computation gives an upper + estimate to the real value. + + Returns: + Created PLDPmf. + """ + if len(loss_probs) <= _MAX_PMF_SPARSE_SIZE: + return SparsePLDPmf(loss_probs, discretization, infinity_mass, + pessimistic_estimate) + + lower_loss, probs = common.dictionary_to_list(loss_probs) + probs = np.array(probs) + return DensePLDPmf(discretization, lower_loss, probs, infinity_mass, + pessimistic_estimate) + + +def compose_pmfs(pmf1: PLDPmf, + pmf2: PLDPmf, + tail_mass_truncation: float = 0) -> PLDPmf: + """Computes a PMF resulting from composing two PMFs. + + It returns SparsePLDPmf only if input PLDPmfs are SparsePLDPmf and the + product of input pmfs sizes are less than MAX_PMF_SPARSE_SIZE. + + Args: + pmf1: the privacy loss distribution PMF to be composed. + pmf2: the privacy loss distribution PMF to be composed. The two must have + the same discretization and pessimistic_estimate. + tail_mass_truncation: an upper bound on the tails of the probability mass of + the PMF that might be truncated. + + Returns: + A PMF which is the result of convolving (composing) the two. 
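+
+  Example (illustrative): composing two SparsePLDPmf objects of sizes 20 and
+  40 (product 800 <= _MAX_PMF_SPARSE_SIZE) stays sparse, while sizes 40 and
+  30 (product 1200) trigger conversion to DensePLDPmf.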
+ """ + max_result_size = pmf1.size * pmf2.size + if (isinstance(pmf1, SparsePLDPmf) and isinstance(pmf2, SparsePLDPmf) and + max_result_size <= _MAX_PMF_SPARSE_SIZE): + return pmf1.compose(pmf2, tail_mass_truncation) + + pmf1 = pmf1.to_dense_pmf() + pmf2 = pmf2.to_dense_pmf() + return pmf1.compose(pmf2, tail_mass_truncation) diff --git a/python/fedml/core/dp/budget_accountant/pld/pld_pmf_test.py b/python/fedml/core/dp/budget_accountant/pld/pld_pmf_test.py new file mode 100644 index 0000000000..0d6b16169c --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/pld_pmf_test.py @@ -0,0 +1,426 @@ +"""Tests for PLDPmf.""" + +import unittest +from absl.testing import parameterized +import numpy as np + +from dp_accounting.pld import common +from dp_accounting.pld import pld_pmf +from dp_accounting.pld import test_util + + +class PLDPmfTest(parameterized.TestCase): + + def _create_pmf(self, + discretization: float, + dense: bool, + infinity_mass: float = 0.0, + lower_loss: int = 0, + probs: np.ndarray = np.array([1.0]), + pessimistic_estimate: bool = True) -> pld_pmf.PLDPmf: + """Helper function for creating PLD for testing.""" + if dense: + return pld_pmf.DensePLDPmf(discretization, lower_loss, probs, + infinity_mass, pessimistic_estimate) + + loss_probs = common.list_to_dictionary(probs, lower_loss) + return pld_pmf.SparsePLDPmf(loss_probs, discretization, infinity_mass, + pessimistic_estimate) + + def _check_dense_probs(self, dense_pmf: pld_pmf.DensePLDPmf, + expected_lower_loss: int, expected_probs: np.ndarray): + """Checks that resulting dense pmf satisfies expectations.""" + self.assertEqual(expected_lower_loss, dense_pmf._lower_loss) + self.assertSequenceAlmostEqual(expected_probs, dense_pmf._probs) + + def _check_sparse_probs(self, sparse_pmf: pld_pmf.SparsePLDPmf, + expected_lower_loss: int, expected_probs: np.ndarray): + """Checks that resulting sparse pmf satisfies expectations.""" + expected_loss_probs = common.list_to_dictionary(expected_probs, + expected_lower_loss) + test_util.assert_dictionary_almost_equal(self, expected_loss_probs, + sparse_pmf._loss_probs) + + @parameterized.parameters(False, True) + def test_delta_for_epsilon(self, dense: bool): + discretization = 0.1 + infinity_mass = 0.1 + lower_loss = -1 + probs = np.array([0.2, 0.3, 0, 0.4]) + pmf = self._create_pmf(discretization, dense, infinity_mass, lower_loss, + probs) + self.assertAlmostEqual(0.1, pmf.get_delta_for_epsilon(3)) # infinity_mass + self.assertAlmostEqual(0.1 + 0.4 * (1 - np.exp(-0.1)), + pmf.get_delta_for_epsilon(0.1)) + self.assertAlmostEqual(1, pmf.get_delta_for_epsilon(-20)) + self.assertEqual(infinity_mass, pmf.get_delta_for_epsilon(np.inf)) + self.assertAlmostEqual(1, pmf.get_delta_for_epsilon(-np.inf)) + + @parameterized.parameters(False, True) + def test_delta_for_epsilon_vectorized(self, dense: bool): + discretization = 0.1 + infinity_mass = 0.1 + lower_loss = -1 + probs = np.array([0.2, 0.3, 0, 0.4]) + pmf = self._create_pmf(discretization, dense, infinity_mass, lower_loss, + probs) + epsilon = [-np.inf, -20, 0.1, np.inf] + expected_delta = [ + 1, 1, infinity_mass + 0.4 * (1 - np.exp(-0.1)), infinity_mass + ] + + self.assertSequenceAlmostEqual(expected_delta, + pmf.get_delta_for_epsilon(epsilon)) + + def test_delta_for_epsilon_not_sorted(self): + pmf = self._create_pmf(discretization=0.1, dense=True, infinity_mass=0) + epsilon = [2.0, 3.0, 1.0] # not sorted + + with self.assertRaisesRegex( + ValueError, + 'Epsilons in get_delta_for_epsilon must be sorted in ascending order'): + 
pmf.get_delta_for_epsilon(epsilon) + + @parameterized.parameters(False, True) + def test_get_delta_for_epsilon_for_composed_pld(self, dense): + discretization = 0.1 + infinity_mass1, lower_loss1, probs1 = 0.1, -1, np.array( + [0.2, 0.3, 0, 0.1, 0.3]) + infinity_mass2, lower_loss2, probs2 = 0.2, -2, np.array([0.1, 0, 0.4, 0.3]) + pmf1 = self._create_pmf(discretization, dense, infinity_mass1, lower_loss1, + probs1) + pmf2 = self._create_pmf(discretization, dense, infinity_mass2, lower_loss2, + probs2) + pmf_composed = pmf1.compose(pmf2) + for epsilon in np.linspace(-10, 10, num=100): + delta1 = pmf1.get_delta_for_epsilon_for_composed_pld(pmf2, epsilon) + delta2 = pmf_composed.get_delta_for_epsilon(epsilon) + self.assertAlmostEqual(delta1, delta2, msg=f'{epsilon}') + + @parameterized.parameters(False, True) + def test_epsilon_for_delta(self, dense): + discretization = 0.1 + lower_loss = -1 # loss_value + probs = np.array([0.2, 0.3, 0, 0.4]) # probs for losses -0.1, 0, 0.1, 0.2 + infinity_mass = 0.1 + pmf = self._create_pmf(discretization, dense, infinity_mass, lower_loss, + probs) + self.assertEqual(np.inf, pmf.get_epsilon_for_delta(0.05)) # bool: + return self._maybe_compose(event, 0, False) + + def _compose(self, event: dp_event.DpEvent, count: int = 1): + self._maybe_compose(event, count, True) + + def _maybe_compose(self, event: dp_event.DpEvent, count: int, + do_compose: bool) -> bool: + """Traverses `event` and performs composition if `do_compose` is True. + + If `do_compose` is False, can be used to check whether composition is + supported. + + Args: + event: A `DpEvent` to process. + count: The number of times to compose the event. + do_compose: Whether to actually perform the composition. + + Returns: + True if event is supported, otherwise False. 
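+
+    Note (illustrative): a do_compose=False call is a pure dry run. Every
+    state mutation in this method is guarded by `if do_compose`, so checking
+    support never touches self._pld or self._contains_non_dp_event.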
+ """ + + if isinstance(event, dp_event.NoOpDpEvent): + return True + elif isinstance(event, dp_event.NonPrivateDpEvent): + if do_compose: + self._contains_non_dp_event = True + return True + elif isinstance(event, dp_event.SelfComposedDpEvent): + return self._maybe_compose(event.event, event.count * count, do_compose) + elif isinstance(event, dp_event.ComposedDpEvent): + return all( + self._maybe_compose(e, count, do_compose) for e in event.events) + elif isinstance(event, dp_event.GaussianDpEvent): + if do_compose: + gaussian_pld = PLD.from_gaussian_mechanism( + standard_deviation=event.noise_multiplier / math.sqrt(count), + value_discretization_interval=self._value_discretization_interval) + self._pld = self._pld.compose(gaussian_pld) + return True + elif isinstance(event, dp_event.LaplaceDpEvent): + if do_compose: + laplace_pld = PLD.from_laplace_mechanism( + parameter=event.noise_multiplier, + value_discretization_interval=self._value_discretization_interval + ).self_compose(count) + self._pld = self._pld.compose(laplace_pld) + return True + elif isinstance(event, dp_event.PoissonSampledDpEvent): + if self.neighboring_relation != NeighborRel.ADD_OR_REMOVE_ONE: + return False + if isinstance(event.event, dp_event.GaussianDpEvent): + if do_compose: + subsampled_gaussian_pld = PLD.from_gaussian_mechanism( + standard_deviation=event.event.noise_multiplier, + value_discretization_interval=self._value_discretization_interval, + sampling_prob=event.sampling_probability).self_compose(count) + self._pld = self._pld.compose(subsampled_gaussian_pld) + return True + elif isinstance(event.event, dp_event.LaplaceDpEvent): + if do_compose: + subsampled_laplace_pld = PLD.from_laplace_mechanism( + parameter=event.event.noise_multiplier, + value_discretization_interval=self._value_discretization_interval, + sampling_prob=event.sampling_probability).self_compose(count) + self._pld = self._pld.compose(subsampled_laplace_pld) + return True + else: + return False + else: + # Unsupported event (including `UnsupportedDpEvent`). 
+ return False + + def get_epsilon(self, target_delta: float) -> float: + if self._contains_non_dp_event: + return math.inf + return self._pld.get_epsilon_for_delta(target_delta) + + def get_delta(self, target_epsilon: float) -> float: + if self._contains_non_dp_event: + return 1 + return self._pld.get_delta_for_epsilon(target_epsilon) # pytype: disable=bad-return-type diff --git a/python/fedml/core/dp/budget_accountant/pld/pld_privacy_accountant_test.py b/python/fedml/core/dp/budget_accountant/pld/pld_privacy_accountant_test.py new file mode 100644 index 0000000000..dbe03b6594 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/pld_privacy_accountant_test.py @@ -0,0 +1,111 @@ +"""Tests for pld_privacy_accountant.""" + +import math + +from absl.testing import absltest +from absl.testing import parameterized + +from dp_accounting import dp_event +from dp_accounting import privacy_accountant_test +from dp_accounting.pld import pld_privacy_accountant + + +class PldPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, + parameterized.TestCase): + + def _make_test_accountants(self): + return [pld_privacy_accountant.PLDAccountant()] + + @parameterized.parameters( + dp_event.GaussianDpEvent(1.0), + dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(1.0), 6), + dp_event.ComposedDpEvent( + [dp_event.GaussianDpEvent(1.0), + dp_event.GaussianDpEvent(2.0)]), + dp_event.PoissonSampledDpEvent(0.1, dp_event.GaussianDpEvent(1.0)), + dp_event.ComposedDpEvent([ + dp_event.PoissonSampledDpEvent(0.1, dp_event.GaussianDpEvent(1.0)), + dp_event.GaussianDpEvent(2.0) + ])) + def test_supports_gaussian(self, event): + pld_accountant = pld_privacy_accountant.PLDAccountant() + self.assertTrue(pld_accountant.supports(event)) + + @parameterized.parameters(0, -1) + def test_non_positive_composition_value_error(self, count): + event = dp_event.GaussianDpEvent(1.0) + accountant = pld_privacy_accountant.PLDAccountant() + with self.assertRaises(ValueError): + accountant.compose(event, count) + + def test_gaussian_basic(self): + gaussian_event = dp_event.GaussianDpEvent(noise_multiplier=math.sqrt(3)) + accountant = pld_privacy_accountant.PLDAccountant() + accountant.compose(gaussian_event, 1) + accountant.compose(gaussian_event, 2) + + exact_epsilon = 1 + exact_delta = 0.126936 + self.assertAlmostEqual( + accountant.get_delta(exact_epsilon), exact_delta, delta=1e-3) + self.assertAlmostEqual( + accountant.get_epsilon(exact_delta), exact_epsilon, delta=1e-3) + + def test_poisson_subsampled_gaussian(self): + subsampled_gaussian_event = dp_event.PoissonSampledDpEvent( + 0.2, dp_event.GaussianDpEvent(noise_multiplier=0.5)) + accountant = pld_privacy_accountant.PLDAccountant() + accountant.compose(subsampled_gaussian_event, 1) + accountant.compose(subsampled_gaussian_event, 2) + + exact_epsilon = 1 + expected_delta = 0.15594 + self.assertAlmostEqual( + accountant.get_delta(exact_epsilon), expected_delta, delta=1e-3) + self.assertAlmostEqual( + accountant.get_epsilon(expected_delta), exact_epsilon, delta=1e-3) + + def test_self_composed_subsampled_gaussian(self): + event = dp_event.SelfComposedDpEvent( + dp_event.PoissonSampledDpEvent(0.2, dp_event.GaussianDpEvent(0.5)), 3) + accountant = pld_privacy_accountant.PLDAccountant() + accountant.compose(event) + + exact_epsilon = 1 + expected_delta = 0.15594 + self.assertAlmostEqual( + accountant.get_delta(exact_epsilon), expected_delta, delta=1e-3) + self.assertAlmostEqual( + accountant.get_epsilon(expected_delta), exact_epsilon, delta=1e-3) + + def 
test_laplace_basic(self): + first_laplace_event = dp_event.LaplaceDpEvent(noise_multiplier=1) + second_laplace_event = dp_event.LaplaceDpEvent(noise_multiplier=2) + accountant = pld_privacy_accountant.PLDAccountant() + accountant.compose(first_laplace_event, 3) + accountant.compose(second_laplace_event, 2) + + expected_epsilon = 4 + expected_delta = 0 + self.assertAlmostEqual( + accountant.get_delta(expected_epsilon), expected_delta, delta=1e-6) + self.assertAlmostEqual( + accountant.get_epsilon(expected_delta), expected_epsilon, delta=1e-6) + + def test_poisson_subsampled_laplace(self): + subsampled_laplace_event = dp_event.PoissonSampledDpEvent( + 0.2, dp_event.LaplaceDpEvent(noise_multiplier=0.5)) + accountant = pld_privacy_accountant.PLDAccountant() + accountant.compose(subsampled_laplace_event, 1) + accountant.compose(subsampled_laplace_event, 2) + + exact_epsilon = 2.46964 + expected_delta = 0 + self.assertAlmostEqual( + accountant.get_delta(exact_epsilon), expected_delta, delta=1e-6) + self.assertAlmostEqual( + accountant.get_epsilon(expected_delta), exact_epsilon, delta=1e-3) + + +if __name__ == '__main__': + absltest.main() diff --git a/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution.py b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution.py new file mode 100644 index 0000000000..5bc1c0e636 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution.py @@ -0,0 +1,1255 @@ +"""Implementing Privacy Loss Distribution. + +This file implements the privacy loss distribution (PLD) and its basic +functionalities. The main feature of PLD is that it allows for accurate +computation of privacy parameters under composition. Please refer to the +supplementary material below for more details: +../../common_docs/Privacy_Loss_Distributions.pdf +""" + +import collections +import logging +import math +from typing import Any, Callable, Mapping, Optional, Sequence, Tuple, Union +import numpy as np + +from dp_accounting.pld import common +from dp_accounting.pld import pld_pmf +from dp_accounting.pld import privacy_loss_mechanism + + +def _deprecation_warning(method_name: str): + logging.warning('PrivacyLossDistribution.%s() will be deprecated shortly. ' + 'Use factory method %s() instead.', method_name, method_name) + + +class PrivacyLossDistribution: + """Class for privacy loss distributions and computation involving them. + + The privacy loss distribution (PLD) of two discrete distributions, the upper + distribution mu_upper and the lower distribution mu_lower, is defined as a + distribution on real numbers generated by first sampling an outcome o + according to mu_upper and then outputting the privacy loss + ln(mu_upper(o) / mu_lower(o)) where mu_lower(o) and mu_upper(o) are the + probability masses of o in mu_lower and mu_upper respectively. This class + allows one to create and manipulate privacy loss distributions. + + PLD allows one to (approximately) compute the epsilon-hockey stick divergence + between mu_upper and mu_lower, which is defined as + sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+. This quantity in turn + governs the parameter delta of (eps, delta)-differential privacy of the + corresponding protocol. (See Observation 1 in the supplementary material.) + + The above definitions extend to continuous distributions. 
The PLD of two + continuous distributions mu_upper and mu_lower is defined as a distribution on + real numbers generated by first sampling an outcome o according to mu_upper + and then outputting the privacy loss ln(f_{mu_upper}(o) / f_{mu_lower}(o)) + where f_{mu_lower}(o) and f_{mu_upper}(o) are the probability density + functions at o in mu_lower and mu_upper respectively. Moreover, for continuous + distributions the epsilon-hockey stick divergence is defined as + integral [f_{mu_upper}(o) - e^{epsilon} * f_{mu_lower}(o)]_+ do. + + A single privacy loss distribution is represented as an object of the class + BasicPrivacyLossDistribution. This class, on the other hand, holds the higher + level logic. + + Namely, this class maintains up to two BasicPrivacyLossDistribution objects. + One for the 'add' adjacency type, which specifies the privacy loss + distribution for a mechanism M with mu_upper = M(D) and mu_lower = M(D'), + where D' contains one more datapoint than D. + And one for the 'remove' adjacency type, which specifies the privacy loss + distribution for a mechanism M, with mu_upper = M(D) and mu_lower = M(D'), + where D' contains one less datapoint than D. + In the case where both privacy loss distributions are the same, only one copy + is maintained. + + While this class offers additional support with respect to the ADD/REMOVE + adjacency, this is not an inherent limitation; this class can also be used + in the case of other adjacencies such as Substitution. + + Factory methods in this module provide a convenient way to generate objects of + this class associated to various mechanisms. + + Attributes: + _basic_pld_remove: basic privacy loss distribution with respect to REMOVE + adjacency. + _basic_pld_add: basic privacy loss distribution with respect to ADD + adjacency. + _symmetric: When True, basic_pld_add is assumed to be the same as + basic_pld_remove. + _basic_pld: An alias for basic_pld_remove. Useful when symmetric is True. + """ + + def __init__(self, + pmf_remove: pld_pmf.PLDPmf, + pmf_add: Optional[pld_pmf.PLDPmf] = None): + """Initialization method for PrivacyLossDistribution.""" + self._pmf_remove = pmf_remove + self._symmetric = pmf_add is None + self._pmf_add = pmf_remove if self._symmetric else pmf_add + + @classmethod + def create_from_rounded_probability( + cls, + rounded_probability_mass_function: Mapping[int, float], + infinity_mass: float, + value_discretization_interval: float, + pessimistic_estimate: bool = True, + rounded_probability_mass_function_add: Optional[Mapping[int, + float]] = None, + infinity_mass_add: Optional[float] = None, + symmetric: bool = True) -> 'PrivacyLossDistribution': + """Create PrivacyLossDistribution from rounded probability mass function(s). + + Args: + rounded_probability_mass_function: rounded probability mass function of + the basic privacy loss distribution, with respect to REMOVE adjacency. + infinity_mass: infinity_mass for basic privacy loss distribution with + respect to the REMOVE adjacency. + value_discretization_interval: the interval length for which the values of + the privacy loss distribution are discretized. In particular, the values + are always integer multiples of value_discretization_interval. Smaller + value results in more accurate estimates of the privacy loss, at the + cost of increased run-time / memory usage. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. 
+      pessimistic_estimate: whether the rounding is done in such a way that
+        the resulting epsilon-hockey stick divergence computation gives an
+        upper estimate to the real value.
+      rounded_probability_mass_function_add: rounded probability mass function
+        of the privacy loss distribution, with respect to ADD adjacency.
+      infinity_mass_add: infinity_mass for the privacy loss distribution with
+        respect to the ADD adjacency.
+      symmetric: When True, the privacy loss distribution with respect to ADD
+        adjacency is assumed to be the same as that for REMOVE adjacency.
+        Arguments rounded_probability_mass_function_add, infinity_mass_add are
+        ignored in this case.
+
+    Returns:
+      A PrivacyLossDistribution object.
+    """
+    pmf_remove = pld_pmf.create_pmf(rounded_probability_mass_function,
+                                    value_discretization_interval,
+                                    infinity_mass, pessimistic_estimate)
+    pmf_add = None
+    if symmetric:
+      if (rounded_probability_mass_function_add is not None or
+          infinity_mass_add is not None):
+        raise ValueError('Details about privacy loss distribution with '
+                         'respect to ADD adjacency cannot be specified when '
+                         'symmetric')
+    else:
+      if (rounded_probability_mass_function_add is None or
+          infinity_mass_add is None):
+        raise ValueError('Details about privacy loss distribution with '
+                         'respect to ADD adjacency should be specified when '
+                         'not symmetric')
+      pmf_add = pld_pmf.create_pmf(rounded_probability_mass_function_add,
+                                   value_discretization_interval,
+                                   infinity_mass_add, pessimistic_estimate)
+    return cls(pmf_remove, pmf_add)
+
+  @classmethod
+  def identity(
+      cls,
+      value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution':
+    """Constructs an identity privacy loss distribution.
+
+    This class method will be deprecated shortly. Use factory method identity()
+    instead.
+
+    Args:
+      value_discretization_interval: the discretization interval for the
+        privacy loss distribution. The values will be rounded up/down to be
+        integer multiples of this number. Smaller value results in more
+        accurate estimates of the privacy loss, at the cost of increased
+        run-time / memory usage.
+
+    Returns:
+      The privacy loss distribution corresponding to an algorithm with no
+      privacy leak (i.e. output is independent of input).
+    """
+    _deprecation_warning('identity')
+    return identity(value_discretization_interval)
+
+  @classmethod
+  def from_two_probability_mass_functions(
+      cls,
+      log_probability_mass_function_lower: Mapping[Any, float],
+      log_probability_mass_function_upper: Mapping[Any, float],
+      pessimistic_estimate: bool = True,
+      value_discretization_interval: float = 1e-4,
+      log_mass_truncation_bound: float = -math.inf
+  ) -> 'PrivacyLossDistribution':
+    """Constructs a privacy loss distribution from mu_lower and mu_upper.
+
+    This class method will be deprecated shortly. Use factory method
+    from_two_probability_mass_functions() instead.
+
+    Args:
+      log_probability_mass_function_lower: the probability mass function of
+        mu_lower represented as a dictionary where each key is an outcome o of
+        mu_lower and the corresponding value is the natural log of the
+        probability mass of mu_lower at o.
+      log_probability_mass_function_upper: the probability mass function of
+        mu_upper represented as a dictionary where each key is an outcome o of
+        mu_upper and the corresponding value is the natural log of the
+        probability mass of mu_upper at o.
+ pessimistic_estimate: whether the rounding is done in such a way that the + resulting epsilon-hockey stick divergence computation gives an upper + estimate to the real value. + value_discretization_interval: the dicretization interval for the privacy + loss distribution. The values will be rounded up/down to be integer + multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / + memory usage. + log_mass_truncation_bound: when the log of the probability mass of the + upper distribution is below this bound, it is either (i) included in + infinity_mass in the case of pessimistic estimate or (ii) discarded + completely in the case of optimistic estimate. The larger + log_mass_truncation_bound is, the more error it may introduce in + divergence calculations. + + Returns: + The privacy loss distribution constructed as specified. + """ + _deprecation_warning('from_two_probability_mass_functions') + return from_two_probability_mass_functions( + log_probability_mass_function_lower, + log_probability_mass_function_upper, + pessimistic_estimate, + value_discretization_interval, + log_mass_truncation_bound) + + @classmethod + def create_from_cdf( + cls, + cdf: Callable[[float], float], + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + tail_mass_truncation: float = 1e-15) -> 'PrivacyLossDistribution': + """Constructs the privacy loss distribution from its cumulative density function. + + This class method will be deprecated shortly. Use factory method + create_from_cdf() instead. + + Args: + cdf: the cumulative density function of the privacy loss distribution. + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence + computation gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + tail_mass_truncation: an upper bound on the tails of the probability mass + of the PLD that might be truncated. + + Returns: + The privacy loss distribution constructed as specified. + """ + _deprecation_warning('create_from_cdf') + return create_from_cdf(cdf, pessimistic_estimate, + value_discretization_interval, + tail_mass_truncation) + + @classmethod + def from_randomized_response( + cls, + noise_parameter: float, + num_buckets: int, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': + """Constructs the privacy loss distribution of Randomized Response. + + The Randomized Response over k buckets with noise parameter p takes in an + input which is one of the k buckets. With probability 1 - p, it simply + outputs the input bucket. Otherwise, with probability p, it outputs a bucket + drawn uniformly at random from the k buckets. + + This function calculates the privacy loss distribution for the + aforementioned Randomized Response with a given number of buckets, and a + given noise parameter. + + Specifically, suppose that the original input is x and it is changed to x'. 
+ Recall that the privacy loss distribution of the Randomized Response + mechanism is generated as follows: first pick o according to R(x), where + R(x) denote the output distribution of the Randomized Response mechanism + on input x. Then, the privacy loss is ln(Pr[R(x) = o] / Pr[R(x') = o]). + There are three cases here: + - When o = x, ln(Pr[R(x) = o] / Pr[R(x') = o]) = + ln(Pr[R(x) = x] / Pr[R(x') = x]). Here Pr[R(x) = x] = 1 - p + p / k + and Pr[R(x') = x] = p / k. + - When o = x', ln(Pr[R(x) = o] / Pr[R(x') = o]) = + ln(Pr[R(x') = x'] / Pr[R(x) = x']), which is just the negation of the + previous privacy loss. + - When o != x, x', the privacy loss is zero. + + This class method will be deprecated shortly. Use factory method + from_randomized_response() instead. + + Args: + noise_parameter: the probability that the Randomized Response outputs a + completely random bucket. + num_buckets: the total number of possible input values (which is equal to + the total number of possible output values). + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence + computation gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + + Returns: + The privacy loss distribution constructed as specified. + """ + _deprecation_warning('from_randomized_response') + return from_randomized_response(noise_parameter, num_buckets, + pessimistic_estimate, + value_discretization_interval) + + @classmethod + def from_laplace_mechanism( + cls, + parameter: float, + sensitivity: float = 1, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': + """Computes the privacy loss distribution of the Laplace mechanism. + + This class method will be deprecated shortly. Use factory method + from_laplace_mechanism() instead. + + Args: + parameter: the parameter of the Laplace distribution. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence + computation gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the Laplace mechanism with + given parameters. 
+ """ + _deprecation_warning('from_laplace_mechanism') + return from_laplace_mechanism(parameter, sensitivity, pessimistic_estimate, + value_discretization_interval, sampling_prob) + + @classmethod + def from_gaussian_mechanism( + cls, + standard_deviation: float, + sensitivity: float = 1, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + log_mass_truncation_bound: float = -50, + sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': + """Creates the privacy loss distribution of the Gaussian mechanism. + + This class method will be deprecated shortly. Use factory method + from_gaussian_mechanism() instead. + + Args: + standard_deviation: the standard_deviation of the Gaussian distribution. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence + computation gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + log_mass_truncation_bound: the ln of the probability mass that might be + discarded from the noise distribution. The larger this number, the more + error it may introduce in divergence calculations. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the Gaussian mechanism with + given parameters. + """ + _deprecation_warning('from_gaussian_mechanism') + return from_gaussian_mechanism(standard_deviation, sensitivity, + pessimistic_estimate, + value_discretization_interval, + log_mass_truncation_bound, + sampling_prob) + + @classmethod + def from_discrete_laplace_mechanism( + cls, + parameter: float, + sensitivity: int = 1, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': + """Computes the privacy loss distribution of the Discrete Laplace mechanism. + + This class method will be deprecated shortly. Use factory method + from_discrete_laplace_mechanism() instead. + + Args: + parameter: the parameter of the discrete Laplace distribution. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence + computation gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the Discrete Laplace + mechanism with given parameters. 
+ """ + _deprecation_warning('from_discrete_laplace_mechanism') + return from_discrete_laplace_mechanism(parameter, sensitivity, + pessimistic_estimate, + value_discretization_interval, + sampling_prob) + + @classmethod + def from_discrete_gaussian_mechanism( + cls, + sigma: float, + sensitivity: int = 1, + truncation_bound: Optional[int] = None, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': + """Creates the privacy loss distribution of the discrete Gaussian mechanism. + + This class method will be deprecated shortly. Use factory method + from_discrete_gaussian_mechanism() instead. + + Args: + sigma: the parameter of the discrete Gaussian distribution. Note that + unlike the (continuous) Gaussian distribution this is not equal to the + standard deviation of the noise. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + truncation_bound: bound for truncating the noise, i.e. the noise will only + have a support in [-truncation_bound, truncation_bound]. When not + specified, truncation_bound will be chosen in such a way that the mass + of the noise outside of this range is at most 1e-30. + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence + computation gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the discrete Gaussian + mechanism with given parameters. + """ + _deprecation_warning('from_discrete_gaussian_mechanism') + return from_discrete_gaussian_mechanism(sigma, sensitivity, + truncation_bound, + pessimistic_estimate, + value_discretization_interval, + sampling_prob) + + @classmethod + def from_privacy_parameters( + cls, + privacy_parameters: common.DifferentialPrivacyParameters, + value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': + """Constructs pessimistic PLD from epsilon and delta parameters. + + When the mechanism is (epsilon, delta)-differentially private, the following + is a pessimistic estimate of its privacy loss distribution (see Section 3.5 + of the supplementary material for more explanation): + - infinity with probability delta. + - epsilon with probability (1 - delta) / (1 + exp(-eps)) + - -epsilon with probability (1 - delta) / (1 + exp(eps)) + + This class method will be deprecated shortly. Use factory method + from_privacy_parameters() instead. + + Args: + privacy_parameters: the privacy guarantee of the mechanism. + value_discretization_interval: the length of the dicretization interval + for the privacy loss distribution. The values will be rounded up/down to + be integer multiples of this number. Smaller value results in more + accurate estimates of the privacy loss, at the cost of increased + run-time / memory usage. + + Returns: + The privacy loss distribution constructed as specified. 
+ """ + _deprecation_warning('from_privacy_parameters') + return from_privacy_parameters(privacy_parameters, + value_discretization_interval) + + def get_delta_for_epsilon( + self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]: + """Computes the epsilon-hockey stick divergence between mu_upper, mu_lower. + + When this privacy loss distribution corresponds to a mechanism, the + epsilon-hockey stick divergence gives the value of delta for which the + mechanism is (epsilon, delta)-differentially private. (See Observation 1 in + the supplementary material.) + + Args: + epsilon: the epsilon in epsilon-hockey stick divergence. + + Returns: + A non-negative real number which is the epsilon-hockey stick divergence + between the upper (mu_upper) and the lower (mu_lower) distributions + corresponding to this privacy loss distribution. + """ + delta_remove = self._pmf_remove.get_delta_for_epsilon(epsilon) + if self._symmetric: + return delta_remove + delta_add = self._pmf_add.get_delta_for_epsilon(epsilon) + return np.maximum(delta_remove, delta_add) + + def get_epsilon_for_delta(self, delta: float) -> float: + """Computes epsilon for which hockey stick divergence is at most delta. + + This function computes the smallest non-negative epsilon for which the + epsilon-hockey stick divergence between mu_upper, mu_lower is at most delta. + + When this privacy loss distribution corresponds to a mechanism and the + rounding is pessimistic, the returned value corresponds to an epsilon for + which the mechanism is (epsilon, delta)-differentially private. (See + Observation 1 in the supplementary material.) + + Args: + delta: the target epsilon-hockey stick divergence. + + Returns: + A non-negative real number which is the smallest epsilon such that the + epsilon-hockey stick divergence between the upper (mu_upper) and the + lower (mu_lower) distributions is at most delta. When no such finite + epsilon exists, return math.inf. + """ + epsilon_remove = self._pmf_remove.get_epsilon_for_delta(delta) + if self._symmetric: + return epsilon_remove + epsilon_add = self._pmf_add.get_epsilon_for_delta(delta) + return max(epsilon_remove, epsilon_add) + + def validate_composable(self, + privacy_loss_distribution: 'PrivacyLossDistribution'): + """Verifies that a given PLD can be composed with this PLD. + + The two privacy loss distributions must have the same discretization + interval and estimate type for the composition to be allowed. + + Args: + privacy_loss_distribution: the privacy loss distribution to be composed + with the current privacy loss distribution. + + Raises: + ValueError if the value_discretization_interval or estimate_type of the + two PLDs are different. + """ + self._pmf_remove.validate_composable(privacy_loss_distribution._pmf_remove) # pylint:disable=protected-access + + def compose( + self, + privacy_loss_distribution: 'PrivacyLossDistribution', + tail_mass_truncation: float = 1e-15, + ) -> 'PrivacyLossDistribution': + """Computes a privacy loss distribution resulting from composing two PLDs. + + Args: + privacy_loss_distribution: the privacy loss distribution to be composed + with the current privacy loss distribution. The two must have the same + value_discretization_interval. + tail_mass_truncation: an upper bound on the tails of the probability mass + of the PLD that might be truncated. + + Returns: + A privacy loss distribution which is the result of composing the two. 
+ """ + # pylint:disable=protected-access + pld_pmf_remove = pld_pmf.compose_pmfs(self._pmf_remove, + privacy_loss_distribution._pmf_remove, + tail_mass_truncation) + if self._symmetric and privacy_loss_distribution._symmetric: + return PrivacyLossDistribution(pld_pmf_remove) + pld_pmf_add = pld_pmf.compose_pmfs(self._pmf_add, + privacy_loss_distribution._pmf_add, + tail_mass_truncation) + # pylint:enable=protected-access + return PrivacyLossDistribution(pld_pmf_remove, pld_pmf_add) + + def get_delta_for_epsilon_for_composed_pld( + self, privacy_loss_distribution: 'PrivacyLossDistribution', + epsilon: float) -> float: + """Computes delta for given epsilon for the result of composing this PLD and a given PLD. + + The output of this function should be the same as first composing this PLD + and privacy_loss_distribution, and then call get_delta_for_epsilon on the + resulting PLD. The main advantage is that this function is faster. + + Args: + privacy_loss_distribution: the privacy loss distribution to be composed + with the current privacy loss distribution. The two must have the same + value_discretization_interval. + epsilon: the epsilon in epsilon-hockey stick divergence. + + Returns: + A non-negative real number which is the epsilon-hockey stick divergence + of the privacy loss distribution which is the result of composing this PLD + with privacy_loss_distribution. + """ + # pylint:disable=protected-access + delta_remove = self._pmf_remove.get_delta_for_epsilon_for_composed_pld( + privacy_loss_distribution._pmf_remove, epsilon) + if self._symmetric and privacy_loss_distribution._symmetric: + return delta_remove + delta_add = self._pmf_add.get_delta_for_epsilon_for_composed_pld( + privacy_loss_distribution._pmf_add, epsilon) + # pylint:enable=protected-access + return max(delta_remove, delta_add) + + def self_compose( + self, + num_times: int, + tail_mass_truncation: float = 1e-15) -> 'PrivacyLossDistribution': + """Computes PLD resulting from repeated composing the PLD with itself. + + Args: + num_times: the number of times to compose this PLD with itself. + tail_mass_truncation: an upper bound on the tails of the probability mass + of the PLD that might be truncated. Currently only supports for + pessimistic estimates. + + Returns: + A privacy loss distribution which is the result of the composition. + """ + pmf_remove = self._pmf_remove.self_compose(num_times, tail_mass_truncation) + if self._symmetric: + return PrivacyLossDistribution(pmf_remove) + pmf_add = self._pmf_add.self_compose(num_times, tail_mass_truncation) + return PrivacyLossDistribution(pmf_remove, pmf_add) + + +def identity( + value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: + """Constructs an identity privacy loss distribution. + + Args: + value_discretization_interval: the dicretization interval for the privacy + loss distribution. The values will be rounded up/down to be integer + multiples of this number. Smaller value results in more accurate estimates + of the privacy loss, at the cost of increased run-time / memory usage. + + Returns: + The privacy loss distribution corresponding to an algorithm with no + privacy leak (i.e. output is independent of input). 
+ """ + return PrivacyLossDistribution.create_from_rounded_probability( + {0: 1}, 0, value_discretization_interval) + + +def from_two_probability_mass_functions( + log_probability_mass_function_lower: Mapping[Any, float], + log_probability_mass_function_upper: Mapping[Any, float], + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + log_mass_truncation_bound: float = -math.inf, + symmetric: bool = True) -> PrivacyLossDistribution: + """Constructs a privacy loss distribution from mu_lower and mu_upper. + + Args: + log_probability_mass_function_lower: the probability mass function of + mu_lower represented as a dictionary where each key is an outcome o of + mu_lower and the corresponding value is the natural log of the + probability mass of mu_lower at o. + log_probability_mass_function_upper: the probability mass function of + mu_upper represented as a dictionary where each key is an outcome o of + mu_upper and the corresponding value is the natural log of the + probability mass of mu_upper at o. + pessimistic_estimate: whether the rounding is done in such a way that the + resulting epsilon-hockey stick divergence computation gives an upper + estimate to the real value. + value_discretization_interval: the dicretization interval for the privacy + loss distribution. The values will be rounded up/down to be integer + multiples of this number. Smaller value results in more accurate estimates + of the privacy loss, at the cost of increased run-time / memory usage. + log_mass_truncation_bound: when the log of the probability mass of the upper + distribution is below this bound, it is either (i) included in + infinity_mass in the case of pessimistic estimate or (ii) discarded + completely in the case of optimistic estimate. The larger + log_mass_truncation_bound is, the more error it may introduce in + divergence calculations. + symmetric: if True it creates a symmetric PrivacyLossDistribution. + + Returns: + The privacy loss distribution constructed as specified. + """ + + def _create_rounded_probability_mass_function( + log_probability_mass_function_lower: Mapping[Any, float], + log_probability_mass_function_upper: Mapping[Any, float] + ) -> Tuple[float, Mapping[int, float]]: + """Helper function for creating rounded pmf.""" + infinity_mass = 0 + for outcome in log_probability_mass_function_upper: + if log_probability_mass_function_lower.get(outcome, + -math.inf) == -math.inf: + # When an outcome only appears in the upper distribution but not in the + # lower distribution, then it must be counted in infinity_mass as such + # an outcome contributes to the hockey stick divergence. + infinity_mass += math.exp(log_probability_mass_function_upper[outcome]) + # Compute the (non-discretized) probability mass function for the privacy + # loss distribution. + probability_mass_function = {} + for outcome in log_probability_mass_function_lower: + if log_probability_mass_function_lower[outcome] == -math.inf: + # This outcome never occurs in mu_lower. This case was already included + # as infinity_mass above. + continue + elif (log_probability_mass_function_upper.get(outcome, -math.inf) > + log_mass_truncation_bound): + # When the probability mass of mu_upper at the outcome is greater than + # the threshold, add it to the distribution. 
+ privacy_loss_value = ( + log_probability_mass_function_upper[outcome] - + log_probability_mass_function_lower[outcome]) + probability_mass_function[privacy_loss_value] = ( + probability_mass_function.get(privacy_loss_value, 0) + + math.exp(log_probability_mass_function_upper[outcome])) + else: + if pessimistic_estimate: + # When the probability mass of mu_upper at the outcome is no more than + # the threshold and we would like to get a pessimistic estimate, + # account for this in infinity_mass. + infinity_mass += math.exp( + log_probability_mass_function_upper.get(outcome, -math.inf)) + # Discretize the probability mass so that the values are integer multiples + # of value_discretization_interval + rounded_probability_mass_function = collections.defaultdict(lambda: 0) + round_fn = math.ceil if pessimistic_estimate else math.floor + for val in probability_mass_function: + rounded_probability_mass_function[round_fn( + val / + value_discretization_interval)] += probability_mass_function[val] + return infinity_mass, rounded_probability_mass_function + + infinity_mass, rounded_probability_mass_function = _create_rounded_probability_mass_function( + log_probability_mass_function_lower, log_probability_mass_function_upper) + + if symmetric: + return PrivacyLossDistribution.create_from_rounded_probability( + rounded_probability_mass_function, + infinity_mass, + value_discretization_interval, + pessimistic_estimate=pessimistic_estimate) + + infinity_mass_add, rounded_probability_mass_function_add = _create_rounded_probability_mass_function( + log_probability_mass_function_lower=log_probability_mass_function_upper, + log_probability_mass_function_upper=log_probability_mass_function_lower) + return PrivacyLossDistribution.create_from_rounded_probability( + rounded_probability_mass_function, infinity_mass, + value_discretization_interval, pessimistic_estimate, + rounded_probability_mass_function_add, infinity_mass_add) + + +def _create_pld_pmf_from_additive_noise( + additive_noise_privacy_loss: + 'privacy_loss_mechanism.AdditiveNoisePrivacyLoss', + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4) -> pld_pmf.PLDPmf: + """Constructs the privacy loss distribution of an additive noise mechanism. + + An additive noise mechanism for computing a scalar-valued function f is a + mechanism that outputs the sum of the true value of the function and a noise + drawn from a certain distribution mu. This function calculates the privacy + loss distribution for such an additive noise mechanism. + + Args: + additive_noise_privacy_loss: the privacy loss representation of the + mechanism. + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + + Returns: + The privacy loss distribution constructed as specified. 
+ """ + round_fn = math.ceil if pessimistic_estimate else math.floor + + tail_pld = additive_noise_privacy_loss.privacy_loss_tail() + + rounded_probability_mass_function = collections.defaultdict(lambda: 0) + infinity_mass = tail_pld.tail_probability_mass_function.get(math.inf, 0) + for privacy_loss in tail_pld.tail_probability_mass_function: + if privacy_loss != math.inf: + rounded_probability_mass_function[round_fn( + privacy_loss / value_discretization_interval + )] += tail_pld.tail_probability_mass_function[privacy_loss] + + if additive_noise_privacy_loss.discrete_noise: + xs = list( + range( + math.ceil(tail_pld.lower_x_truncation) - 1, + math.floor(tail_pld.upper_x_truncation) + 1)) + + # Compute PMF for the x's. Note that a vectorized call to mu_upper_cdf can + # be much faster than many scalar calls. + cdf_values = additive_noise_privacy_loss.mu_upper_cdf(xs) + probability_mass = cdf_values[1:] - cdf_values[:-1] + + for x, prob in zip(xs[1:], probability_mass): + rounded_probability_mass_function[round_fn( + additive_noise_privacy_loss.privacy_loss(x) / + value_discretization_interval)] += prob + else: + lower_x = tail_pld.lower_x_truncation + rounded_down_value = math.floor( + additive_noise_privacy_loss.privacy_loss(lower_x) / + value_discretization_interval) + upper_x_privacy_loss = additive_noise_privacy_loss.privacy_loss( + tail_pld.upper_x_truncation) + + # Compute discretization intervals for PLD approximation. + xs, rounded_values = [lower_x], [] + x = lower_x + while x < tail_pld.upper_x_truncation: + if (value_discretization_interval * rounded_down_value <= + upper_x_privacy_loss): + x = tail_pld.upper_x_truncation + else: + x = additive_noise_privacy_loss.inverse_privacy_loss( + value_discretization_interval * rounded_down_value) + + xs.append(x) + rounded_values.append(round_fn(rounded_down_value + 0.5)) + rounded_down_value -= 1 + + # Compute PLD for discretization intervals. Note that a vectorized call to + # mu_upper_cdf is much faster than many scalar calls. + cdf_values = additive_noise_privacy_loss.mu_upper_cdf(xs) + probability_mass = cdf_values[1:] - cdf_values[:-1] + + # Each x in [lower_x, upper_x] results in privacy loss that lies in + # [value_discretization_interval * rounded_down_value, + # value_discretization_interval * (rounded_down_value + 1)] + for rounded_value, prob in zip(rounded_values, probability_mass): + rounded_probability_mass_function[rounded_value] += prob + + return pld_pmf.create_pmf( + dict(rounded_probability_mass_function), + value_discretization_interval, + infinity_mass, + pessimistic_estimate=pessimistic_estimate) + + +def create_from_cdf( + cdf: Callable[[float], float], + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + tail_mass_truncation: float = 1e-15) -> PrivacyLossDistribution: + """Constructs the privacy loss distribution from its cumulative density function. + + Args: + cdf: the cumulative density function of the privacy loss distribution. + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. 
+ tail_mass_truncation: an upper bound on the tails of the probability mass of + the PLD that might be truncated. + + Returns: + The privacy loss distribution constructed as specified. + """ + rounded_probability_mass_function = {} + + # Construct the distribution for value greater than or equal to zero. + rounded_value = 1 if pessimistic_estimate else 0 + value = 0 + while cdf(value) < 1 - tail_mass_truncation / 2: + rounded_probability_mass_function[rounded_value] = ( + cdf(value + value_discretization_interval) - cdf(value)) + rounded_value += 1 + value += value_discretization_interval + + # Construct the distribution for value less than zero. + rounded_value = 0 if pessimistic_estimate else -1 + value = 0 + while cdf(value) > tail_mass_truncation / 2: + rounded_probability_mass_function[rounded_value] = ( + cdf(value) - cdf(value - value_discretization_interval)) + rounded_value -= 1 + value -= value_discretization_interval + + return PrivacyLossDistribution.create_from_rounded_probability( + rounded_probability_mass_function, + tail_mass_truncation if pessimistic_estimate else 0, + value_discretization_interval, + pessimistic_estimate=pessimistic_estimate) + + +def from_randomized_response( + noise_parameter: float, + num_buckets: int, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4 +) -> PrivacyLossDistribution: + """Constructs the privacy loss distribution of Randomized Response. + + The Randomized Response over k buckets with noise parameter p takes in an + input which is one of the k buckets. With probability 1 - p, it simply + outputs the input bucket. Otherwise, with probability p, it outputs a bucket + drawn uniformly at random from the k buckets. + + This function calculates the privacy loss distribution for the + aforementioned Randomized Response with a given number of buckets, and a + given noise parameter. + + Specifically, suppose that the original input is x and it is changed to x'. + Recall that the privacy loss distribution of the Randomized Response + mechanism is generated as follows: first pick o according to R(x), where + R(x) denote the output distribution of the Randomized Response mechanism + on input x. Then, the privacy loss is ln(Pr[R(x) = o] / Pr[R(x') = o]). + There are three cases here: + - When o = x, ln(Pr[R(x) = o] / Pr[R(x') = o]) = + ln(Pr[R(x) = x] / Pr[R(x') = x]). Here Pr[R(x) = x] = 1 - p + p / k + and Pr[R(x') = x] = p / k. + - When o = x', ln(Pr[R(x) = o] / Pr[R(x') = o]) = + ln(Pr[R(x') = x'] / Pr[R(x) = x']), which is just the negation of the + previous privacy loss. + - When o != x, x', the privacy loss is zero. + + Args: + noise_parameter: the probability that the Randomized Response outputs a + completely random bucket. + num_buckets: the total number of possible input values (which is equal to + the total number of possible output values). + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + + Returns: + The privacy loss distribution constructed as specified. 
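+
+  For example (illustrative numbers, not taken from the source): with
+  noise_parameter p = 0.5 and num_buckets k = 2, Pr[R(x) = x] = 1 - p + p/k =
+  0.75 and Pr[R(x') = x] = p/k = 0.25, so the privacy loss at o = x is
+  ln(0.75 / 0.25) = ln(3) ~= 1.0986.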
+ """ + + if noise_parameter <= 0 or noise_parameter >= 1: + raise ValueError(f'Noise parameter must be strictly between 0 and 1: ' + f'{noise_parameter}') + + if num_buckets <= 1: + raise ValueError( + f'Number of buckets must be strictly greater than 1: {num_buckets}') + + round_fn = math.ceil if pessimistic_estimate else math.floor + + rounded_probability_mass_function = collections.defaultdict(lambda: 0) + + # Probability that the output is equal to the input, i.e., Pr[R(x) = x] + probability_output_equal_input = ((1 - noise_parameter) + + noise_parameter / num_buckets) + # Probability that the output is equal to a specific bucket that is not the + # input, i.e., Pr[R(x') = x] for x' != x. + probability_output_not_input = noise_parameter / num_buckets + + # Add privacy loss for the case o = x + rounded_value = round_fn( + math.log(probability_output_equal_input / probability_output_not_input) + / value_discretization_interval) + rounded_probability_mass_function[ + rounded_value] += probability_output_equal_input + + # Add privacy loss for the case o = x' + rounded_value = round_fn( + math.log(probability_output_not_input / probability_output_equal_input) + / value_discretization_interval) + rounded_probability_mass_function[ + rounded_value] += probability_output_not_input + + # Add privacy loss for the case o != x, x' + rounded_probability_mass_function[0] += ( + probability_output_not_input * (num_buckets - 2)) + + return PrivacyLossDistribution.create_from_rounded_probability( + rounded_probability_mass_function, + 0, + value_discretization_interval, + pessimistic_estimate=pessimistic_estimate) + + +def _pld_for_subsampled_mechanism( + single_pld_pmf: Callable[[privacy_loss_mechanism.AdjacencyType], + pld_pmf.PLDPmf], + sampling_prob: float = 1.0) -> PrivacyLossDistribution: + """Computes the privacy loss distribution for subsampled mechanisms. + + It is assumed that when sub-sampling probability is 1, the privacy loss + distributions corresponding to ADD and REMOVE adjacencies are identical. + + Args: + single_pld_pmf: method for computing the privacy loss distributions with + respect to ADD and REMOVE adjacency types. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + A symmetric privacy loss distribution when sampling_prob = 1; An + asymmetric privacy loss distribution corresponding to ADD and REMOVE + adjacency types when sampling_prob < 1. + """ + pmf_remove = single_pld_pmf(privacy_loss_mechanism.AdjacencyType.REMOVE) + if sampling_prob == 1.0: + return PrivacyLossDistribution(pmf_remove) + + pmf_add = single_pld_pmf(privacy_loss_mechanism.AdjacencyType.ADD) + return PrivacyLossDistribution(pmf_remove, pmf_add) + + +def from_laplace_mechanism( + parameter: float, + sensitivity: float = 1, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + sampling_prob: float = 1.0) -> PrivacyLossDistribution: + """Computes the privacy loss distribution of the Laplace mechanism. + + Args: + parameter: the parameter of the Laplace distribution. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. 
The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the Laplace mechanism with + given parameters. + """ + + def single_laplace_pld( + adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: + return _create_pld_pmf_from_additive_noise( + privacy_loss_mechanism.LaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type), + pessimistic_estimate=pessimistic_estimate, + value_discretization_interval=value_discretization_interval) + + return _pld_for_subsampled_mechanism(single_laplace_pld, sampling_prob) + + +def from_gaussian_mechanism( + standard_deviation: float, + sensitivity: float = 1, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + log_mass_truncation_bound: float = -50, + sampling_prob: float = 1.0) -> PrivacyLossDistribution: + """Creates the privacy loss distribution of the Gaussian mechanism. + + Args: + standard_deviation: the standard_deviation of the Gaussian distribution. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + log_mass_truncation_bound: the ln of the probability mass that might be + discarded from the noise distribution. The larger this number, the more + error it may introduce in divergence calculations. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the Gaussian mechanism with + given parameters. + """ + + def single_gaussian_pld( + adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: + return _create_pld_pmf_from_additive_noise( + privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + pessimistic_estimate=pessimistic_estimate, + log_mass_truncation_bound=log_mass_truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type), + pessimistic_estimate=pessimistic_estimate, + value_discretization_interval=value_discretization_interval) + + return _pld_for_subsampled_mechanism(single_gaussian_pld, sampling_prob) + + +def from_discrete_laplace_mechanism( + parameter: float, + sensitivity: int = 1, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + sampling_prob: float = 1.0) -> PrivacyLossDistribution: + """Computes the privacy loss distribution of the Discrete Laplace mechanism. + + Args: + parameter: the parameter of the discrete Laplace distribution. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) 
+ pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the Discrete Laplace + mechanism with given parameters. + """ + + def single_discrete_laplace_pld( + adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: + return _create_pld_pmf_from_additive_noise( + privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type), + pessimistic_estimate=pessimistic_estimate, + value_discretization_interval=value_discretization_interval) + + return _pld_for_subsampled_mechanism(single_discrete_laplace_pld, + sampling_prob) + + +def from_discrete_gaussian_mechanism( + sigma: float, + sensitivity: int = 1, + truncation_bound: Optional[int] = None, + pessimistic_estimate: bool = True, + value_discretization_interval: float = 1e-4, + sampling_prob: float = 1.0) -> PrivacyLossDistribution: + """Creates the privacy loss distribution of the discrete Gaussian mechanism. + + Args: + sigma: the parameter of the discrete Gaussian distribution. Note that + unlike the (continuous) Gaussian distribution this is not equal to the + standard deviation of the noise. + sensitivity: the sensitivity of function f. (i.e. the maximum absolute + change in f when an input to a single user changes.) + truncation_bound: bound for truncating the noise, i.e. the noise will only + have a support in [-truncation_bound, truncation_bound]. When not + specified, truncation_bound will be chosen in such a way that the mass + of the noise outside of this range is at most 1e-30. + pessimistic_estimate: a value indicating whether the rounding is done in + such a way that the resulting epsilon-hockey stick divergence computation + gives an upper estimate to the real value. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + sampling_prob: sub-sampling probability, a value in (0,1]. + + Returns: + The privacy loss distribution corresponding to the discrete Gaussian + mechanism with given parameters. 
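+
+  A minimal usage sketch (parameter values are illustrative assumptions):
+
+    pld = from_discrete_gaussian_mechanism(sigma=1.0, sensitivity=1)
+    delta = pld.get_delta_for_epsilon(1.0)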
+ """ + + def single_discrete_gaussian_pld( + adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: + return _create_pld_pmf_from_additive_noise( + privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + truncation_bound=truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type), + pessimistic_estimate=pessimistic_estimate, + value_discretization_interval=value_discretization_interval) + + return _pld_for_subsampled_mechanism(single_discrete_gaussian_pld, + sampling_prob) + + +def from_privacy_parameters( + privacy_parameters: common.DifferentialPrivacyParameters, + value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: + """Constructs pessimistic PLD from epsilon and delta parameters. + + When the mechanism is (epsilon, delta)-differentially private, the following + is a pessimistic estimate of its privacy loss distribution (see Section 3.5 + of the supplementary material for more explanation): + - infinity with probability delta. + - epsilon with probability (1 - delta) / (1 + exp(-eps)) + - -epsilon with probability (1 - delta) / (1 + exp(eps)) + + Args: + privacy_parameters: the privacy guarantee of the mechanism. + value_discretization_interval: the length of the dicretization interval for + the privacy loss distribution. The values will be rounded up/down to be + integer multiples of this number. Smaller value results in more accurate + estimates of the privacy loss, at the cost of increased run-time / memory + usage. + + Returns: + The privacy loss distribution constructed as specified. + """ + delta = privacy_parameters.delta + epsilon = privacy_parameters.epsilon + + rounded_probability_mass_function = collections.defaultdict(lambda: 0) + + rounded_probability_mass_function[math.ceil( + epsilon / + value_discretization_interval)] = (1 - delta) / (1 + math.exp(-epsilon)) + rounded_probability_mass_function[math.ceil( + -epsilon / + value_discretization_interval)] += (1 - delta) / (1 + math.exp(epsilon)) + + return PrivacyLossDistribution.create_from_rounded_probability( + rounded_probability_mass_function, privacy_parameters.delta, + value_discretization_interval) diff --git a/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_basic_example.py b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_basic_example.py new file mode 100644 index 0000000000..a409ea4843 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_basic_example.py @@ -0,0 +1,52 @@ +"""Basic Example for Using Privacy Loss Distributions. +""" + +from absl import app + +from dp_accounting import privacy_loss_distribution + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + # The parameter of Laplace Noise added + parameter_laplace = 3 + # PLD for one execution of the Laplace Mechanism. (Throughout we assume that + # sensitivity = 1.) + laplace_pld = privacy_loss_distribution.from_laplace_mechanism( + parameter_laplace, value_discretization_interval=1e-3) + + # Number of times Laplace Mechanism is run + num_laplace = 40 + # PLD for num_laplace executions of the Laplace Mechanism. 
+ composed_laplace_pld = laplace_pld.self_compose(num_laplace) + + epsilon = 10 + delta = composed_laplace_pld.get_delta_for_epsilon(epsilon) + print(f'An algorithm that executes the Laplace Mechanism with parameter ' + f'{parameter_laplace} for a total of {num_laplace} times is ' + f'({epsilon}, {delta})-DP.') + + # PLDs for different mechanisms can also be composed. Below is an example in + # which we compose PLDs for Laplace Mechanism and Gaussian Mechanism. + + # STD of the Gaussian Noise + standard_deviation = 5 + # PLD for an execution of the Gaussian Mechanism. + gaussian_pld = privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation, value_discretization_interval=1e-3) + + # PLD for num_laplace executions of the Laplace Mechanism and one execution of + # the Gaussian Mechanism. + composed_laplace_and_gaussian_pld = composed_laplace_pld.compose(gaussian_pld) + + epsilon = 10 + delta = composed_laplace_and_gaussian_pld.get_delta_for_epsilon(epsilon) + print(f'An algorithm that executes the Laplace Mechanism with parameter ' + f'{parameter_laplace} for a total of {num_laplace} times and in ' + f'addition executes once the Gaussian Mechanism with STD ' + f'{standard_deviation} is ({epsilon}, {delta})-DP.') + +if __name__ == '__main__': + app.run(main) diff --git a/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_test.py b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_test.py new file mode 100644 index 0000000000..10ec074fd6 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_distribution_test.py @@ -0,0 +1,1173 @@ +"""Tests for privacy_loss_distribution.py.""" +import math +from typing import Any, Mapping +import unittest +from absl.testing import parameterized +from scipy import stats + +from dp_accounting.pld import privacy_loss_distribution +from dp_accounting.pld import test_util + + +class AddRemovePrivacyLossDistributionTest(parameterized.TestCase): + + def _create_pld( + self, + log_pmf_lower: Mapping[Any, float], + log_pmf_upper: Mapping[Any, float], + pessimistic: bool = True + ) -> privacy_loss_distribution.PrivacyLossDistribution: + pmf_remove = ( + privacy_loss_distribution.from_two_probability_mass_functions( + log_pmf_lower, log_pmf_upper, + pessimistic_estimate=pessimistic)._pmf_remove) + pmf_add = ( + privacy_loss_distribution.from_two_probability_mass_functions( + log_pmf_upper, log_pmf_lower, + pessimistic_estimate=pessimistic)._pmf_remove) + return privacy_loss_distribution.PrivacyLossDistribution( + pmf_remove, pmf_add) + + def test_init_errors(self): + rounded_pmf = {1: 0.5, -1: 0.5} + value_discretization_interval = 1 + infinity_mass = 0 + pessimistic_estimate = True + pld = privacy_loss_distribution.PrivacyLossDistribution + with self.assertRaises(ValueError): + pld.create_from_rounded_probability( + rounded_probability_mass_function=rounded_pmf, + infinity_mass=infinity_mass, + value_discretization_interval=value_discretization_interval, + pessimistic_estimate=pessimistic_estimate, + rounded_probability_mass_function_add=rounded_pmf, + infinity_mass_add=infinity_mass, + symmetric=True) + with self.assertRaises(ValueError): + pld.create_from_rounded_probability( + rounded_probability_mass_function=rounded_pmf, + infinity_mass=infinity_mass, + value_discretization_interval=value_discretization_interval, + pessimistic_estimate=pessimistic_estimate, + rounded_probability_mass_function_add=None, + infinity_mass_add=None, + symmetric=False) + + def test_hockey_stick_basic(self): + # 
Basic hockey stick divergence computation test
+    log_pmf_lower = {1: math.log(0.5), 2: math.log(0.5)}
+    log_pmf_upper = {1: math.log(0.6), 2: math.log(0.4)}
+    pld_pessimistic = self._create_pld(
+        log_pmf_lower, log_pmf_upper, pessimistic=True)
+    pld_optimistic = self._create_pld(
+        log_pmf_lower, log_pmf_upper, pessimistic=False)
+
+    # 0-hockey stick divergence is 0.1 (for _pmf_remove & _pmf_add)
+    # When using pessimistic estimate, the output should be in [0.1, 0.1+1e-4]
+    self.assertTrue(
+        0.1 <= pld_pessimistic.get_delta_for_epsilon(0.0) <= 0.1 + 1e-4)
+    # When using optimistic estimate, the output should be in [0.1 - 1e-4, 0.1]
+    self.assertTrue(
+        0.1 - 1e-4 <= pld_optimistic.get_delta_for_epsilon(0.0) <= 0.1)
+
+    # math.log(1.1)-hockey stick divergence is 0.06 (for _pmf_add)
+    # When using pessimistic estimate, the output should be in [0.06, 0.06+1e-4]
+    self.assertTrue(0.06 <= pld_pessimistic
+                    .get_delta_for_epsilon(math.log(1.1)) <= 0.06 + 1e-4)
+    # When using optimistic estimate, the output should be in [0.06-1e-4, 0.06]
+    self.assertTrue(0.06 - 1e-4 <= pld_optimistic
+                    .get_delta_for_epsilon(math.log(1.1)) <= 0.06)
+
+    # math.log(0.9)-hockey stick divergence is 0.15 (for _pmf_remove)
+    # When using pessimistic estimate, the output should be in [0.15, 0.15+1e-4]
+    self.assertTrue(0.15 <= pld_pessimistic
+                    .get_delta_for_epsilon(math.log(0.9)) <= 0.15 + 1e-4)
+    # When using optimistic estimate, the output should be in [0.15-1e-4, 0.15]
+    self.assertTrue(0.15 - 1e-4 <= pld_optimistic
+                    .get_delta_for_epsilon(math.log(0.9)) <= 0.15)
+
+    self.assertFalse(pld_pessimistic._symmetric)
+    self.assertFalse(pld_optimistic._symmetric)
+
+  def test_hockey_stick_unequal_support(self):
+    # Hockey stick divergence computation test when the two distributions have
+    # different supports
+    log_pmf_lower = {1: math.log(0.2), 2: math.log(0.2), 3: math.log(0.6)}
+    log_pmf_upper = {1: math.log(0.5), 2: math.log(0.4), 4: math.log(0.1)}
+    pld_pessimistic = self._create_pld(
+        log_pmf_lower, log_pmf_upper, pessimistic=True)
+    pld_optimistic = self._create_pld(
+        log_pmf_lower, log_pmf_upper, pessimistic=False)
+
+    # Here 4 appears as an outcome of only mu_upper and hence should be
+    # included in the infinity_mass variable of _pmf_remove.
+    self.assertAlmostEqual(pld_pessimistic._pmf_remove._infinity_mass, 0.1)
+    self.assertAlmostEqual(pld_optimistic._pmf_remove._infinity_mass, 0.1)
+
+    # Here 3 appears as an outcome of only mu_lower and hence should be
+    # included in the infinity_mass variable of _pmf_add.
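+    # (Outcome 3 carries mass 0.6 under mu_lower; since _pmf_add is built with
+    # the roles of the two distributions swapped, that 0.6 is exactly the
+    # infinity mass asserted below.)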
+ self.assertAlmostEqual(pld_pessimistic._pmf_add._infinity_mass, 0.6) + self.assertAlmostEqual(pld_optimistic._pmf_add._infinity_mass, 0.6) + + # 0-hockey stick divergence is 0.6 (for basic_pld_remove & basic_pld_add) + # When using pessimistic estimate, the output should be in [0.6, 0.6+1e-4] + self.assertTrue(0.6 <= pld_pessimistic + .get_delta_for_epsilon(0.0) <= 0.6 + 1e-4) + # When using optimistic estimate, the output should lie in [0.6 - 1e-4, 0.6] + self.assertTrue(0.6 - 1e-4 <= pld_optimistic + .get_delta_for_epsilon(0.0) <= 0.6) + + # math.log(1.1)-hockey stick divergence is 0.6 (for basic_pld_add) + # When using pessimistic estimate, the output should be in [0.6, 0.6 + 1e-4] + self.assertTrue(0.6 <= pld_pessimistic + .get_delta_for_epsilon(math.log(1.1)) <= 0.6 + 1e-4) + # When using optimistic estimate, the output should lie in [0.6 - 1e-4, 0.6] + self.assertTrue(0.6 - 1e-4 <= pld_optimistic + .get_delta_for_epsilon(math.log(1.1)) <= 0.6) + + # math.log(0.9)-hockey stick divergence is 0.64 (for basic_pld_remove) + # When using pessimistic estimate, the output should be + # in [0.64, 0.64 + 1e-4] + self.assertTrue(0.64 <= pld_pessimistic + .get_delta_for_epsilon(math.log(0.9)) <= 0.64 + 1e-4) + # When using optimistic estimate, the output should lie in + # [0.64 - 1e-4, 0.64] + self.assertTrue(0.64 - 1e-4 <= pld_optimistic + .get_delta_for_epsilon(math.log(0.9)) <= 0.64) + + def test_composition(self): + # Test for composition of privacy loss distribution + log_pmf_lower1 = {1: math.log(0.2), 2: math.log(0.2), 3: math.log(0.6)} + log_pmf_upper1 = {1: math.log(0.5), 2: math.log(0.2), 4: math.log(0.3)} + pld1 = self._create_pld(log_pmf_lower1, log_pmf_upper1, pessimistic=True) + + log_pmf_lower2 = {1: math.log(0.4), 2: math.log(0.6)} + log_pmf_upper2 = {2: math.log(0.7), 3: math.log(0.3)} + pld2 = self._create_pld(log_pmf_lower2, log_pmf_upper2, pessimistic=True) + + # Result from composing the above two privacy loss distributions + result = pld1.compose(pld2) + + # The correct result + log_pmf_lower_composed = { + (1, 1): math.log(0.08), + (1, 2): math.log(0.12), + (2, 1): math.log(0.08), + (2, 2): math.log(0.12), + (3, 1): math.log(0.24), + (3, 2): math.log(0.36) + } + log_pmf_upper_composed = { + (1, 2): math.log(0.35), + (1, 3): math.log(0.15), + (2, 2): math.log(0.14), + (2, 3): math.log(0.06), + (4, 2): math.log(0.21), + (4, 3): math.log(0.09) + } + expected_result = self._create_pld(log_pmf_lower_composed, + log_pmf_upper_composed) + + # Check that the result is as expected. Note that we cannot check that the + # rounded_down_probability_mass_function and + # rounded_up_probability_mass_function of the two distributions are equal + # directly because the rounding might cause off-by-one error in index. 
+ self.assertAlmostEqual(expected_result._pmf_remove._discretization, + result._pmf_remove._discretization) + self.assertAlmostEqual(expected_result._pmf_add._discretization, + result._pmf_add._discretization) + self.assertAlmostEqual(expected_result._pmf_remove._infinity_mass, + result._pmf_remove._infinity_mass) + self.assertAlmostEqual(expected_result._pmf_add._infinity_mass, + result._pmf_add._infinity_mass) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(0), + result.get_delta_for_epsilon(0)) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(0.5), + result.get_delta_for_epsilon(0.5)) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(-0.5), + result.get_delta_for_epsilon(-0.5)) + + def test_composition_asymmetric_with_symmetric(self): + # Test for composition of privacy loss distribution + log_pmf_lower1 = {1: math.log(0.2), 2: math.log(0.2), 3: math.log(0.6)} + log_pmf_upper1 = {1: math.log(0.5), 2: math.log(0.2), 4: math.log(0.3)} + pld1 = self._create_pld(log_pmf_lower1, log_pmf_upper1) + + log_pmf_lower2 = {1: math.log(0.4), 2: math.log(0.6)} + log_pmf_upper2 = {2: math.log(0.7), 3: math.log(0.3)} + pld2 = self._create_pld(log_pmf_lower2, log_pmf_upper2) + + # Result from composing the above two privacy loss distributions + result12 = pld1.compose(pld2) + result21 = pld2.compose(pld1) + + # The correct result + log_pmf_lower1_lower2_composed = { + (1, 1): math.log(0.08), + (1, 2): math.log(0.12), + (2, 1): math.log(0.08), + (2, 2): math.log(0.12), + (3, 1): math.log(0.24), + (3, 2): math.log(0.36) + } + log_pmf_upper1_upper2_composed = { + (1, 2): math.log(0.35), + (1, 3): math.log(0.15), + (2, 2): math.log(0.14), + (2, 3): math.log(0.06), + (4, 2): math.log(0.21), + (4, 3): math.log(0.09) + } + + expected_result = self._create_pld(log_pmf_lower1_lower2_composed, + log_pmf_upper1_upper2_composed) + # Check that the result is as expected. Note that we cannot check that the + # rounded_down_probability_mass_function and + # rounded_up_probability_mass_function of the two distributions are equal + # directly because the rounding might cause off-by-one error in index. 
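+    # (Composition should also be order-independent: the composed privacy loss
+    # is a sum of independent privacy-loss random variables, so result12 and
+    # result21 below are both expected to match expected_result.)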
+ for result in [result12, result21]: + self.assertAlmostEqual(expected_result._pmf_remove._discretization, + result._pmf_remove._discretization) + self.assertAlmostEqual(expected_result._pmf_add._discretization, + result._pmf_add._discretization) + self.assertAlmostEqual(expected_result._pmf_remove._infinity_mass, + result._pmf_remove._infinity_mass) + self.assertAlmostEqual(expected_result._pmf_add._infinity_mass, + result._pmf_add._infinity_mass) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(0), + result.get_delta_for_epsilon(0)) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(0.5), + result.get_delta_for_epsilon(0.5)) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(-0.5), + result.get_delta_for_epsilon(-0.5)) + + def test_self_composition(self): + log_pmf_lower = {1: math.log(0.2), 2: math.log(0.2), 3: math.log(0.6)} + log_pmf_upper = {1: math.log(0.5), 2: math.log(0.2), 4: math.log(0.3)} + + pld = self._create_pld(log_pmf_lower, log_pmf_upper) + result = pld.self_compose(3) + + expected_log_pmf_lower = {} + for i, vi in log_pmf_lower.items(): + for j, vj in log_pmf_lower.items(): + for k, vk in log_pmf_lower.items(): + expected_log_pmf_lower[(i, j, k)] = vi + vj + vk + expected_log_pmf_upper = {} + for i, vi in log_pmf_upper.items(): + for j, vj in log_pmf_upper.items(): + for k, vk in log_pmf_upper.items(): + expected_log_pmf_upper[(i, j, k)] = vi + vj + vk + + expected_result = self._create_pld(expected_log_pmf_lower, + expected_log_pmf_upper) + + self.assertAlmostEqual(expected_result._pmf_remove._discretization, + result._pmf_remove._discretization) + self.assertAlmostEqual(expected_result._pmf_remove._infinity_mass, + result._pmf_remove._infinity_mass) + self.assertAlmostEqual(expected_result._pmf_add._discretization, + result._pmf_add._discretization) + self.assertAlmostEqual(expected_result._pmf_add._infinity_mass, + result._pmf_add._infinity_mass) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(0), + result.get_delta_for_epsilon(0)) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(0.5), + result.get_delta_for_epsilon(0.5)) + self.assertAlmostEqual( + expected_result.get_delta_for_epsilon(-0.2), + result.get_delta_for_epsilon(-0.2)) + + +class LaplacePrivacyLossDistributionTest(parameterized.TestCase): + + @parameterized.parameters((1.0, 1.0, -0.1), (1.0, 1.0, 1.1), (1.0, 1.0, 0.0), + (-0.1, 1.0, 1.0), (0.0, 1.0, 1.0), (1.0, -1.0, 1.0), + (1.0, 0.0, 1.0)) + def test_laplace_value_errors(self, parameter, sensitivity, sampling_prob): + with self.assertRaises(ValueError): + privacy_loss_distribution.from_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + sampling_prob=sampling_prob) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1.0, 1.0, { + 1: 0.69673467, + 0: 0.11932561, + -1: 0.18393972 + }), + (3.0, 3.0, 1.0, { + 1: 0.69673467, + 0: 0.11932561, + -1: 0.18393972 + }), + (1.0, 2.0, 1.0, { + 2: 0.69673467, + 1: 0.11932561, + 0: 0.07237464, + -1: 0.04389744, + -2: 0.06766764 + }), + (2.0, 4.0, 1.0, { + 2: 0.69673467, + 1: 0.11932561, + 0: 0.07237464, + -1: 0.04389744, + -2: 0.06766764 + }), + # Tests with sampling_prob < 1 + (1.0, 1.0, 0.8, { + 1: 0.69673467, + 0: 0.30326533 + }, { + 1: 0.6180408, + 0: 0.3819592 + }), + (3.0, 3.0, 0.5, { + 1: 0.69673467, + 0: 0.30326533 + }, { + 1: 0.5, + 0: 0.5 + }), + (1.0, 2.0, 0.7, { + 1: 0.81606028, + 0: 0.08497712, + -1: 0.09896260 + }, { + 2: 0.49036933, + 1: 0.13605478, + 0: 
0.37357589 + }), + (2.0, 4.0, 0.3, { + 1: 0.81606028, + 0: 0.11302356, + -1: 0.07091617 + }, { + 2: 0.20651251, + 1: 0.16706338, + 0: 0.62642411 + })) + def test_laplace_varying_parameter_and_sensitivity( + self, parameter, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of pessimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + sampling_prob=sampling_prob) + + if expected_rounded_pmf_remove is None: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertTrue(pld._symmetric) + else: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertFalse(pld._symmetric) + + @parameterized.parameters((0.5, { + 2: 0.61059961, + 1: 0.08613506, + 0: 0.06708205, + -1: 0.05224356, + -2: 0.18393972 + }), (0.3, { + 4: 0.52438529, + 3: 0.06624934, + 2: 0.05702133, + 1: 0.04907872, + 0: 0.04224244, + -1: 0.03635841, + -2: 0.03129397, + -3: 0.19337051 + })) + def test_laplace_discretization(self, value_discretization_interval, + expected_rounded_probability_mass_function): + """Verifies correctness of pessimistic PLD for varying discretization.""" + pld = privacy_loss_distribution.from_laplace_mechanism( + 1, value_discretization_interval=value_discretization_interval) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1.0, 1.0, { + 1: 0.5, + 0: 0.19673467, + -1: 0.30326533 + }), + (1.0, 2.0, 1.0, { + 2: 0.5, + 1: 0.19673467, + 0: 0.11932561, + -1: 0.07237464, + -2: 0.11156508 + }), + # Tests with sampling_prob < 1 + (1.0, 1.0, 0.8, { + 0: 0.69673467, + -1: 0.30326533 + }, { + 0: 0.6180408, + -1: 0.3819592 + }), + (3.0, 3.0, 0.5, { + 0: 0.69673467, + -1: 0.30326533 + }, { + 0: 0.5, + -1: 0.5 + }), + (1.0, 2.0, 0.7, { + 0: 0.81606028, + -1: 0.08497712, + -2: 0.09896260 + }, { + 1: 0.49036933, + 0: 0.13605478, + -1: 0.37357589 + }), + (2.0, 4.0, 0.3, { + 0: 0.81606028, + -1: 0.11302356, + -2: 0.07091617 + }, { + 1: 0.20651251, + 0: 0.16706338, + -1: 0.62642411 + })) + def test_laplace_varying_parameter_and_sensitivity_optimistic( + self, parameter, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of optimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_laplace_mechanism( + parameter=parameter, + sensitivity=sensitivity, + pessimistic_estimate=False, + value_discretization_interval=1, + sampling_prob=sampling_prob) + + if expected_rounded_pmf_remove is None: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertTrue(pld._symmetric) + else: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + 
self.assertFalse(pld._symmetric) + + +class GaussianPrivacyLossDistributionTest(parameterized.TestCase): + + @parameterized.parameters((1.0, 1.0, -0.1), (1.0, 1.0, 1.1), (1.0, 1.0, 0.0), + (-0.1, 1.0, 1.0), (0.0, 1.0, 1.0), (1.0, -1.0, 1.0), + (1.0, 0.0, 1.0)) + def test_gaussian_value_errors(self, standard_deviation, sensitivity, + sampling_prob): + with self.assertRaises(ValueError): + privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation, + sensitivity=sensitivity, + value_discretization_interval=1, + sampling_prob=sampling_prob) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1.0, 1.0, { + 2: 0.12447741, + 1: 0.38292492, + 0: 0.24173034, + -1: 0.0668072 + }), + (5.0, 5.0, 1.0, { + 2: 0.12447741, + 1: 0.38292492, + 0: 0.24173034, + -1: 0.0668072 + }), + (1.0, 2.0, 1.0, { + -3: 0.00620967, + -2: 0.01654047, + -1: 0.04405707, + 0: 0.09184805, + 1: 0.14988228, + 2: 0.19146246, + 3: 0.19146246, + 4: 0.12447741 + }), + (3.0, 6.0, 1.0, { + -3: 0.00620967, + -2: 0.01654047, + -1: 0.04405707, + 0: 0.09184805, + 1: 0.14988228, + 2: 0.19146246, + 3: 0.19146246, + 4: 0.12447741 + }), + # Tests with sampling_prob < 1 + (1.0, 1.0, 0.8, { + 1: 0.50740234, + 0: 0.25872977, + -1: 0.04980776 + }, { + 2: 0.06409531, + 1: 0.39779076, + 0: 0.38512252 + }), + (5.0, 5.0, 0.6, { + 1: 0.50740234, + 0: 0.27649963, + -1: 0.03203791 + }, { + 2: 0.00921465, + 1: 0.40715514, + 0: 0.46170751 + }), + (1.0, 2.0, 0.4, { + 1: 0.65728462, + 0: 0.12528727, + -1: 0.02551767, + -2: 0.00785031 + }, { + 3: 0.06547773, + 2: 0.10625501, + 1: 0.18525477, + 0: 0.56826895 + }), + (3.0, 6.0, 0.2, { + 1: 0.65728462, + 0: 0.14208735, + -1: 0.01356463, + -2: 0.00300327 + }, { + 3: 0.00957871, + 2: 0.05499325, + 1: 0.19231652, + 0: 0.70480685 + })) + def test_gaussian_varying_standard_deviation_and_sensitivity( + self, standard_deviation, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of pessimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation, + sensitivity=sensitivity, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), + value_discretization_interval=1, + sampling_prob=sampling_prob) + + if expected_rounded_pmf_remove is None: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_remove._infinity_mass) + self.assertTrue(pld._symmetric) + else: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_add._infinity_mass) + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_remove._infinity_mass) + self.assertFalse(pld._symmetric) + + @parameterized.parameters((0.5, { + 3: 0.12447741, + 2: 0.19146246, + 1: 0.19146246, + 0: 0.14988228, + -1: 0.09184805, + -2: 0.06680720 + }), (0.3, { + 5: 0.05790353, + 4: 0.10261461, + 3: 0.11559390, + 2: 0.11908755, + 1: 0.11220275, + 0: 0.09668214, + -1: 0.07618934, + -2: 0.0549094, + -3: 0.0361912, + -4: 0.04456546 + })) + def test_gaussian_discretization(self, value_discretization_interval, + 
expected_rounded_probability_mass_function): + """Verifies correctness of pessimistic PLD for varying discretization.""" + pld = privacy_loss_distribution.from_gaussian_mechanism( + 1, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), + value_discretization_interval=value_discretization_interval) + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_remove._infinity_mass) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1.0, 1.0, { + 1: 0.30853754, + 0: 0.38292492, + -1: 0.24173034, + -2: 0.03809064 + }), + (5.0, 5.0, 1.0, { + 1: 0.30853754, + 0: 0.38292492, + -1: 0.24173034, + -2: 0.03809064 + }), + (1.0, 2.0, 1.0, { + 3: 0.30853754, + 2: 0.19146246, + 1: 0.19146246, + 0: 0.14988228, + -1: 0.09184805, + -2: 0.04405707, + -3: 0.01654047, + -4: 0.00434385 + }), + (3.0, 6.0, 1.0, { + 3: 0.30853754, + 2: 0.19146246, + 1: 0.19146246, + 0: 0.14988228, + -1: 0.09184805, + -2: 0.04405707, + -3: 0.01654047, + -4: 0.00434385 + }), + # Tests with sampling_prob < 1 + (1.0, 1.0, 0.8, { + 0: 0.69146246, + -1: 0.25872977, + -2: 0.0210912 + }, { + 1: 0.21708672, + 0: 0.39779076, + -1: 0.32533725 + }), + (5.0, 5.0, 0.6, { + 0: 0.69146246, + -1: 0.27649963, + -2: 0.00332135 + }, { + 1: 0.13113735, + 0: 0.40715514, + -1: 0.37085352 + }), + (1.0, 2.0, 0.4, { + 0: 0.84134475, + -1: 0.12528727, + -2: 0.02551767, + -3: 0.0059845 + }, { + 2: 0.14022127, + 1: 0.10625501, + 0: 0.18525477, + -1: 0.45708655 + }), + (3.0, 6.0, 0.2, { + 0: 0.84134475, + -1: 0.14208735, + -2: 0.01356463, + -3: 0.00113746 + }, { + 2: 0.04788338, + 1: 0.05499325, + 0: 0.19231652, + -1: 0.55718558 + })) + def test_gaussian_varying_standard_deviation_and_sensitivity_optimistic( + self, standard_deviation, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of optimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation, + sensitivity=sensitivity, + pessimistic_estimate=False, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), + value_discretization_interval=1, + sampling_prob=sampling_prob) + + if expected_rounded_pmf_remove is None: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_remove._infinity_mass) + self.assertTrue(pld._symmetric) + else: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_add._infinity_mass) + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), + pld._pmf_remove._infinity_mass) + self.assertFalse(pld._symmetric) + + +class DiscreteLaplacePrivacyLossDistributionTest(parameterized.TestCase): + + @parameterized.parameters((1.0, 1, -0.1), (1.0, 1, 1.1), (1.0, 1, 0.0), + (-0.1, 1, 1.0), (0.0, 1, 1.0), (1.0, -1, 1.0), + (1.0, 0, 1.0), (1.0, 0.5, 1.0), (1.0, 1.0, 1.0)) + def test_discrete_laplace_value_errors(self, parameter, sensitivity, + sampling_prob): + with 
self.assertRaises(ValueError): + privacy_loss_distribution.from_discrete_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + sampling_prob=sampling_prob) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1, 1, { + 1: 0.73105858, + -1: 0.26894142 + }), + (1.0, 2, 1, { + 2: 0.73105858, + 0: 0.17000340, + -2: 0.09893802 + }), + (0.8, 2, 1, { + 2: 0.68997448, + 0: 0.17072207, + -1: 0.13930345 + }), + (0.8, 3, 1, { + 3: 0.68997448, + 1: 0.17072207, + 0: 0.07671037, + -2: 0.06259307 + }), + # Tests with sampling_prob < 1 + (1.0, 1, 0.8, { + 1: 0.7310585786300049, + 0: 0.2689414213699951 + }, { + 1: 0.63863515, + 0: 0.36136485 + }), + (1.0, 2, 0.5, { + 1.0: 0.7310585786300049, + 0.0: 0.17000340156854787, + -1.0: 0.09893801980144723 + }, { + 2.0: 0.41499829921572606, + 1.0: 0.0, + 0.0: 0.5850017007842739 + }), + (0.8, 2, 0.3, { + 1: 0.6899744811276125, + 0: 0.3100255188723875 + }, { + 1: 0.30450475600966753, + 0: 0.6954952439903325 + }), + (0.8, 3, 0.2, { + 1: 0.8606965547551659, + 0: 0.07671037249501267, + -1: 0.0625930727498214 + }, { + 2: 0.1880693544253796, + 1: 0.09551271272152079, + 0: 0.7164179328530996, + })) + def test_discrete_laplace_varying_parameter_and_sensitivity( + self, parameter, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of pessimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_discrete_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + sampling_prob=sampling_prob) + if expected_rounded_pmf_remove is None: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertTrue(pld._symmetric) + else: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertFalse(pld._symmetric) + + @parameterized.parameters((0.1, { + 10: 0.73105858, + -10: 0.26894142 + }), (0.03, { + 34: 0.73105858, + -33: 0.26894142 + })) + def test_discrete_laplace_discretization( + self, value_discretization_interval, + expected_rounded_probability_mass_function): + """Verifies correctness of pessimistic PLD for varying discretization.""" + pld = privacy_loss_distribution.from_discrete_laplace_mechanism( + 1, value_discretization_interval=value_discretization_interval) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1, 1, { + 1: 0.73105858, + -1: 0.26894142 + }), + (1.0, 2, 1, { + 2: 0.73105858, + 0: 0.17000340, + -2: 0.09893802 + }), + (0.8, 2, 1, { + 1: 0.68997448, + 0: 0.17072207, + -2: 0.13930345 + }), + (0.8, 3, 1, { + 2: 0.68997448, + 0: 0.17072207, + -1: 0.07671037, + -3: 0.06259307 + }), + # Tests with sampling_prob < 1 + (1.0, 1, 0.8, { + 0: 0.7310585786300049, + -1: 0.2689414213699951 + }, { + 0: 0.63863515, + -1: 0.36136485 + }), + (1.0, 2, 0.5, { + 0: 0.9010619801985528, + -2: 0.09893801980144723, + }, { + 1: 0.41499829921572606, + 0: 0.17000340156854787, + -1: 0.41499829921572606 + }), + (0.8, 2, 0.3, { + 0: 0.8606965547551659, + -1: 0.13930344524483407 + }, { + 0: 0.47522682963722107, + -1: 
0.5247731703627789 + }), + (0.8, 3, 0.2, { + 0: 0.8606965547551659, + -1: 0.07671037249501267, + -2: 0.0625930727498214 + }, { + 1: 0.1880693544253796, + 0: 0.09551271272152079, + -1: 0.7164179328530996 + })) + def test_discrete_laplace_varying_parameter_and_sensitivity_optimistic( + self, parameter, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of optimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_discrete_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + pessimistic_estimate=False, + sampling_prob=sampling_prob) + if expected_rounded_pmf_remove is None: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertTrue(pld._symmetric) + else: + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertFalse(pld._symmetric) + + +class DiscreteGaussianPrivacyLossDistributionTest(parameterized.TestCase): + + @parameterized.parameters((1.0, 1, -0.1), (1.0, 1, 1.1), (1.0, 1, 0.0), + (-0.1, 1, 1.0), (0.0, 1, 1.0), (1.0, -1, 1.0), + (1.0, 0, 1.0), (1.0, 0.5, 1.0), (1.0, 1.0, 1.0)) + def test_discrete_gaussian_value_errors(self, sigma, sensitivity, + sampling_prob): + with self.assertRaises(ValueError): + privacy_loss_distribution.from_discrete_gaussian_mechanism( + sigma, sensitivity=sensitivity, truncation_bound=1, + sampling_prob=sampling_prob) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1, 1.0, { + 5000: 0.45186276, + -5000: 0.27406862 + }, 0.27406862), + (1.0, 2, 1.0, { + 0: 0.27406862 + }, 0.72593138), + (3.0, 1, 1.0, { + 556: 0.34579116, + -555: 0.32710442 + }, 0.32710442), + # Tests with sampling_prob < 1 + (1.0, 1, 0.6, { + -3287: 0.27406862, + 2693: 0.45186276, + 9163: 0.27406862 + }, 0.0, { + 3288: 0.3807451, + -2692: 0.34518628, + -9162: 0.10962745 + }, 0.16444117), + (1.0, 2, 0.3, { + 0: 0.27406862, + 3567: 0.7259314, + }, 0.0, { + 0: 0.27406862, + -3566: 0.50815197, + }, 0.2177794), + (3.0, 1, 0.1, { + -56: 0.32710442, + 55: 0.34579116, + 1054: 0.32710442 + }, 0.0, { + 57: 0.32897309, + -54: 0.34392248, + -1053: 0.29439398 + }, 0.03271044)) + def test_discrete_gaussian_varying_sigma_and_sensitivity( + self, sigma, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_infinity_mass_add, + expected_rounded_pmf_remove=None, expected_infinity_mass_remove=None): + """Verifies correctness of pessimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( + sigma, sensitivity=sensitivity, truncation_bound=1, + sampling_prob=sampling_prob) + if expected_rounded_pmf_remove is None: + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, + expected_infinity_mass_add) + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertTrue(pld._symmetric) + else: + self.assertAlmostEqual(pld._pmf_add._infinity_mass, + expected_infinity_mass_add) + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, + 
expected_infinity_mass_remove) + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertFalse(pld._symmetric) + + @parameterized.parameters((2, { + 15000: 0.24420134, + 5000: 0.40261995, + -5000: 0.24420134, + -15000: 0.05448868 + }, 0.05448868), (3, { + 25000: 0.05400558, + 15000: 0.24203622, + 5000: 0.39905027, + -5000: 0.24203623, + -15000: 0.05400558, + -25000: 0.00443305 + }, 0.00443305)) + def test_discrete_gaussian_truncation( + self, truncation_bound, expected_rounded_probability_mass_function, + expected_infinity_mass): + """Verifies correctness of pessimistic PLD for varying truncation bound.""" + pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( + 1, truncation_bound=truncation_bound) + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, + expected_infinity_mass) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1, 1.0, { + 5000: 0.45186276, + -5000: 0.27406862 + }, 0.27406862), + (1.0, 2, 1.0, { + 0: 0.27406862 + }, 0.72593138), + (3.0, 1, 1.0, { + 555: 0.34579116, + -556: 0.32710442 + }, 0.32710442), + # Tests with sampling_prob < 1 + (1.0, 1, 0.6, { + -3288: 0.27406862, + 2692: 0.45186276, + 9162: 0.27406862 + }, 0.0, { + 3287: 0.3807451, + -2693: 0.34518628, + -9163: 0.10962745 + }, 0.16444117), + (1.0, 2, 0.3, { + 0: 0.27406862, + 3566: 0.7259314, + }, 0.0, { + 0: 0.27406862, + -3567: 0.50815197, + }, 0.2177794), + (3.0, 1, 0.1, { + -57: 0.32710442, + 54: 0.34579116, + 1053: 0.32710442 + }, 0.0, { + 56: 0.32897309, + -55: 0.34392248, + -1054: 0.29439398 + }, 0.03271044)) + def test_discrete_gaussian_varying_sigma_and_sensitivity_optimistic( + self, sigma, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_infinity_mass_add, + expected_rounded_pmf_remove=None, expected_infinity_mass_remove=None): + """Verifies correctness of optimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( + sigma, sensitivity=sensitivity, truncation_bound=1, + pessimistic_estimate=False, + sampling_prob=sampling_prob) + if expected_rounded_pmf_remove is None: + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, + expected_infinity_mass_add) + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertTrue(pld._symmetric) + else: + self.assertAlmostEqual(pld._pmf_add._infinity_mass, + expected_infinity_mass_add) + test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, + pld._pmf_add._loss_probs) # pytype: disable=attribute-error + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, + expected_infinity_mass_remove) + test_util.assert_dictionary_almost_equal(self, + expected_rounded_pmf_remove, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + self.assertFalse(pld._symmetric) + + +class RandomizedResponsePrivacyLossDistributionTest(parameterized.TestCase): + + @parameterized.parameters((0.5, 2, { + 2: 0.75, + -1: 0.25 + }), (0.2, 4, { + 3: 0.85, + -2: 0.05, + 0: 0.1 + })) + def test_randomized_response_basic( + self, noise_parameter, num_buckets, + expected_rounded_probability_mass_function): + # Set value_discretization_interval = 1 here. 
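+    # (A sketch of where these expected values come from, assuming the usual
+    # randomized response mechanism: with noise_parameter p and num_buckets k,
+    # the true bucket is reported with probability 1 - p + p/k and every other
+    # bucket with probability p/k. For p = 0.2, k = 4 the privacy losses are
+    # log(0.85 / 0.05) ~= 2.833 with mass 0.85, log(0.05 / 0.85) ~= -2.833
+    # with mass 0.05, and 0 for the two remaining buckets with mass 0.1;
+    # pessimistic (ceiling) rounding at interval 1 then gives
+    # {3: 0.85, -2: 0.05, 0: 0.1} as expected above.)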
+ pld = privacy_loss_distribution.from_randomized_response( + noise_parameter, num_buckets, value_discretization_interval=1) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters((0.7, { + 5: 0.85, + -4: 0.05, + 0: 0.1 + }), (2, { + 2: 0.85, + -1: 0.05, + 0: 0.1 + })) + def test_randomized_response_discretization( + self, value_discretization_interval, + expected_rounded_probability_mass_function): + # Set noise_parameter = 0.2, num_buckets = 4 here. + # The true (non-discretized) PLD is + # {2.83321334: 0.85, -2.83321334: 0.05, 0: 0.1}. + pld = privacy_loss_distribution.from_randomized_response( + 0.2, 4, value_discretization_interval=value_discretization_interval) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters((0.5, 2, { + 1: 0.75, + -2: 0.25 + }), (0.2, 4, { + 2: 0.85, + -3: 0.05, + 0: 0.1 + })) + def test_randomized_response_optimistic( + self, noise_parameter, num_buckets, + expected_rounded_probability_mass_function): + # Set value_discretization_interval = 1 here. + pld = privacy_loss_distribution.from_randomized_response( + noise_parameter, + num_buckets, + pessimistic_estimate=False, + value_discretization_interval=1) + test_util.assert_dictionary_almost_equal( + self, expected_rounded_probability_mass_function, + pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + + @parameterized.parameters((0.0, 10), (1.1, 4), (0.5, 1)) + def test_randomized_response_value_errors(self, noise_parameter, num_buckets): + with self.assertRaises(ValueError): + privacy_loss_distribution.from_randomized_response( + noise_parameter, num_buckets) + + +class IdentityPrivacyLossDistributionTest(parameterized.TestCase): + + def test_identity(self): + pld = privacy_loss_distribution.identity() + test_util.assert_dictionary_almost_equal(self, pld._pmf_remove._loss_probs, + {0: 1}) # pytype: disable=attribute-error + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, 0) + + pld = pld.compose( + privacy_loss_distribution.PrivacyLossDistribution + .create_from_rounded_probability({ + 1: 0.5, + -1: 0.5 + }, 0, 1e-4)) + test_util.assert_dictionary_almost_equal( + self, + pld._pmf_remove._loss_probs, # pytype: disable=attribute-error + { + 1: 0.5, + -1: 0.5 + }) + self.assertAlmostEqual(pld._pmf_remove._infinity_mass, 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/fedml/core/dp/budget_accountant/privacy_loss_mechanism.py b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism.py similarity index 99% rename from python/fedml/core/dp/budget_accountant/privacy_loss_mechanism.py rename to python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism.py index bc57654ae8..7c37624a4d 100644 --- a/python/fedml/core/dp/budget_accountant/privacy_loss_mechanism.py +++ b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism.py @@ -14,7 +14,7 @@ import numpy as np from scipy import stats -from fedml.core.dp.budget_accountant import common +from fedml.core.dp.budget_accountant.pld import common class AdjacencyType(enum.Enum): diff --git a/python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism_test.py b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism_test.py new file mode 100644 index 0000000000..011e44b21c --- /dev/null +++ 
b/python/fedml/core/dp/budget_accountant/pld/privacy_loss_mechanism_test.py
@@ -0,0 +1,977 @@
+"""Tests for privacy_loss_mechanism."""
+
+import math
+import unittest
+from absl.testing import parameterized
+from scipy import stats
+
+from fedml.core.dp.budget_accountant.pld import common
+from fedml.core.dp.budget_accountant.pld import privacy_loss_mechanism
+from fedml.core.dp.budget_accountant.pld import test_util
+
+
+ADD = privacy_loss_mechanism.AdjacencyType.ADD
+REM = privacy_loss_mechanism.AdjacencyType.REMOVE
+
+
+class LaplacePrivacyLossTest(parameterized.TestCase):
+
+  @parameterized.parameters(
+      # Tests with sampling_prob = 1 for adjacency_type=ADD
+      (1.0, 1.0, 1.0, ADD, -0.1, 1.0), (1.0, 1.0, 1.0, ADD, 2.0, -1.0),
+      (1.0, 1.0, 1.0, ADD, 0.3, 0.4), (4.0, 4.0, 1.0, ADD, -0.4, 1.0),
+      (5.0, 5.0, 1.0, ADD, 7.0, -1.0), (7.0, 7.0, 1.0, ADD, 2.1, 0.4),
+      # Tests with sampling_prob < 1 for adjacency_type=ADD
+      (1.0, 1.0, 0.8, ADD, 1.1, -0.86483972516319),
+      (2.0, 1.0, 0.2, ADD, -0.2, 0.0819629071393439),
+      (1.0, 1.0, 0.5, ADD, 0.5, 0.0),
+      # Tests with sampling_prob = 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 1.0, REM, -1.1, 1.0), (1.0, 1.0, 1.0, REM, 1.0, -1.0),
+      (1.0, 1.0, 1.0, REM, -0.7, 0.4), (4.0, 4.0, 1.0, REM, -4.4, 1.0),
+      (5.0, 5.0, 1.0, REM, 2.0, -1.0), (7.0, 7.0, 1.0, REM, -4.9, 0.4),
+      # Tests with sampling_prob < 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 0.8, REM, -1.1, 0.86483972516319),
+      (2.0, 1.0, 0.2, REM, 0.2, -0.0819629071393439),
+      (1.0, 1.0, 0.5, REM, -0.5, 0.0))
+  def test_laplace_privacy_loss(self, parameter, sensitivity, sampling_prob,
+                                adjacency_type, x, expected_privacy_loss):
+    pl = privacy_loss_mechanism.LaplacePrivacyLoss(
+        parameter,
+        sensitivity=sensitivity,
+        sampling_prob=sampling_prob,
+        adjacency_type=adjacency_type)
+    self.assertAlmostEqual(expected_privacy_loss, pl.privacy_loss(x))
+
+  @parameterized.parameters(
+      # Tests with sampling_prob = 1 for adjacency_type=ADD
+      (1.0, 1.0, 1.0, ADD, 1.0, 0.0), (1.0, 1.0, 1.0, ADD, -1.0, math.inf),
+      (1.0, 1.0, 1.0, ADD, 0.4, 0.3), (4.0, 4.0, 1.0, ADD, 1.0, 0.0),
+      (5.0, 5.0, 1.0, ADD, -1.0, math.inf), (7.0, 7.0, 1.0, ADD, 0.4, 2.1),
+      (1.0, 1.0, 1.0, ADD, 2.0, -math.inf), (3, 1, 1, ADD, 3.1, -math.inf),
+      (4.0, 4.0, 1.0, ADD, 1.1, -math.inf),
+      # Tests with sampling_prob < 1 for adjacency_type=ADD
+      (1.0, 1.0, 0.8, ADD, -0.8649, math.inf),
+      (1.0, 1.0, 0.7, ADD, 1.0, -math.inf),
+      (2.0, 1.0, 0.2, ADD, 0.0819629071393439, 0),
+      (1.0, 1.0, 0.5, ADD, 0.0, 0.5),
+      # Tests with sampling_prob = 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 1.0, REM, 1.0, -1.0), (1.0, 1.0, 1.0, REM, -1.0, math.inf),
+      (1.0, 1.0, 1.0, REM, 0.4, -0.7), (4.0, 4.0, 1.0, REM, 1.0, -4.0),
+      (5.0, 5.0, 1.0, REM, -1.0, math.inf), (7.0, 7.0, 1.0, REM, 0.4, -4.9),
+      (1.0, 1.0, 1.0, REM, 2.0, -math.inf),
+      (3.0, 1.0, 1.0, REM, 3.1, -math.inf),
+      (4.0, 4.0, 1.0, REM, 1.1, -math.inf),
+      # Tests with sampling_prob < 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 0.8, REM, 0.86483972516319, -1.0),
+      (2.0, 1.0, 0.2, REM, -0.082, math.inf),
+      (1.0, 1.0, 0.7, REM, 1.0, -math.inf),
+      (1.0, 1.0, 0.5, REM, 0.0, -0.5))
+  def test_laplace_inverse_privacy_loss(self, parameter, sensitivity,
+                                        sampling_prob, adjacency_type,
+                                        privacy_loss, expected_x):
+    pl = privacy_loss_mechanism.LaplacePrivacyLoss(
+        parameter,
+        sensitivity=sensitivity,
+        sampling_prob=sampling_prob,
+        adjacency_type=adjacency_type)
+    self.assertAlmostEqual(expected_x, pl.inverse_privacy_loss(privacy_loss))
+
+  @parameterized.parameters(
+      # Tests with sampling_prob = 1 for adjacency_type=ADD
+      (1.0, 1.0, 1.0, ADD, 0.0, 1.0, {1: 0.5, -1: 0.18393972}),
+      (3.0, 3.0, 1.0, ADD, 0.0, 3.0, {1: 0.5, -1: 0.18393972}),
+      (1.0, 2.0, 1.0, ADD, 0.0, 2.0, {2: 0.5, -2: 0.06766764}),
+      (4.0, 8.0, 1.0, ADD, 0.0, 8.0, {2: 0.5, -2: 0.06766764}),
+      # Tests with sampling_prob < 1 for adjacency_type=ADD
+      (1.0, 1.0, 0.8, ADD, 0.0, 1.0, {
+          0.7046054708796524: 0.5,
+          -0.864839725163191: 0.18393972
+      }),
+      (3.0, 3.0, 0.6, ADD, 0.0, 3.0, {
+          0.4768628363884146: 0.5,
+          -0.7085130668623151: 0.18393972
+      }),
+      (1.0, 2.0, 0.7, ADD, 0.0, 2.0, {
+          0.929541389699331: 0.5,
+          -1.699706179357965: 0.06766764
+      }),
+      # Tests with sampling_prob = 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 1.0, REM, -1.0, 0.0, {1: 0.5, -1: 0.18393972}),
+      (3.0, 3.0, 1.0, REM, -3.0, 0.0, {1: 0.5, -1: 0.18393972}),
+      (1.0, 2.0, 1.0, REM, -2.0, 0.0, {2: 0.5, -2: 0.06766764}),
+      (4.0, 8.0, 1.0, REM, -8.0, 0.0, {2: 0.5, -2: 0.06766764}),
+      # Tests with sampling_prob < 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 0.8, REM, -1.0, 0.0, {
+          0.864839725163191: 0.4367879441171443,
+          -0.7046054708796524: 0.2471517764685769
+      }),
+      (3.0, 3.0, 0.6, REM, -3.0, 0.0, {
+          0.7085130668623151: 0.3735758882342885,
+          -0.4768628363884146: 0.3103638323514328
+      }),
+      (1.0, 2.0, 0.7, REM, -2.0, 0.0, {
+          1.699706179357965: 0.3703002924854919,
+          -0.929541389699331: 0.1973673491328145
+      }))
+  def test_laplace_privacy_loss_tail(self, parameter, sensitivity,
+                                     sampling_prob, adjacency_type,
+                                     expected_lower_x_truncation,
+                                     expected_upper_x_truncation,
+                                     expected_tail_probability_mass_function):
+    pl = privacy_loss_mechanism.LaplacePrivacyLoss(
+        parameter,
+        sensitivity=sensitivity,
+        sampling_prob=sampling_prob,
+        adjacency_type=adjacency_type)
+    tail_pld = pl.privacy_loss_tail()
+    self.assertAlmostEqual(expected_lower_x_truncation,
+                           tail_pld.lower_x_truncation)
+    self.assertAlmostEqual(expected_upper_x_truncation,
+                           tail_pld.upper_x_truncation)
+    test_util.assert_dictionary_almost_equal(
+        self, expected_tail_probability_mass_function,
+        tail_pld.tail_probability_mass_function)
+
+  @parameterized.parameters((-3.0, 1.0, 1.0, ADD), (0.0, 1.0, 1.0, ADD),
+                            (1.0, 0.0, 1.0, REM), (2.0, -1.0, 1.0, REM),
+                            (2.0, 1.0, 0.0, ADD), (1.0, 1.0, 1.2, REM),
+                            (2.0, 1.0, -0.1, REM))
+  def test_laplace_value_errors(self,
+                                parameter,
+                                sensitivity,
+                                sampling_prob=1.0,
+                                adjacency_type=ADD):
+    with self.assertRaises(ValueError):
+      privacy_loss_mechanism.LaplacePrivacyLoss(
+          parameter,
+          sensitivity=sensitivity,
+          sampling_prob=sampling_prob,
+          adjacency_type=adjacency_type)
+
+  @parameterized.parameters((1.0, 1.0, 1.0, 1.1), (1.0, 1.0, 1.0, -0.1),
+                            (1.0, 0.0, 1.0, 0.1), (1.0, -0.2, 1.0, 0.1),
+                            (1.0, 1.1, 1.0, 0.2))
+  def test_laplace_from_privacy_parameters_value_errors(
+      self, sensitivity, sampling_prob, epsilon, delta):
+    with self.assertRaises(ValueError):
+      privacy_loss_mechanism.LaplacePrivacyLoss.from_privacy_guarantee(
+          common.DifferentialPrivacyParameters(epsilon, delta),
+          sensitivity, sampling_prob=sampling_prob)
+
+  @parameterized.parameters((1.0, 1.0, ADD, 1.0, 0.0, 1.0),
+                            (1.0, 1.0, ADD, 1.0, 0.1, 1.0),
+                            (2.0, 1.0, REM, 1.0, 0.01, 2.0),
+                            (1.0, 1.0, REM, 3.0, 0.01, 0.33333333),
+                            (1.0, 0.8, ADD, 1.0, 0.0, 0.8720521537764049),
+                            (1.0, 0.5, REM, 1.0, 0.1, 0.671194938966816),
+                            (2.0, 0.9, ADD, 1.0, 0.01, 1.8728716669259162),
+                            (1.0, 0.7, REM, 3.0, 0.01, 0.2992554981396725))
+  def test_laplace_from_privacy_parameters(self, sensitivity, sampling_prob,
+                                           adjacency_type,
+                                           epsilon, delta,
+                                           expected_parameter):
+    pl = privacy_loss_mechanism.LaplacePrivacyLoss.from_privacy_guarantee(
+        common.DifferentialPrivacyParameters(epsilon, delta),
+        sensitivity,
+        sampling_prob=sampling_prob,
+        adjacency_type=adjacency_type)
+    self.assertAlmostEqual(expected_parameter, pl.parameter)
+    self.assertEqual(adjacency_type, pl.adjacency_type)
+
+  @parameterized.parameters(
+      # Tests with sampling_prob = 1 for adjacency_type=ADD
+      (1.0, 1.0, 1.0, ADD, 1.0, 0.0), (3.0, 3.0, 1.0, ADD, 1.0, 0.0),
+      (2.0, 4.0, 1.0, ADD, 2.0, 0.0),
+      (2.0, 4.0, 1.0, ADD, 0.5, 0.52763345),
+      (1.0, 1.0, 1.0, ADD, 0.0, 0.39346934),
+      (2.0, 2.0, 1.0, ADD, 0.0, 0.39346934),
+      (1.0, 1.0, 1.0, ADD, -2.0, 0.86466472),
+      # Tests with sampling_prob < 1 for adjacency_type=ADD
+      (1.0, 1.0, 0.8, ADD, 1.0, 0.0),
+      (2.0, 4.0, 0.8, ADD, 0.5, 0.3243606497234246),
+      (1.0, 1.0, 0.6, ADD, 0.2, 0.1401134521354217),
+      (2.0, 2.0, 0.3, ADD, 0.0, 0.1180408020862099),
+      (5.0, 5.0, 0.2, ADD, 2.0, 0.0),
+      # Tests with sampling_prob = 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 1.0, REM, 1.0, 0.0), (3.0, 3.0, 1.0, REM, 1.0, 0.0),
+      (2.0, 4.0, 1.0, REM, 2.0, 0.0), (2.0, 4.0, 1.0, REM, 0.5, 0.52763345),
+      (1.0, 1.0, 1.0, REM, 0.0, 0.39346934),
+      (2.0, 2.0, 1.0, REM, 0.0, 0.39346934),
+      (1.0, 1.0, 1.0, REM, -2.0, 0.86466472),
+      # Tests with sampling_prob < 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 0.8, REM, 1.0, 0.0),
+      (2.0, 4.0, 0.8, REM, 0.5, 0.4039564635032081),
+      (1.0, 1.0, 0.6, REM, 0.2, 0.1741992102060086),
+      (2.0, 2.0, 0.3, REM, 0.0, 0.1180408020862099),
+      (5.0, 5.0, 0.2, REM, -0.25, 0.2211992169285951))
+  def test_laplace_get_delta_for_epsilon(self, parameter, sensitivity,
+                                         sampling_prob, adjacency_type, epsilon,
+                                         expected_delta):
+    pl = privacy_loss_mechanism.LaplacePrivacyLoss(
+        parameter,
+        sensitivity=sensitivity,
+        sampling_prob=sampling_prob,
+        adjacency_type=adjacency_type)
+    self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon))
+
+
+class GaussianPrivacyLossTest(parameterized.TestCase):
+
+  @parameterized.parameters(
+      # Tests with sampling_prob = 1 for adjacency_type=ADD
+      (1.0, 1.0, 1.0, ADD, 5.0, -4.5), (1.0, 1.0, 1.0, ADD, -3.0, 3.5),
+      (1.0, 2.0, 1.0, ADD, 3.0, -4.0),
+      (4.0, 4.0, 1.0, ADD, 20.0, -4.5), (5.0, 5.0, 1.0, ADD, -15.0, 3.5),
+      (7.0, 14.0, 1.0, ADD, 21.0, -4.0),
+      # Tests with sampling_prob < 1 for adjacency_type=ADD
+      (1.0, 1.0, 0.8, ADD, 0.5, 0.0),
+      (1.0, 1.0, 0.5, ADD, -4, 0.6820994357113515),
+      (1.0, 2.0, 0.7, ADD, 0, 0.929541389699331),
+      (4.0, 4.0, 0.3, ADD, -16, 0.3519252431310541),
+      (5.0, 5.0, 0.45, ADD, 20, -2.737735427805667),
+      (7.0, 14.0, 0.9, ADD, -7, 2.150000710600199),
+      # Tests with sampling_prob = 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 1.0, REM, 4.0, -4.5), (1.0, 1.0, 1.0, REM, -4.0, 3.5),
+      (1.0, 2.0, 1.0, REM, 1.0, -4.0), (4.0, 4.0, 1.0, REM, 16.0, -4.5),
+      (5.0, 5.0, 1.0, REM, -20.0, 3.5), (7.0, 14.0, 1.0, REM, 7.0, -4.0),
+      # Tests with sampling_prob < 1 for adjacency_type=REMOVE
+      (1.0, 1.0, 0.8, REM, -0.5, 0.0),
+      (1.0, 1.0, 0.5, REM, 4.0, -0.6820994357113515),
+      (1.0, 2.0, 0.7, REM, 0.0, -0.929541389699331),
+      (4.0, 4.0, 0.3, REM, 16.0, -0.3519252431310541),
+      (5.0, 5.0, 0.45, REM, -20.0, 2.737735427805667),
+      (7.0, 14.0, 0.9, REM, 7.0, -2.150000710600199))
+  def test_gaussian_privacy_loss(self, standard_deviation, sensitivity,
+                                 sampling_prob, adjacency_type, x,
+                                 expected_privacy_loss):
+    pl = privacy_loss_mechanism.GaussianPrivacyLoss(
+        standard_deviation,
+        sensitivity=sensitivity,
+        sampling_prob=sampling_prob,
+        adjacency_type=adjacency_type)
+    self.assertAlmostEqual(expected_privacy_loss, pl.privacy_loss(x))
+
+  @parameterized.parameters(
+      # Tests with 
sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, -4.5, 5.0), (1.0, 1.0, 1.0, ADD, 3.5, -3.0), + (1.0, 2.0, 1.0, ADD, -4.0, 3.0), + (4.0, 4.0, 1.0, ADD, -4.5, 20.0), (5.0, 5.0, 1.0, ADD, 3.5, -15.0), + (7.0, 14.0, 1.0, ADD, -4.0, 21.0), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.8, ADD, 0.0, 0.5), + (1.0, 1.0, 0.5, ADD, 0.6820994357113515, -4.0), + (1.0, 2.0, 0.7, ADD, 0.929541389699331, 0.0), + (4.0, 4.0, 0.3, ADD, 0.3519252431310541, -16.0), + (5.0, 5.0, 0.45, ADD, -2.737735427805667, 20.0), + (7.0, 14.0, 0.9, ADD, 2.150000710600199, -7.0), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, -4.5, 4.0), (1.0, 1.0, 1.0, REM, 3.5, -4.0), + (1.0, 2.0, 1.0, REM, -4.0, 1.0), (4.0, 4.0, 1.0, REM, -4.5, 16.0), + (5.0, 5.0, 1.0, REM, 3.5, -20.0), (7.0, 14.0, 1.0, REM, -4.0, 7.0), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1.0, 0.8, REM, 0.0, -0.5), + (1.0, 1.0, 0.5, REM, -0.6820994357113515, 4.0), + (1.0, 2.0, 0.7, REM, -0.929541389699331, 0.0), + (4.0, 4.0, 0.3, REM, -0.3519252431310541, 16.0), + (5.0, 5.0, 0.45, REM, 2.737735427805667, -20.0), + (7.0, 14.0, 0.9, REM, -2.150000710600199, 7.0)) + def test_gaussian_inverse_privacy_loss(self, standard_deviation, sensitivity, + sampling_prob, adjacency_type, + privacy_loss, expected_x): + pl = privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_x, pl.inverse_privacy_loss(privacy_loss)) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, -1.0, 2.0, True, { + math.inf: 0.15865525, + -1.5: 0.02275013 + }), + (3.0, 3.0, 1.0, ADD, -3.0, 6.0, True, { + math.inf: 0.15865525, + -1.5: 0.02275013 + }), + (1.0, 2.0, 1.0, ADD, -1.0, 3.0, True, { + math.inf: 0.15865525, + -4.0: 0.00134989 + }), + (4.0, 8.0, 1.0, ADD, -4.0, 12.0, True, { + math.inf: 0.15865525, + -4.0: 0.00134989 + }), + (1.0, 1.0, 1.0, ADD, -1.0, 2.0, False, { + 1.5: 0.15865525, + }), + (3.0, 3.0, 1.0, ADD, -3.0, 6.0, False, { + 1.5: 0.15865525, + }), + (1.0, 2.0, 1.0, ADD, -1.0, 3.0, False, { + 4.0: 0.15865525, + }), + (4.0, 8.0, 1.0, ADD, -4.0, 12.0, False, { + 4.0: 0.15865525, + }), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.8, ADD, -1.0, 2.0, True, { + math.inf: 0.15865525, + -1.331139: 0.02275013 + }), + (3.0, 3.0, 0.8, ADD, -3.0, 6.0, True, { + math.inf: 0.15865525, + -1.331139: 0.02275013 + }), + (1.0, 2.0, 0.5, ADD, -1.0, 3.0, True, { + math.inf: 0.15865525, + -3.325003: 0.00134990 + }), + (4.0, 8.0, 0.6, ADD, -4.0, 12.0, True, { + math.inf: 0.15865525, + -3.501311: 0.00134990 + }), + (1.0, 1.0, 0.9, ADD, -1.0, 2.0, False, { + 1.20125: 0.15865525, + }), + (3.0, 3.0, 0.7, ADD, -3.0, 6.0, False, { + 0.784843: 0.15865525, + }), + (1.0, 2.0, 0.4, ADD, -1.0, 3.0, False, { + 0.498689: 0.15865525, + }), + (4.0, 8.0, 0.2, ADD, -4.0, 12.0, False, { + 0.218575: 0.15865525, + }), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, -2.0, 1.0, True, { + math.inf: 0.15865525, + -1.5: 0.02275013 + }), + (3.0, 3.0, 1.0, REM, -6.0, 3.0, True, { + math.inf: 0.15865525, + -1.5: 0.02275013 + }), + (1.0, 2.0, 1.0, REM, -3.0, 1.0, True, { + math.inf: 0.15865525, + -4.0: 0.00134989 + }), + (4.0, 8.0, 1.0, REM, -12.0, 4.0, True, { + math.inf: 0.15865525, + -4.0: 0.00134989 + }), + (1.0, 1.0, 1.0, REM, -2.0, 1.0, False, { + 1.5: 0.15865525, + }), + (3.0, 3.0, 1.0, 
REM, -6.0, 3.0, False, { + 1.5: 0.15865525, + }), + (1.0, 2.0, 1.0, REM, -3.0, 1.0, False, { + 4.0: 0.15865525, + }), + (4.0, 8.0, 1.0, REM, -12.0, 4.0, False, { + 4.0: 0.15865525, + }), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1.0, 0.8, REM, -2.0, 1.0, True, { + math.inf: 0.1314742295348015, + -0.971528299641668: 0.0499311563448348 + }), + (3.0, 3.0, 0.8, REM, -6.0, 3.0, True, { + math.inf: 0.1314742295348015, + -0.971528299641668: 0.0499311563448348 + }), + (1.0, 2.0, 0.5, REM, -3.0, 1.0, True, { + math.inf: 0.0800025759815436, + -0.6749972526421355: 0.0800025759815436, + }), + (4.0, 8.0, 0.6, REM, -12.0, 4.0, True, { + math.inf: 0.0957331115715263, + -0.88918789612552: 0.06427204039156087 + }), + (1.0, 1.0, 0.9, REM, -2.0, 1.0, False, { + 1.419129383720773: 0.1450647417331293, + }), + (3.0, 3.0, 0.7, REM, -6.0, 3.0, False, { + 1.23465205122806: 0.1178837173364737, + }), + (1.0, 2.0, 0.4, REM, -3.0, 1.0, False, { + 3.110812103874479: 0.06427204039156088, + }), + (4.0, 8, 0.2, REM, -12.0, 4.0, False, { + 2.461265214250274: 0.03281096921159548, + })) + def test_gaussian_privacy_loss_tail(self, standard_deviation, sensitivity, + sampling_prob, adjacency_type, + expected_lower_x_truncation, + expected_upper_x_truncation, + pessimistic_estimate, + expected_tail_probability_mass_function): + pl = privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + pessimistic_estimate=pessimistic_estimate, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-1), + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + tail_pld = pl.privacy_loss_tail() + self.assertAlmostEqual(expected_lower_x_truncation, + tail_pld.lower_x_truncation) + self.assertAlmostEqual(expected_upper_x_truncation, + tail_pld.upper_x_truncation) + test_util.assert_dictionary_almost_equal( + self, expected_tail_probability_mass_function, + tail_pld.tail_probability_mass_function) + + @parameterized.parameters((0.0, 1.0), (-10.0, 2.0), (4.0, 0.0), (2.0, -1.0), + (1.0, 1.0, 1.0, ADD, 1), (2.0, 1.0, 0.0, REM), + (1.0, 1.0, 1.2, ADD), (2.0, 1.0, -0.1, REM)) + def test_gaussian_value_errors(self, standard_deviation, sensitivity, + sampling_prob=1.0, adjacency_type=ADD, + log_mass_truncation_bound=-50): + with self.assertRaises(ValueError): + privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + log_mass_truncation_bound=log_mass_truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + + @parameterized.parameters((1.0, 1.0, 1.0, 0), (1.0, 1.0, 1.0, 1.1), + (1.0, 1.0, 1.0, -0.1), (1.0, 0, 1.0, 0.1), + (1.0, -0.2, 1.0, 0.1), (1.0, 1.1, 1.0, 0.2)) + def test_gaussian_from_privacy_parameters_value_errors( + self, sensitivity, sampling_prob, epsilon, delta): + with self.assertRaises(ValueError): + privacy_loss_mechanism.GaussianPrivacyLoss.from_privacy_guarantee( + common.DifferentialPrivacyParameters(epsilon, delta), + sensitivity, + sampling_prob=sampling_prob) + + @parameterized.parameters((1.0, 1.0, ADD, 1.0, 0.12693674, 1.0), + (2.0, 1.0, REM, 1.0, 0.12693674, 2.0), + (3.0, 1.0, ADD, 1.0, 0.78760074, 1.0), + (6.0, 1.0, REM, 1.0, 0.78760074, 2.0), + (1.0, 1.0, ADD, 2.0, 0.02092364, 1.0), + (5.0, 1.0, REM, 2.0, 0.02092364, 5.0), + (1.0, 1.0, ADD, 16.0, 1e-5, 0.344), + (2.0, 1.0, REM, 16.0, 1e-5, 0.688), + (1.0, 0.8, ADD, 1.0, 0.081695179, 1.0), + (2.0, 0.7, ADD, 1.0, 0.143886147, 1.5), + (3.0, 0.5, ADD, 1.0, 0.267379199, 1.3), + (6.0, 0.01, ADD, 1.0, 0.0030216468, 2.0), + (1.0, 0.1, REM, 2.0, 
2.355186318853955e-6, 1.0), + (5.0, 0.75, REM, 2.0, 0.0087720149, 5.0), + (1.0, 0.3, REM, 16, 0.0000329405, 0.3), + (2.0, 0.2, REM, 16, 0.0230238234, 0.4)) + def test_gaussian_from_privacy_parameters(self, sensitivity, sampling_prob, + adjacency_type, epsilon, delta, + expected_standard_deviation): + pl = privacy_loss_mechanism.GaussianPrivacyLoss.from_privacy_guarantee( + common.DifferentialPrivacyParameters(epsilon, delta), + sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_standard_deviation, pl.standard_deviation, + 3) + self.assertEqual(adjacency_type, pl.adjacency_type) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, 1.0, 0.12693674), + (2.0, 2.0, 1.0, ADD, 1.0, 0.12693674), + (1.0, 3.0, 1.0, ADD, 1.0, 0.78760074), + (2.0, 6.0, 1.0, ADD, 1.0, 0.78760074), + (1.0, 1.0, 1.0, ADD, 2.0, 0.02092364), + (5.0, 5.0, 1.0, ADD, 2.0, 0.02092364), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.8, ADD, 1.0, 0.0231362104090899), + (2.0, 2.0, 0.8, ADD, 1.0, 0.0231362104090899), + (1.0, 3.0, 0.7, ADD, 1.0, 0.1195051215523554), + (2.0, 6.0, 0.4, ADD, 1.0, 0.0), + (1.0, 1.0, 0.3, ADD, 2.0, 0.0), + (5.0, 5.0, 0.2, ADD, 2.0, 0.0), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, 1.0, 0.12693674), + (2.0, 2.0, 1.0, REM, 1.0, 0.12693674), + (1.0, 3.0, 1.0, REM, 1.0, 0.78760074), + (2.0, 6.0, 1.0, REM, 1.0, 0.78760074), + (1.0, 1.0, 1.0, REM, 2.0, 0.02092364), + (5.0, 5.0, 1.0, REM, 2.0, 0.02092364), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1.0, 0.8, REM, 1.0, 0.0816951786585355), + (2.0, 2.0, 0.8, REM, 1.0, 0.0816951786585355), + (1.0, 3.0, 0.7, REM, 1.0, 0.5356298793262404), + (2.0, 6.0, 0.4, REM, 1.0, 0.2888308005139968), + (1.0, 1.0, 0.3, REM, 2.0, 0.0003341102928869332), + (5.0, 5.0, 0.2, REM, -0.25, 0.2211992169285951)) + def test_gaussian_get_delta_for_epsilon( + self, standard_deviation, sensitivity, sampling_prob, adjacency_type, + epsilon, expected_delta): + pl = privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon)) + + +class DiscreteLaplacePrivacyLossDistributionTest(parameterized.TestCase): + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, 0, 1.0), + (1.0, 1, 1.0, ADD, 1, -1.0), + (0.3, 2, 1.0, ADD, 0, 0.6), + (0.3, 2, 1.0, ADD, 1, 0.0), + (0.3, 2, 1.0, ADD, 2, -0.6), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, 1, -0.86483972516319), + (1.0, 1, 0.8, ADD, -1, 0.7046054708796525), + (0.3, 2, 0.5, ADD, 2, -0.3443407699259402), + (0.3, 3, 0.5, ADD, 2, -0.1612080639085818), + (0.3, 2, 0.4, ADD, 1, 0), + (0.3, 2, 0.3, ADD, 0, 0.1454380063386891), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, -1, 1.0), + (1.0, 1, 1.0, REM, 0, -1.0), + (0.3, 2, 1.0, REM, -2, 0.6), + (0.3, 2, 1.0, REM, -1, 0), + (0.3, 2, 1.0, REM, 0, -0.6), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, -1, 0.86483972516319), + (1.0, 1, 0.8, REM, 1, -0.7046054708796525), + (0.3, 2, 0.5, REM, -2, 0.3443407699259402), + (0.3, 3, 0.5, REM, -2, 0.1612080639085818), + (0.3, 2, 0.4, REM, -1, 0), + (0.3, 2, 0.3, REM, 0, -0.1454380063386891)) + def test_discrete_laplace_privacy_loss(self, parameter, 
sensitivity, + sampling_prob, adjacency_type, x, + expected_privacy_loss): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_privacy_loss, pl.privacy_loss(x)) + + @parameterized.parameters((1.0, 1, 0.4), (2.0, 7, -1.1)) + def test_discrete_laplace_privacy_loss_value_errors( + self, parameter, sensitivity, x): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, sensitivity=sensitivity) + with self.assertRaises(ValueError): + pl.privacy_loss(x) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, 1.1, -math.inf), + (1.0, 1, 1.0, ADD, 0.9, 0.0), + (1.0, 1, 1.0, ADD, -1.0, math.inf), + (0.3, 2, 1.0, ADD, 0.7, -math.inf), + (0.3, 2, 1.0, ADD, 0.2, 0), + (0.3, 2, 1.0, ADD, 0.0, 1.0), + (0.3, 2, 1.0, ADD, -0.6, math.inf), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, 0.9, -math.inf), + (1.0, 1, 0.8, ADD, 0.7, 0), + (1.0, 1, 0.8, ADD, -0.9, math.inf), + (0.3, 2, 0.5, ADD, 0.26, -math.inf), + (0.3, 2, 0.4, ADD, 0.0, 1.0), + (0.3, 2, 0.3, ADD, -0.23, math.inf), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, 1.1, -math.inf), + (1.0, 1, 1.0, REM, 0.9, -1.0), + (1.0, 1, 1.0, REM, -1.0, math.inf), + (0.3, 2, 1.0, REM, 0.7, -math.inf), + (0.3, 2, 1.0, REM, 0.2, -2.0), + (0.3, 2, 1.0, REM, 0.0, -1.0), + (0.3, 2, 1.0, REM, -0.6, math.inf), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, 0.9, -math.inf), + (1.0, 1, 0.8, REM, 0.86483972516319, -1.0), + (1.0, 1, 0.8, REM, -0.8, math.inf), + (0.3, 2, 0.5, REM, 0.35, -math.inf), + (0.3, 2, 0.4, REM, 0.0, -1.0), + (0.3, 2, 0.3, REM, -0.15, math.inf)) + def test_discrete_laplace_inverse_privacy_loss(self, parameter, sensitivity, + sampling_prob, adjacency_type, + privacy_loss, expected_x): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_x, pl.inverse_privacy_loss(privacy_loss)) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, 1, 0, { + 1: 0.73105858, + -1: 0.26894142 + }), + (0.3, 2, 1.0, ADD, 1, 1, { + 0.6: 0.57444252, + -0.6: 0.31526074 + }), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, 1, 0, { + 0.7046054708796525: 0.73105858, + -0.86483972516319: 0.26894142 + }), + (0.3, 2, 0.6, ADD, 1, 1, { + 0.3156879596155301: 0.5744425168116589, + -0.4009692034808894: 0.3152607374933769 + }), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, 0, -1, { + 1: 0.73105858, + -1: 0.26894142 + }), + (0.3, 2, 1.0, REM, -1, -1, { + 0.6: 0.57444252, + -0.6: 0.31526074 + }), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, 0, -1, { + 0.86483972516319: 0.638635147178003, + -0.7046054708796525: 0.361364852821997 + }), + (0.3, 2, 0.6, REM, -1, -1, { + 0.4009692034808894: 0.4707698050843462, + -0.3156879596155301: 0.4189334492206898 + })) + def test_discrete_laplace_privacy_loss_tail( + self, parameter, sensitivity, sampling_prob, adjacency_type, + expected_lower_x_truncation, expected_upper_x_truncation, + expected_tail_probability_mass_function): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + 
adjacency_type=adjacency_type) + tail_pld = pl.privacy_loss_tail() + self.assertAlmostEqual(expected_lower_x_truncation, + tail_pld.lower_x_truncation) + self.assertAlmostEqual(expected_upper_x_truncation, + tail_pld.upper_x_truncation) + test_util.assert_dictionary_almost_equal( + self, expected_tail_probability_mass_function, + tail_pld.tail_probability_mass_function) + + @parameterized.parameters((-3.0, 1), (0.0, 1), (2.0, 0.5), + (2.0, -1), (1.0, 0), + (2.0, 1, 0.0, ADD), (1.0, 1, 1.2, REM), + (2.0, 1, -0.1, ADD)) + def test_discrete_laplace_value_errors(self, parameter, sensitivity, + sampling_prob=1.0, adjacency_type=ADD): + with self.assertRaises(ValueError): + privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + + @parameterized.parameters((-1, 0.8, 1.0, 0.1), (0.5, 1.0, 1.0, 0.1), + (0, 1.0, 1.0, 0.2), (1, 1.0, 1.0, -0.1), + (1, 0.8, 1.0, 1.1), (1, 0.0, 1.0, 0.1), + (3, 1.1, 1.0, 0.1), (1, -0.2, 1.0, 0.1)) + def test_discrete_laplace_from_privacy_parameters_value_errors( + self, sensitivity, sampling_prob, epsilon, delta): + with self.assertRaises(ValueError): + privacy_loss_mechanism.DiscreteLaplacePrivacyLoss.from_privacy_guarantee( + common.DifferentialPrivacyParameters(epsilon, delta), + sensitivity, sampling_prob=sampling_prob) + + @parameterized.parameters((1, 1.0, ADD, 1.0, 0.0, 1.0), + (1, 1.0, REM, 1.0, 0.1, 1.0), + (2, 1.0, ADD, 1.0, 0.01, 0.5), + (1, 1.0, REM, 3.0, 0.01, 3.0), + (1, 0.8, ADD, 1.0, 0.0, 1.1467204062), + (1, 0.7, REM, 1.0, 0.1, 1.2397322437), + (2, 0.3, ADD, 1.0, 0.01, 0.9531096869), + (1, 0.2, REM, 3.0, 0.01, 4.5687933452)) + def test_discrete_laplace_from_privacy_parameters( + self, sensitivity, sampling_prob, adjacency_type, + epsilon, delta, expected_parameter): + pl = (privacy_loss_mechanism.DiscreteLaplacePrivacyLoss + .from_privacy_guarantee( + common.DifferentialPrivacyParameters( + epsilon, delta), + sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type)) + self.assertAlmostEqual(expected_parameter, pl.parameter) + self.assertEqual(adjacency_type, pl.adjacency_type) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, 1.0, 0.0), (0.333333, 3, 1.0, ADD, 1.0, 0.0), + (0.5, 4, 1.0, ADD, 2.0, 0.0), (0.5, 4, 1.0, ADD, 0.5, 0.54202002), + (0.5, 4, 1.0, ADD, 1.0, 0.39346934), + (0.5, 4, 1.0, ADD, -0.5, 0.72222110), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, 1.0, 0.0), + (0.333333, 3, 0.8, ADD, 1.0, 0.0), + (0.5, 4, 0.7, ADD, 0.5, 0.2293628348747755), + (0.5, 4, 0.6, ADD, 0.6, 0.07668344250639381), + (0.5, 4, 0.3, ADD, 0.5, 0.0), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, 1.0, 0.0), (0.333333, 3, 1.0, REM, 1.0, 0.0), + (0.5, 4, 1.0, REM, 2.0, 0.0), (0.5, 4, 1.0, REM, 0.5, 0.54202002), + (0.5, 4, 1.0, REM, 1.0, 0.39346934), + (0.5, 4, 1.0, REM, -0.5, 0.72222110), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, 1.0, 0.0), + (0.333333, 3, 0.8, REM, 1.0, 0.0), + (0.5, 4, 0.7, REM, 0.5, 0.3523838505224567), + (0.5, 4, 0.6, REM, 1.0, 0.178181891763215), + (0.5, 4, 0.3, REM, 0.5, 0.1068168460276349), + (1.0, 1, 0.2, REM, -0.25, 0.2211992169285951)) + def test_discrete_laplace_get_delta_for_epsilon(self, parameter, sensitivity, + sampling_prob, adjacency_type, + epsilon, expected_delta): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, 
+ sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon)) + + +class DiscreteGaussianPrivacyLossTest(parameterized.TestCase): + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, 5, -4.5), + (1, 1, 1, ADD, -3, 3.5), + (1, 2, 1, ADD, 3, -4.0), + (4.0, 4, 1.0, ADD, 20, -4.5), + (5, 5, 1, ADD, -15, 3.5), + (7.0, 14, 1.0, ADD, 21, -4.0), + (1.0, 1, 1.0, ADD, -12, math.inf), + (1.0, 1, 1.0, ADD, 13, -math.inf), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, -4, 1.565960898891332), + (1.0, 1, 0.7, ADD, 4, -3.156183763141021), + (1.0, 2, 0.4, ADD, -1, 0.4986891437585786), + (4.0, 4, 0.3, ADD, -16, 0.3519252431310541), + (5.0, 5, 0.4, ADD, 20, -2.628009438900115), + (7.0, 14, 0.1, ADD, -7, 0.1033275126220077), + (1.0, 1, 0.3, ADD, 13, -math.inf), + (1.0, 1, 0.3, ADD, -12, 0.3566749439387324), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, 4, -4.5), (1, 1, 1, REM, -4, 3.5), + (1.0, 2, 1.0, REM, 1, -4.0), (4, 4, 1, REM, 16, -4.5), + (5.0, 5, 1.0, REM, -20, 3.5), (7, 14, 1, REM, 7, -4.0), + (1.0, 1, 1.0, REM, -13, math.inf), + (1.0, 1, 1.0, REM, 12, -math.inf), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, 4, -1.565960898891332), + (1.0, 1, 0.7, REM, -4, 3.156183763141021), + (1.0, 2, 0.4, REM, 1, -0.4986891437585786), + (4.0, 4, 0.3, REM, 16, -0.3519252431310541), + (5.0, 5, 0.4, REM, -20, 2.628009438900115), + (7.0, 14, 0.1, REM, 7, -0.1033275126220077), + (1.0, 1, 0.3, REM, -13, math.inf), + (1.0, 1, 0.3, REM, 12, -0.3566749439387324)) + def test_discrete_gaussian_privacy_loss(self, sigma, sensitivity, + sampling_prob, adjacency_type, + x, expected_privacy_loss): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_privacy_loss, pl.privacy_loss(x)) + + @parameterized.parameters((1.0, 1, 1.0, ADD, 0.4), (2.0, 7, 1.0, REM, -1.1), + (1.0, 1, 0.6, ADD, -13), (2.0, 1, 0.5, ADD, 26), + (1.0, 1, 0.6, REM, -14), (2.0, 1, 0.5, REM, 25)) + def test_discrete_gaussian_privacy_loss_value_errors(self, sigma, sensitivity, + sampling_prob, + adjacency_type, x): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + with self.assertRaises(ValueError): + pl.privacy_loss(x) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, -4.5, 5), + (1.0, 1, 1.0, ADD, 3.5, -3), + (1.0, 2, 1.0, ADD, -4.0, 3), + (4.0, 4, 1.0, ADD, -4.51, 20), + (5.0, 5, 1.0, ADD, 3.49, -15), + (7.0, 14, 1.0, ADD, -4.0, 21), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, 1.565961, -5), + (1.0, 1, 0.7, ADD, -3.156182, 3), + (1.0, 2, 0.4, ADD, 0.4986892, -2), + (4.0, 4, 0.3, ADD, 0.3519254, -17), + (5.0, 5, 0.4, ADD, -2.6280094, 19), + (7.0, 14, 0.1, ADD, 0.1033276, -8), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, -4.5, 4), + (1.0, 1, 1.0, REM, 3.5, -4), + (1.0, 2, 1.0, REM, -4.0, 1), + (4.0, 4, 1.0, REM, -4.51, 16), + (5.0, 5, 1.0, REM, 3.49, -20), + (7.0, 14, 1.0, REM, -4.0, 7), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, -1.565961, 4), + (1.0, 1, 0.7, REM, 3.156182, -4), + (1.0, 2, 0.4, REM, -0.4986892, 1), + (4.0, 4, 
0.3, REM, -0.3519254, 16), + (5.0, 5, 0.4, REM, 2.6280094, -20), + (7.0, 14, 0.1, REM, -0.1033276, 7)) + def test_discrete_gaussian_inverse_privacy_loss(self, sigma, sensitivity, + sampling_prob, adjacency_type, + privacy_loss, expected_x): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_x, pl.inverse_privacy_loss(privacy_loss)) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 2, 1.0, ADD, -1, 2, { + math.inf: 0.05448868 + }), + (1.0, 2, 2, 1.0, ADD, 0, 2, { + math.inf: 0.29869003 + }), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 2, 0.8, ADD, -2, 2, { + math.inf: 0.0 + }), + (1.0, 2, 2, 0.7, ADD, -2, 2, { + math.inf: 0.0 + }), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 2, 1.0, REM, -2, 1, { + math.inf: 0.05448868 + }), + (1.0, 2, 2, 1.0, REM, -2, 0, { + math.inf: 0.29869003 + }), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 2, 0.8, REM, -2, 2, { + math.inf: 0.043590944 + }), + (1.0, 2, 2, 0.7, REM, -2, 2, { + math.inf: 0.209083021 + })) + def test_discrete_gaussian_privacy_loss_tail( + self, sigma, sensitivity, truncation_bound, sampling_prob, adjacency_type, + expected_lower_x_truncation, expected_upper_x_truncation, + expected_tail_probability_mass_function): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + truncation_bound=truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + tail_pld = pl.privacy_loss_tail() + self.assertAlmostEqual(expected_lower_x_truncation, + tail_pld.lower_x_truncation) + self.assertAlmostEqual(expected_upper_x_truncation, + tail_pld.upper_x_truncation) + test_util.assert_dictionary_almost_equal( + self, expected_tail_probability_mass_function, + tail_pld.tail_probability_mass_function) + + @parameterized.parameters((-3.0, 1), (0.0, 1), (2.0, 0.5), (1.0, 0), + (2.0, -1), (2.0, 4, 1, ADD, 1), + (2.0, 1, 0), (1.0, 1, 1.2), (2.0, 1, -0.1)) + def test_discrete_gaussian_value_errors(self, sigma, sensitivity, + sampling_prob=1.0, adjacency_type=ADD, + truncation_bound=None): + with self.assertRaises(ValueError): + privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + truncation_bound=truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + + @parameterized.parameters((1.0, 1, 1, { + -1.5: 0, + -1: 0.27406862, + 0: 0.7259314, + 1: 1, + 1.5: 1 + }), (3.0, 2, 2, { + -2.1: 0, + -2: 0.17820326, + -1: 0.38872553, + 0: 0.61127447, + 1: 0.82179674, + 2: 1, + 2.7: 1 + })) + def test_discrete_gaussian_noise_cdf(self, sigma, sensitivity, + truncation_bound, x_to_cdf_value): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, sensitivity=sensitivity, truncation_bound=truncation_bound) + for x, cdf_value in x_to_cdf_value.items(): + self.assertAlmostEqual(cdf_value, pl.noise_cdf(x)) + + @parameterized.parameters((1.0, 1, 1, 0.7403629), (3.0, 2, 2, 1.3589226)) + def test_discrete_gaussian_std(self, sigma, sensitivity, truncation_bound, + expected_std): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, sensitivity=sensitivity, truncation_bound=truncation_bound) + self.assertAlmostEqual(expected_std, pl.standard_deviation()) + + @parameterized.parameters((-1, 1.0, 1.0, 0.1), (0.5, 1.0, 1.0, 0.1), + (0, 0.7, 1.0, 0.2), (1, 1.0, 1.0, 0.0), + (1, 1.0, 
1.0, 1.1), (1, 1.0, 1.0, -0.1),
+                            (1, 0.0, 1.0, 0.1), (1, 1.1, 1.0, 0.1),
+                            (1, -0.1, 1.0, 0.1))
+  def test_discrete_gaussian_from_privacy_parameters_value_errors(
+      self, sensitivity, sampling_prob, epsilon, delta):
+    with self.assertRaises(ValueError):
+      privacy_loss_mechanism.DiscreteGaussianPrivacyLoss.from_privacy_guarantee(
+          common.DifferentialPrivacyParameters(epsilon, delta),
+          sensitivity,
+          sampling_prob=sampling_prob)
+
+  @parameterized.parameters(
+      (1, 1.0, ADD, 1.0, 0.12693674, 1.041),
+      (2, 1.0, REM, 1.0, 0.12693674, 1.972),
+      (3, 1.0, ADD, 1.0, 0.78760074, 0.993),
+      (6, 1.0, REM, 1.0, 0.78760074, 2.014),
+      (1, 1.0, ADD, 2.0, 0.02092364, 1.038),
+      (5, 1.0, REM, 2.0, 0.02092364, 5.008),
+      (1, 1.0, ADD, 16.0, 1e-5, 0.306),
+      (2, 1.0, REM, 16.0, 1e-5, 0.703),
+      (1, 0.8, REM, 1.0, 0.07850075632001355, 1.041),
+      (2, 0.7, ADD, 1.0, 0.06665777574091321, 1.972),
+      (3, 0.4, REM, 1.0, 0.27122238416249084, 0.993),
+      (6, 0.5, ADD, 1.0, 0.3604879495041193, 2.014),
+      (1, 0.3, REM, 2.0, 0.0002834863230938751, 1.038),
+      (5, 0.1, ADD, 2.0, 2.340272571167144e-06, 5.008),
+      (2, 0.9, REM, 16.0, 4.518347272315105e-06, 0.703))
+  def test_discrete_gaussian_from_privacy_parameters(self, sensitivity,
+                                                     sampling_prob,
+                                                     adjacency_type, epsilon,
+                                                     delta, expected_sigma):
+    pl = (
+        privacy_loss_mechanism.DiscreteGaussianPrivacyLoss
+        .from_privacy_guarantee(
+            common.DifferentialPrivacyParameters(epsilon, delta),
+            sensitivity,
+            sampling_prob=sampling_prob,
+            adjacency_type=adjacency_type))
+    self.assertAlmostEqual(expected_sigma, pl._sigma, 3)
+    self.assertEqual(adjacency_type, pl.adjacency_type)
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/python/fedml/core/dp/budget_accountant/pld/test_util.py b/python/fedml/core/dp/budget_accountant/pld/test_util.py
new file mode 100644
index 0000000000..fa080f3471
--- /dev/null
+++ b/python/fedml/core/dp/budget_accountant/pld/test_util.py
@@ -0,0 +1,60 @@
+"""Helper functions for testing.
+"""
+
+from typing import Optional, Mapping, Union
+import unittest  # pylint:disable=unused-import
+
+import numpy as np
+
+
+def assert_dictionary_contained(testcase: 'unittest.TestCase',
+                                dict1: Mapping[Union[int, float], float],
+                                dict2: Mapping[Union[int, float], float]):
+  """Checks whether the first dictionary is contained in the second.
+
+  Keys of type float are checked for almost equality. Values are always checked
+  for almost equality.
+
+  Keys corresponding to values close to 0 are ignored in this test.
+
+  Args:
+    testcase: unittest.TestCase object to assert containment of dictionary.
+    dict1: first dictionary
+    dict2: second dictionary
+  """
+  for i in dict1.keys():
+    if not np.isclose(dict1[i], 0):
+      found = False
+      for j in dict2.keys():
+        if np.isclose(i, j) and np.isclose(dict1[i], dict2[j]):
+          found = True
+          break
+      testcase.assertTrue(found, msg=f'Key {i} in {dict1} not found in {dict2}')
+
+
+def assert_dictionary_almost_equal(testcase: 'unittest.TestCase',
+                                   dictionary1: Mapping[Union[int, float],
+                                                        float],
+                                   dictionary2: Mapping[Union[int, float],
+                                                        float]):
+  """Checks that two dictionaries have almost equal values.
+
+  Keys of type float are checked for almost equality. Values are always checked
+  for almost equality.
+
+  Keys corresponding to values close to 0 are ignored in this test.
+
+  Args:
+    testcase: unittest.TestCase object to assert equality of dictionaries.
+    dictionary1: first dictionary
+    dictionary2: second dictionary
+  """
+  assert_dictionary_contained(testcase, dictionary1, dictionary2)
+  assert_dictionary_contained(testcase, dictionary2, dictionary1)
+
+
+def assert_almost_greater_equal(testcase: 'unittest.TestCase',
+                                a: float, b: float, msg: Optional[str] = None):
+  """Asserts that the first value is greater than or almost equal to the second value."""
+  msg = f'{a} is less than {b}' if msg is None else msg
+  testcase.assertTrue(a >= b or np.isclose(a, b), msg=msg)
diff --git a/python/fedml/core/dp/budget_accountant/pld/test_util_test.py b/python/fedml/core/dp/budget_accountant/pld/test_util_test.py
new file mode 100644
index 0000000000..cb4b2ce5b9
--- /dev/null
+++ b/python/fedml/core/dp/budget_accountant/pld/test_util_test.py
@@ -0,0 +1,104 @@
+"""Tests for test_util."""
+
+import unittest
+from absl.testing import parameterized
+
+from fedml.core.dp.budget_accountant.pld import test_util
+
+
+class TestUtilTest(parameterized.TestCase):
+
+  @parameterized.parameters(
+      # Dictionary contained
+      ({
+          1: 0.1,
+          2: 0.3
+      }, {
+          1: 0.1,
+          2: 0.3,
+          4: 0.2
+      }, True),
+      ({
+          1: 1e-10,
+          2: 0.3
+      }, {
+          2: 0.3,
+          4: 0.2
+      }, True),
+      ({
+          1.0: 0.1 + 1e-10,
+          2.0: 0.3
+      }, {
+          1.0 + 1e-10: 0.1,
+          2.0: 0.3 + 1e-10,
+          4.0: 0.2
+      }, True),
+      # Dictionary not contained
+      ({
+          1: 0.1,
+          2: 0.3
+      }, {
+          2: 0.3,
+          4: 0.2
+      }, False))
+  def test_assert_dictionary_contained(self, dict1, dict2, expected_result):
+    if expected_result:
+      test_util.assert_dictionary_contained(self, dict1, dict2)
+    else:
+      with self.assertRaises(AssertionError):
+        test_util.assert_dictionary_contained(self, dict1, dict2)
+
+  @parameterized.parameters(
+      # Dictionary almost equal
+      ({
+          1: 0.1,
+          2: 0.3,
+      }, {
+          1: 0.1,
+          2: 0.3
+      }, True),
+      ({
+          1: 1e-10,
+          2: 0.3,
+          4: 0.2,
+      }, {
+          2: 0.3,
+          4: 0.2
+      }, True),
+      ({
+          1.0: 0.1 + 1e-10,
+          2.0: 0.3,
+          4.0 + 1e-10: 0.2
+      }, {
+          1.0 + 1e-10: 0.1,
+          2.0: 0.3 + 1e-10,
+          4.0: 0.2 - 1e-10
+      }, True),
+      # Dictionary not almost equal
+      ({
+          1: 0.1,
+          2: 0.3,
+      }, {
+          2: 0.3,
+          4: 0.2
+      }, False))
+  def test_dictionary_almost_equal(self, dict1, dict2, expected_result):
+    if expected_result:
+      test_util.assert_dictionary_almost_equal(self, dict1, dict2)
+    else:
+      with self.assertRaises(AssertionError):
+        test_util.assert_dictionary_almost_equal(self, dict1, dict2)
+
+  @parameterized.parameters(
+      (2, 1, True), (2, 2+1e-10, True), (2+1e-10, 2, True),
+      (1, 2, False))
+  def test_assert_almost_greater_equal(self, a, b, expected_result):
+    if expected_result:
+      test_util.assert_almost_greater_equal(self, a, b)
+    else:
+      with self.assertRaises(AssertionError):
+        test_util.assert_almost_greater_equal(self, a, b)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/python/fedml/core/dp/budget_accountant/pld_pmf.py b/python/fedml/core/dp/budget_accountant/pld_pmf.py
deleted file mode 100644
index fc43142bf6..0000000000
--- a/python/fedml/core/dp/budget_accountant/pld_pmf.py
+++ /dev/null
@@ -1,517 +0,0 @@
-"""Probability mass function for privacy loss distributions.
-
-This file implements work the privacy loss distribution (PLD) probability mass
-functions (PMF)and its basic functionalities.
Please refer to the -supplementary material below for more details: -../../common_docs/Privacy_Loss_Distributions.pdf -""" - -import abc -import itertools -import math -from typing import Iterable, List, Mapping, Tuple, Union -import numpy as np -from scipy import signal - -from fedml.core.dp.budget_accountant import common - -ArrayLike = Union[np.ndarray, List[float]] -_MAX_PMF_SPARSE_SIZE = 1000 - - -def _get_delta_for_epsilon(infinity_mass: float, - reversed_losses: Iterable[float], - probs: Iterable[float], epsilon: float) -> float: - """Computes the epsilon-hockey stick divergence. - - Args: - infinity_mass: the probability of the infinite loss. - reversed_losses: privacy losses, assumed to be sorted in descending order. - probs: probabilities corresponding to losses. - epsilon: the epsilon in the epsilon-hockey stick divergence. - - Returns: - The epsilon-hockey stick divergence. - """ - delta = 0 - for loss, prob in zip(reversed_losses, probs): - if loss <= epsilon: - break - delta += (1 - np.exp(epsilon - loss)) * prob - return delta + infinity_mass - - -def _get_epsilon_for_delta(infinity_mass: float, - reversed_losses: Iterable[float], - probs: Iterable[float], delta: float) -> float: - """Computes epsilon for which hockey stick divergence is at most delta. - - Args: - infinity_mass: the probability of the infinite loss. - reversed_losses: privacy losses, assumed to be sorted in descending order. - probs: probabilities corresponding to losses. - delta: the target epsilon-hockey stick divergence.. - - Returns: - The smallest epsilon such that the epsilon-hockey stick divergence is at - most delta. When no such finite epsilon exists, return math.inf. - """ - if infinity_mass > delta: - return math.inf - - mass_upper, mass_lower = infinity_mass, 0 - - for loss, prob in zip(reversed_losses, probs): - if (mass_upper > delta and mass_lower > 0 and math.log( - (mass_upper - delta) / mass_lower) >= loss): - # Epsilon is greater than or equal to loss. - break - - mass_upper += prob - mass_lower += math.exp(-loss) * prob - - if mass_upper >= delta and mass_lower == 0: - # This only occurs when loss is very large, which results in exp(-loss) - # being treated as zero. - return max(0, loss) - - if mass_upper <= mass_lower + delta: - return 0 - return math.log((mass_upper - delta) / mass_lower) - - -def _truncate_tails(probs: ArrayLike, tail_mass_truncation: float, - pessimistic_estimate: bool) -> Tuple[int, ArrayLike, float]: - """Truncates an array from both sides by not more than tail_mass_truncation. - - It truncates the maximum prefix and suffix from probs, each of which have - sum <= tail_mass_truncation/2. - - Args: - probs: array to truncate. - tail_mass_truncation: an upper bound on the tails of the probability mass of - the PMF that might be truncated. - pessimistic_estimate: if true then the left truncated sum is added to 0th - element of the truncated array and the right truncated returned as it goes - to infinity. If false then the right truncated sum is added to the last of - the truncated array and the left truncated sum is discarded. - - Returns: - Tuple of (size of truncated prefix, truncated array, mass that goes to - infinity). - """ - if tail_mass_truncation == 0: - return 0, probs, 0 - - def _find_prefix_to_truncate(arr: np.ndarray, threshold: float) -> int: - # Find the max size of array prefix, with the sum of elements less than - # threshold. 
- s = 0 - for i, val in enumerate(arr): - s += val - if s > threshold: - return i - return len(arr) - - left_idx = _find_prefix_to_truncate(probs, tail_mass_truncation / 2) - right_idx = len(probs) - _find_prefix_to_truncate( - np.flip(probs), tail_mass_truncation / 2) - # Be sure that left_idx <= right_idx. left_idx > right_idx might be when - # tail_mass_truncation is too large or if probs has too small mass - # (i.e. if a few truncations were operated on it already). - right_idx = max(right_idx, left_idx) - - left_mass = np.sum(probs[:left_idx]) - right_mass = np.sum(probs[right_idx:]) - - truncated_probs = probs[left_idx:right_idx] - if pessimistic_estimate: - # put truncated the left mass to the 0th element. - truncated_probs[0] += left_mass - return left_idx, truncated_probs, right_mass - # This is rounding to left case. Put truncated the right mass to the last - # element. - truncated_probs[-1] += right_mass - return left_idx, truncated_probs, 0 - - -class PLDPmf(abc.ABC): - """Base class for probability mass functions for privacy loss distributions. - - The privacy loss distribution (PLD) of two discrete distributions, the upper - distribution mu_upper and the lower distribution mu_lower, is defined as a - distribution on real numbers generated by first sampling an outcome o - according to mu_upper and then outputting the privacy loss - ln(mu_upper(o) / mu_lower(o)) where mu_lower(o) and mu_upper(o) are the - probability masses of o in mu_lower and mu_upper respectively. This class - allows one to create and manipulate privacy loss distributions. - - PLD allows one to (approximately) compute the epsilon-hockey stick divergence - between mu_upper and mu_lower, which is defined as - sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+. This quantity in turn - governs the parameter delta of (eps, delta)-differential privacy of the - corresponding protocol. (See Observation 1 in the supplementary material.) - - The above definitions extend to continuous distributions. The PLD of two - continuous distributions mu_upper and mu_lower is defined as a distribution on - real numbers generated by first sampling an outcome o according to mu_upper - and then outputting the privacy loss ln(f_{mu_upper}(o) / f_{mu_lower}(o)) - where f_{mu_lower}(o) and f_{mu_upper}(o) are the probability density - functions at o in mu_lower and mu_upper respectively. Moreover, for continuous - distributions the epsilon-hockey stick divergence is defined as - integral [f_{mu_upper}(o) - e^{epsilon} * f_{mu_lower}(o)]_+ do. - """ - - def __init__(self, discretization: float, infinity_mass: float, - pessimistic_estimate: bool): - self._discretization = discretization - self._infinity_mass = infinity_mass - self._pessimistic_estimate = pessimistic_estimate - - @property - @abc.abstractmethod - def size(self) -> int: - """Returns number of points in discretization.""" - - @abc.abstractmethod - def compose(self, - other: 'PLDPmf', - tail_mass_truncation: float = 0) -> 'PLDPmf': - """Computes a PMF resulting from composing two PMFs. - - Args: - other: the privacy loss distribution PMF to be composed. The two must have - the same discretization and pessimistic_estimate. - tail_mass_truncation: an upper bound on the tails of the probability mass - of the PMF that might be truncated. - - Returns: - A PMF which is the result of convolving (composing) the two. 
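
For intuition, the composition contract above amounts to convolving the two loss PMFs and combining their infinity masses multiplicatively. A minimal standalone sketch with toy values (not this module's code path; all numbers are illustrative):

import numpy as np

# Two toy PMFs on the same loss grid (discretization = 1.0).
# probs_a[i] is the mass at loss (lower_a + i); likewise for b.
lower_a, probs_a = -1, np.array([0.2, 0.5, 0.3])  # losses -1, 0, 1
lower_b, probs_b = 0, np.array([0.59, 0.40])      # losses 0, 1
inf_a, inf_b = 0.0, 0.01                          # masses at +infinity

# Losses add under composition, so the finite parts convolve, and the
# infinity masses combine as 1 - (1 - inf_a) * (1 - inf_b).
lower_c = lower_a + lower_b
probs_c = np.convolve(probs_a, probs_b)           # losses -1 .. 2
inf_c = 1 - (1 - inf_a) * (1 - inf_b)
print(lower_c, probs_c, inf_c)

The dense implementation below uses the same idea with scipy's FFT-based convolution plus tail truncation.
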
- """ - - @abc.abstractmethod - def self_compose(self, - num_times: int, - tail_mass_truncation: float = 0) -> 'PLDPmf': - """Computes PMF resulting from repeated composing the PMF with itself. - - Args: - num_times: the number of times to compose this PMF with itself. - tail_mass_truncation: an upper bound on the tails of the probability mass - of the PMF that might be truncated. - - Returns: - A privacy loss distribution PMF which is the result of the composition. - """ - - @abc.abstractmethod - def get_delta_for_epsilon(self, epsilon: float) -> float: - """Computes the epsilon-hockey stick divergence.""" - - @abc.abstractmethod - def get_epsilon_for_delta(self, delta: float) -> float: - """Computes epsilon for which hockey stick divergence is at most delta.""" - - @abc.abstractmethod - def to_dense_pmf(self) -> 'DensePLDPmf': - """Returns the dense PMF with data from 'self'.""" - - @abc.abstractmethod - def get_delta_for_epsilon_for_composed_pld(self, other: 'PLDPmf', - epsilon: float) -> float: - """Computes delta for 'epsilon' for the composiion of 'self' and 'other'.""" - - def validate_composable(self, other: 'PLDPmf'): - """Checks whether 'self' and 'other' can be composed.""" - if not isinstance(self, type(other)): - raise ValueError(f'Only PMFs of the same type can be composed:' - f'{type(self).__name__} != {type(other).__name__}.') - # pylint: disable=protected-access - if self._discretization != other._discretization: - raise ValueError(f'Discretization intervals are different: ' - f'{self._discretization} != ' - f'{other._discretization}.') - if self._pessimistic_estimate != other._pessimistic_estimate: - raise ValueError(f'Estimation types are different: ' - f'{self._pessimistic_estimate} != ' - f'{other._pessimistic_estimate}.') # pylint: disable=protected-access - # pylint: enable=protected-access - - -class DensePLDPmf(PLDPmf): - """Class for dense probability mass function. - - It represents a discrete probability distribution on a grid of privacy losses. - The grid contains numbers multiple of 'discretization', starting from - lower_loss * discretization. - """ - - def __init__(self, discretization: float, lower_loss: int, probs: np.ndarray, - infinity_mass: float, pessimistic_estimate: bool): - super().__init__(discretization, infinity_mass, pessimistic_estimate) - self._lower_loss = lower_loss - self._probs = probs - - @property - def size(self) -> int: - return len(self._probs) - - def compose(self, - other: 'DensePLDPmf', - tail_mass_truncation: float = 0) -> 'DensePLDPmf': - """Computes a PMF resulting from composing two PMFs. 
See base class.""" - self.validate_composable(other) - - # pylint: disable=protected-access - lower_loss = self._lower_loss + other._lower_loss - probs = signal.fftconvolve(self._probs, other._probs) - infinity_mass = 1 - (1 - self._infinity_mass) * (1 - other._infinity_mass) - offset, probs, right_tail = _truncate_tails(probs, tail_mass_truncation, - self._pessimistic_estimate) - # pylint: enable=protected-access - return DensePLDPmf(self._discretization, lower_loss + offset, probs, - infinity_mass + right_tail, self._pessimistic_estimate) - - def self_compose(self, - num_times: int, - tail_mass_truncation: float = 1e-15) -> 'DensePLDPmf': - """See base class.""" - if num_times <= 0: - raise ValueError(f'num_times should be >= 1, num_times={num_times}') - lower_loss = self._lower_loss * num_times - truncation_lower_bound, probs = common.self_convolve( - self._probs, num_times, tail_mass_truncation) - lower_loss += truncation_lower_bound - probs = np.array(probs) - inf_prob = 1 - (1 - self._infinity_mass) ** num_times - offset, probs, right_tail = _truncate_tails(probs, tail_mass_truncation, - self._pessimistic_estimate) - return DensePLDPmf(self._discretization, lower_loss + offset, probs, - inf_prob + right_tail, self._pessimistic_estimate) - - def get_delta_for_epsilon(self, epsilon: float) -> float: - """Computes the epsilon-hockey stick divergence.""" - upper_loss = (self._lower_loss + len(self._probs) - - 1) * self._discretization - reversed_losses = itertools.count(upper_loss, -self._discretization) - return _get_delta_for_epsilon(self._infinity_mass, reversed_losses, - np.flip(self._probs), epsilon) - - def get_epsilon_for_delta(self, delta: float) -> float: - """Computes epsilon for which hockey stick divergence is at most delta.""" - upper_loss = (self._lower_loss + len(self._probs) - - 1) * self._discretization - reversed_losses = itertools.count(upper_loss, -self._discretization) - return _get_epsilon_for_delta(self._infinity_mass, reversed_losses, - np.flip(self._probs), delta) - - def to_dense_pmf(self) -> 'DensePLDPmf': - return self - - def get_delta_for_epsilon_for_composed_pld(self, other: PLDPmf, - epsilon: float) -> float: - other = other.to_dense_pmf() - self.validate_composable(other) - discretization = self._discretization - # pylint: disable=protected-access - self_loss = lambda index: (index + self._lower_loss) * discretization - other_loss = lambda index: (index + other._lower_loss) * discretization - - self_probs, other_probs = self._probs, other._probs - len_self, len_other = len(self_probs), len(other_probs) - delta = 1 - (1 - self._infinity_mass) * (1 - other._infinity_mass) - # pylint: enable=protected-access - - # Compute the hockey stick divergence using equation (2) in the - # supplementary material. upper_mass represents summation in equation (3) - # and lower_mass represents the summation in equation (4). - - if self_loss(len_self - 1) + other_loss(len_other - 1) <= epsilon: - return delta - - i, j = 0, len_other - 1 - upper_mass = lower_mass = 0 - - # This is summation by i,j, such that self_loss(i) + other_loss(j) >= - # epsilon, and self_loss(i) + other_loss(j-1)< epsilon, as in the - # equation(2). - - # If i is todo small then increase it. - while self_loss(i) + other_loss(j) < epsilon: - i += 1 - - # Else if j is too large then decrease it. 
- while j >= 0 and self_loss(i) + other_loss(j - 1) >= epsilon: - upper_mass += other_probs[j] - lower_mass += other_probs[j] * np.exp(-other_loss(j)) - j -= 1 - - # Invariant: - # self_loss(i) + other_loss(j-1) < epsilon <= self_loss(i) + other_loss(j) - # Sum over all i, keeping this invariant. - for i in range(i, len_self): - if j >= 0: - upper_mass += other_probs[j] - lower_mass += other_probs[j] * np.exp(-other_loss(j)) - j -= 1 - delta += self_probs[i] * ( - upper_mass - np.exp(epsilon - self_loss(i)) * lower_mass) - - return delta - - -class SparsePLDPmf(PLDPmf): - """Class for sparse probability mass function. - - It represents a discrete probability distribution on a grid of 1d losses with - a dictionary. The grid contains numbers multiples of 'discretization'. - """ - - def __init__(self, loss_probs: Mapping[int, float], discretization: float, - infinity_mass: float, pessimistic_estimate: bool): - super().__init__(discretization, infinity_mass, pessimistic_estimate) - self._loss_probs = loss_probs - - @property - def size(self) -> int: - return len(self._loss_probs) - - def compose(self, - other: 'SparsePLDPmf', - tail_mass_truncation: float = 0) -> 'SparsePLDPmf': - """Computes a PMF resulting from composing two PMFs. See base class.""" - self.validate_composable(other) - # Assumed small number of points, so simple quadratic algorithm is fine. - convolution = {} - # pylint: disable=protected-access - for key1, value1 in self._loss_probs.items(): - for key2, value2 in other._loss_probs.items(): - key = key1 + key2 - convolution[key] = convolution.get(key, 0.0) + value1 * value2 - infinity_mass = 1 - (1 - self._infinity_mass) * (1 - other._infinity_mass) - # pylint: enable=protected-access - # Do truncation. - sorted_losses = sorted(convolution.keys()) - probs = [convolution[loss] for loss in sorted_losses] - offset, probs, right_mass = _truncate_tails(probs, tail_mass_truncation, - self._pessimistic_estimate) - sorted_losses = sorted_losses[offset:offset + len(probs)] - truncated_convolution = dict(zip(sorted_losses, probs)) - return SparsePLDPmf(truncated_convolution, self._discretization, - infinity_mass + right_mass, self._pessimistic_estimate) - - def self_compose(self, - num_times: int, - tail_mass_truncation: float = 1e-15) -> 'PLDPmf': - """See base class.""" - if num_times <= 0: - raise ValueError(f'num_times should be >= 1, num_times={num_times}') - if num_times == 1: - return self - - # Compute a rough upper bound overestimate, since from some power, the PMF - # becomes dense and start growing linearly further. But in this case we - # should definitely go to dense. - max_result_size = self.size ** num_times - - if max_result_size > _MAX_PMF_SPARSE_SIZE: - # The size of composed PMF is too large for sparse. Convert to dense. - return self.to_dense_pmf().self_compose(num_times, tail_mass_truncation) - - result = self - for i in range(2, num_times + 1): - # To truncate only on the last composition. 
- mass_truncation = 0 if i != num_times else tail_mass_truncation - result = result.compose(self, mass_truncation) - - return result - - def _get_reversed_losses_probs(self) -> Tuple[List[float], List[float]]: - """Returns losses, sorted in reverse order and respective probabilities.""" - reversed_losses = sorted(list(self._loss_probs.keys()), reverse=True) - reversed_probs = [self._loss_probs[loss] for loss in reversed_losses] - reversed_losses = [loss * self._discretization for loss in reversed_losses] - return reversed_losses, reversed_probs - - def get_delta_for_epsilon(self, epsilon: float) -> float: - """Computes the epsilon-hockey stick divergence.""" - reversed_losses, reversed_probs = self._get_reversed_losses_probs() - return _get_delta_for_epsilon(self._infinity_mass, reversed_losses, - reversed_probs, epsilon) - - def get_epsilon_for_delta(self, delta: float) -> float: - """Computes epsilon for which hockey stick divergence is at most delta.""" - reversed_losses, reversed_probs = self._get_reversed_losses_probs() - return _get_epsilon_for_delta(self._infinity_mass, reversed_losses, - reversed_probs, delta) - - def get_delta_for_epsilon_for_composed_pld(self, other: PLDPmf, - epsilon: float) -> float: - # If 'self' is sparse, then it is small, so it is not so expensive to - # convert to dense. Let us convert it for simplicity for dense. - return self.to_dense_pmf().get_delta_for_epsilon_for_composed_pld( - other, epsilon) - - def to_dense_pmf(self) -> DensePLDPmf: - """"Converts to dense PMF.""" - lower_loss, probs = common.dictionary_to_list(self._loss_probs) - return DensePLDPmf(self._discretization, lower_loss, np.array(probs), - self._infinity_mass, self._pessimistic_estimate) - - -def create_pmf(loss_probs: Mapping[int, float], discretization: float, - infinity_mass: float, pessimistic_estimate: bool) -> PLDPmf: - """Creates PLDPmfs. - - It returns SparsePLDPmf if the size of loss_probs less than - MAX_PMF_SPARSE_SIZE, otherwise DensePLDPmf. - - Args: - loss_probs: probability mass function of the discretized privacy loss - distribution. - discretization: the interval length for which the values of the privacy loss - distribution are discretized. - infinity_mass: infinity_mass for privacy loss distribution. - pessimistic_estimate: whether the rounding is done in such a way that the - resulting epsilon-hockey stick divergence computation gives an upper - estimate to the real value. - - Returns: - Created PLDPmf. - """ - if len(loss_probs) <= _MAX_PMF_SPARSE_SIZE: - return SparsePLDPmf(loss_probs, discretization, infinity_mass, - pessimistic_estimate) - - lower_loss, probs = common.dictionary_to_list(loss_probs) - probs = np.array(probs) - return DensePLDPmf(discretization, lower_loss, probs, infinity_mass, - pessimistic_estimate) - - -def compose_pmfs(pmf1: PLDPmf, - pmf2: PLDPmf, - tail_mass_truncation: float = 0) -> PLDPmf: - """Computes a PMF resulting from composing two PMFs. - - It returns SparsePLDPmf only if input PLDPmfs are SparsePLDPmf and the - product of input pmfs sizes are less than MAX_PMF_SPARSE_SIZE. - - Args: - pmf1: the privacy loss distribution PMF to be composed. - pmf2: the privacy loss distribution PMF to be composed. The two must have - the same discretization and pessimistic_estimate. - tail_mass_truncation: an upper bound on the tails of the probability mass of - the PMF that might be truncated. - - Returns: - A PMF which is the result of convolving (composing) the two. 
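
The dispatch rule described above comes down to a size check: a sparse-by-sparse composition can produce up to size1 * size2 distinct losses, so beyond the module's cap the dense FFT path wins. A small sketch of that rule (should_stay_sparse is a hypothetical helper, not part of the module; the cap value matches _MAX_PMF_SPARSE_SIZE defined above):

_MAX_PMF_SPARSE_SIZE = 1000  # cap used by this module

def should_stay_sparse(size1: int, size2: int) -> bool:
    # Sparse composition is quadratic in the number of support points;
    # past the cap, dense FFT-based convolution is cheaper.
    return size1 * size2 <= _MAX_PMF_SPARSE_SIZE

print(should_stay_sparse(20, 30))  # True: 600 <= 1000
print(should_stay_sparse(40, 40))  # False: 1600 > 1000
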
- """ - max_result_size = pmf1.size * pmf2.size - if (isinstance(pmf1, SparsePLDPmf) and isinstance(pmf2, SparsePLDPmf) and - max_result_size <= _MAX_PMF_SPARSE_SIZE): - return pmf1.compose(pmf2, tail_mass_truncation) - - pmf1 = pmf1.to_dense_pmf() - pmf2 = pmf2.to_dense_pmf() - return pmf1.compose(pmf2, tail_mass_truncation) diff --git a/python/fedml/core/dp/budget_accountant/privacy_accountant.py b/python/fedml/core/dp/budget_accountant/privacy_accountant.py new file mode 100644 index 0000000000..670a0841f4 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/privacy_accountant.py @@ -0,0 +1,118 @@ +"""PrivacyAccountant abstract base class.""" + +import abc +import enum + +from fedml.core.dp.budget_accountant import dp_event +from fedml.core.dp.budget_accountant import dp_event_builder + + +class NeighboringRelation(enum.Enum): + ADD_OR_REMOVE_ONE = 1 + REPLACE_ONE = 2 + + # A record is replaced with a special record, such as the "zero record". See + # https://arxiv.org/pdf/2103.00039.pdf, Definition 1.1. + REPLACE_SPECIAL = 3 + + +class UnsupportedEventError(Exception): + """Exception to raise if _compose is called on unsupported event type.""" + + +class PrivacyAccountant(metaclass=abc.ABCMeta): + """Abstract base class for privacy accountants.""" + + def __init__(self, neighboring_relation: NeighboringRelation): + self._neighboring_relation = neighboring_relation + self._ledger = dp_event_builder.DpEventBuilder() + + @property + def neighboring_relation(self) -> NeighboringRelation: + """The neighboring relation used by the accountant. + + The neighboring relation is expected to remain constant after + initialization. Subclasses should not override this property or change the + value of the private attribute. + """ + return self._neighboring_relation + + @abc.abstractmethod + def supports(self, event: dp_event.DpEvent) -> bool: + """Checks whether the `DpEvent` can be processed by this accountant. + + In general this will require recursively checking the structure of the + `DpEvent`. In particular `ComposedDpEvent` and `SelfComposedDpEvent` should + be recursively examined. + + Args: + event: The `DpEvent` to check. + + Returns: + True iff this accountant supports processing `event`. + """ + + @abc.abstractmethod + def _compose(self, event: dp_event.DpEvent, count: int = 1): + """Updates internal state to account for application of a `DpEvent`. + + Calls to `get_epsilon` or `get_delta` after calling `_compose` will return + values that account for this `DpEvent`. + + Args: + event: A `DpEvent` to process. + count: The number of times to compose the event. + """ + + def compose(self, event: dp_event.DpEvent, count: int = 1): + """Updates internal state to account for application of a `DpEvent`. + + Calls to `get_epsilon` or `get_delta` after calling `compose` will return + values that account for this `DpEvent`. + + Args: + event: A `DpEvent` to process. + count: The number of times to compose the event. + + Raises: + UnsupportedEventError: `event` is not supported by this + `PrivacyAccountant`. + """ + if not isinstance(event, dp_event.DpEvent): + raise TypeError(f'`event` must be `DpEvent`. 
Found {type(event)}.') + + if not self.supports(event): + raise UnsupportedEventError(f'Unsupported event: {event}.') + + self._ledger.compose(event, count) + self._compose(event, count) + + @property + def ledger(self) -> dp_event.DpEvent: + """Returns the (composed) `DpEvent` processed so far by this accountant.""" + return self._ledger.build() + + @abc.abstractmethod + def get_epsilon(self, target_delta: float) -> float: + """Gets the current epsilon. + + Args: + target_delta: The target delta. + + Returns: + The current epsilon, accounting for all composed `DpEvent`s. + """ + + def get_delta(self, target_epsilon: float) -> float: + """Gets the current delta. + + An implementer of `PrivacyAccountant` may choose not to override this, in + which case `NotImplementedError` will be raised. + + Args: + target_epsilon: The target epsilon. + + Returns: + The current delta, accounting for all composed `DpEvent`s. + """ + raise NotImplementedError() diff --git a/python/fedml/core/dp/budget_accountant/privacy_accountant_test.py b/python/fedml/core/dp/budget_accountant/privacy_accountant_test.py new file mode 100644 index 0000000000..deb0ad7266 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/privacy_accountant_test.py @@ -0,0 +1,88 @@ +"""Abstract base class for tests of `PrivacyAccountant` classes. + +Checks that a class derived from `PrivacyAccountant` has the correct behavior +for standard `DpEvent` classes. +""" + +from typing import Collection + +from absl.testing import absltest + +from fedml.core.dp.budget_accountant import dp_event +from fedml.core.dp.budget_accountant import privacy_accountant + + +@absltest.skipThisClass('only intended to be run by subclasses') +class PrivacyAccountantTest(absltest.TestCase): + + def _make_test_accountants( + self) -> Collection[privacy_accountant.PrivacyAccountant]: + """Makes a list of accountants to test. + + Subclasses should define this to return a list of accountants to be tested. + + Returns: + A list of accountants to test. + """ + return [] + + def test_make_test_accountants(self): + self.assertNotEmpty(self._make_test_accountants()) + + def test_unsupported(self): + + class UnknownDpEvent(dp_event.DpEvent): + pass + + for accountant in self._make_test_accountants(): + for unsupported in [dp_event.UnsupportedDpEvent(), UnknownDpEvent()]: + self.assertFalse(accountant.supports(unsupported)) + self.assertFalse( + accountant.supports(dp_event.SelfComposedDpEvent(unsupported, 10))) + self.assertFalse( + accountant.supports(dp_event.ComposedDpEvent([unsupported]))) + + def test_no_events(self): + for accountant in self._make_test_accountants(): + self.assertEqual(accountant.get_epsilon(1e-12), 0) + self.assertEqual(accountant.get_epsilon(0), 0) + self.assertEqual(accountant.get_epsilon(1), 0) + try: + self.assertEqual(accountant.get_delta(1e-12), 0) + self.assertEqual(accountant.get_delta(0), 0) + self.assertEqual(accountant.get_delta(float('inf')), 0) + except NotImplementedError: + # Implementing `get_delta` is optional. 
+ pass + + def test_no_op(self): + for accountant in self._make_test_accountants(): + event = dp_event.NoOpDpEvent() + self.assertTrue(accountant.supports(event)) + accountant._compose(event) + self.assertEqual(accountant.get_epsilon(1e-12), 0) + self.assertEqual(accountant.get_epsilon(0), 0) + self.assertEqual(accountant.get_epsilon(1), 0) + try: + self.assertEqual(accountant.get_delta(1e-12), 0) + self.assertEqual(accountant.get_delta(0), 0) + self.assertEqual(accountant.get_delta(float('inf')), 0) + except NotImplementedError: + # Implementing `get_delta` is optional. + pass + + def test_non_private(self): + for accountant in self._make_test_accountants(): + event = dp_event.NonPrivateDpEvent() + self.assertTrue(accountant.supports(event)) + accountant._compose(event) + self.assertEqual(accountant.get_epsilon(0.99), float('inf')) + self.assertEqual(accountant.get_epsilon(0), float('inf')) + self.assertEqual(accountant.get_epsilon(1), float('inf')) + try: + self.assertEqual(accountant.get_delta(100), 1) + self.assertEqual(accountant.get_delta(0), 1) + self.assertEqual(accountant.get_delta(float('inf')), 1) + except NotImplementedError: + # Implementing `get_delta` is optional. + pass diff --git a/python/fedml/core/dp/budget_accountant/privacy_loss_distribution.py b/python/fedml/core/dp/budget_accountant/privacy_loss_distribution.py deleted file mode 100644 index 0ea693d25e..0000000000 --- a/python/fedml/core/dp/budget_accountant/privacy_loss_distribution.py +++ /dev/null @@ -1,1253 +0,0 @@ -"""Implementing Privacy Loss Distribution. - -This file implements the privacy loss distribution (PLD) and its basic -functionalities. The main feature of PLD is that it allows for accurate -computation of privacy parameters under composition. Please refer to the -supplementary material below for more details: -../../common_docs/Privacy_Loss_Distributions.pdf -""" - -import collections -import logging -import math -from typing import Any, Callable, Mapping, Optional, Tuple - -from fedml.core.dp.budget_accountant import common -from fedml.core.dp.budget_accountant import pld_pmf -from fedml.core.dp.budget_accountant import privacy_loss_mechanism - - -def _deprecation_warning(method_name: str): - logging.warning('PrivacyLossDistribution.%s() will be deprecated shortly. ' - 'Use factory method %s() instead.', method_name, method_name) - - -class PrivacyLossDistribution: - """Class for privacy loss distributions and computation involving them. - - The privacy loss distribution (PLD) of two discrete distributions, the upper - distribution mu_upper and the lower distribution mu_lower, is defined as a - distribution on real numbers generated by first sampling an outcome o - according to mu_upper and then outputting the privacy loss - ln(mu_upper(o) / mu_lower(o)) where mu_lower(o) and mu_upper(o) are the - probability masses of o in mu_lower and mu_upper respectively. This class - allows one to create and manipulate privacy loss distributions. - - PLD allows one to (approximately) compute the epsilon-hockey stick divergence - between mu_upper and mu_lower, which is defined as - sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+. This quantity in turn - governs the parameter delta of (eps, delta)-differential privacy of the - corresponding protocol. (See Observation 1 in the supplementary material.) - - The above definitions extend to continuous distributions. 
The PLD of two - continuous distributions mu_upper and mu_lower is defined as a distribution on - real numbers generated by first sampling an outcome o according to mu_upper - and then outputting the privacy loss ln(f_{mu_upper}(o) / f_{mu_lower}(o)) - where f_{mu_lower}(o) and f_{mu_upper}(o) are the probability density - functions at o in mu_lower and mu_upper respectively. Moreover, for continuous - distributions the epsilon-hockey stick divergence is defined as - integral [f_{mu_upper}(o) - e^{epsilon} * f_{mu_lower}(o)]_+ do. - - A single privacy loss distribution is represented as an object of the class - BasicPrivacyLossDistribution. This class, on the other hand, holds the higher - level logic. - - Namely, this class maintains up to two BasicPrivacyLossDistribution objects. - One for the 'add' adjacency type, which specifies the privacy loss - distribution for a mechanism M with mu_upper = M(D) and mu_lower = M(D'), - where D' contains one more datapoint than D. - And one for the 'remove' adjacency type, which specifies the privacy loss - distribution for a mechanism M, with mu_upper = M(D) and mu_lower = M(D'), - where D' contains one less datapoint than D. - In the case where both privacy loss distributions are the same, only one copy - is maintained. - - While this class offers additional support with respect to the ADD/REMOVE - adjacency, this is not an inherent limitation; this class can also be used - in the case of other adjacencies such as Substitution. - - Factory methods in this module provide a convenient way to generate objects of - this class associated to various mechanisms. - - Attributes: - _basic_pld_remove: basic privacy loss distribution with respect to REMOVE - adjacency. - _basic_pld_add: basic privacy loss distribution with respect to ADD - adjacency. - _symmetric: When True, basic_pld_add is assumed to be the same as - basic_pld_remove. - _basic_pld: An alias for basic_pld_remove. Useful when symmetric is True. - """ - - def __init__(self, - pmf_remove: pld_pmf.PLDPmf, - pmf_add: Optional[pld_pmf.PLDPmf] = None): - """Initialization method for PrivacyLossDistribution.""" - self._pmf_remove = pmf_remove - self._symmetric = pmf_add is None - self._pmf_add = pmf_remove if self._symmetric else pmf_add - - @classmethod - def create_from_rounded_probability( - cls, - rounded_probability_mass_function: Mapping[int, float], - infinity_mass: float, - value_discretization_interval: float, - pessimistic_estimate: bool = True, - rounded_probability_mass_function_add: Optional[Mapping[int, - float]] = None, - infinity_mass_add: Optional[float] = None, - symmetric: bool = True) -> 'PrivacyLossDistribution': - """Create PrivacyLossDistribution from rounded probability mass function(s). - - Args: - rounded_probability_mass_function: rounded probability mass function of - the basic privacy loss distribution, with respect to REMOVE adjacency. - infinity_mass: infinity_mass for basic privacy loss distribution with - respect to the REMOVE adjacency. - value_discretization_interval: the interval length for which the values of - the privacy loss distribution are discretized. In particular, the values - are always integer multiples of value_discretization_interval. Smaller - value results in more accurate estimates of the privacy loss, at the - cost of increased run-time / memory usage. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. 
- pessimistic_estimate: whether the rounding is done in such a way that the - resulting epsilon-hockey stick divergence computation gives an upper - estimate to the real value. - rounded_probability_mass_function_add: rounded probability mass function - of the basic privacy loss distribution,with respect to ADD adjacency. - infinity_mass_add: infinity_mass for basic privacy loss distribution with - respect to the ADD adjacency. - symmetric: When True, the basic privacy loss distribution with respect to - ADD adjacency is assumed to be the same as that for REMOVE adjacency. - Arguments rounded_probability_mass_function_add, infinity_mass_add are - ignored in this case. - - Returns: - Privacy Loss Distribution object. - """ - pmf_remove = pld_pmf.create_pmf(rounded_probability_mass_function, - value_discretization_interval, - infinity_mass, pessimistic_estimate) - pmf_add = None - if symmetric: - if (rounded_probability_mass_function_add is not None or - infinity_mass_add is not None): - raise ValueError('Details about privacy loss distribution with respect' - 'to ADD adjacency cannot be specified when symmetric') - else: - if (rounded_probability_mass_function_add is None or - infinity_mass_add is None): - raise ValueError('Details about privacy loss distribution with respect' - 'to ADD adjacency should be specified when not ' - 'symmetric') - pmf_add = pld_pmf.create_pmf(rounded_probability_mass_function_add, - value_discretization_interval, - infinity_mass_add, pessimistic_estimate) - return cls(pmf_remove, pmf_add) - - @classmethod - def identity( - cls, - value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': - """Constructs an identity privacy loss distribution. - - This class method will be deprecated shortly. Use factory method identity() - instead. - - Args: - value_discretization_interval: the dicretization interval for the privacy - loss distribution. The values will be rounded up/down to be integer - multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / - memory usage. - - Returns: - The privacy loss distribution corresponding to an algorithm with no - privacy leak (i.e. output is independent of input). - """ - _deprecation_warning('identity') - return identity(value_discretization_interval) - - @classmethod - def from_two_probability_mass_functions( - cls, - log_probability_mass_function_lower: Mapping[Any, float], - log_probability_mass_function_upper: Mapping[Any, float], - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - log_mass_truncation_bound: float = -math.inf - ) -> 'PrivacyLossDistribution': - """Constructs a privacy loss distribution from mu_lower and mu_upper. - - This class method will be deprecated shortly. Use factory method - from_two_probability_mass_functions() instead. - - Args: - log_probability_mass_function_lower: the probability mass function of - mu_lower represented as a dictionary where each key is an outcome o of - mu_lower and the corresponding value is the natural log of the - probability mass of mu_lower at o. - log_probability_mass_function_upper: the probability mass function of - mu_upper represented as a dictionary where each key is an outcome o of - mu_upper and the corresponding value is the natural log of the - probability mass of mu_upper at o. 
- pessimistic_estimate: whether the rounding is done in such a way that the - resulting epsilon-hockey stick divergence computation gives an upper - estimate to the real value. - value_discretization_interval: the dicretization interval for the privacy - loss distribution. The values will be rounded up/down to be integer - multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / - memory usage. - log_mass_truncation_bound: when the log of the probability mass of the - upper distribution is below this bound, it is either (i) included in - infinity_mass in the case of pessimistic estimate or (ii) discarded - completely in the case of optimistic estimate. The larger - log_mass_truncation_bound is, the more error it may introduce in - divergence calculations. - - Returns: - The privacy loss distribution constructed as specified. - """ - _deprecation_warning('from_two_probability_mass_functions') - return from_two_probability_mass_functions( - log_probability_mass_function_lower, - log_probability_mass_function_upper, - pessimistic_estimate, - value_discretization_interval, - log_mass_truncation_bound) - - @classmethod - def create_from_cdf( - cls, - cdf: Callable[[float], float], - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - tail_mass_truncation: float = 1e-15) -> 'PrivacyLossDistribution': - """Constructs the privacy loss distribution from its cumulative density function. - - This class method will be deprecated shortly. Use factory method - create_from_cdf() instead. - - Args: - cdf: the cumulative density function of the privacy loss distribution. - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence - computation gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - tail_mass_truncation: an upper bound on the tails of the probability mass - of the PLD that might be truncated. - - Returns: - The privacy loss distribution constructed as specified. - """ - _deprecation_warning('create_from_cdf') - return create_from_cdf(cdf, pessimistic_estimate, - value_discretization_interval, - tail_mass_truncation) - - @classmethod - def from_randomized_response( - cls, - noise_parameter: float, - num_buckets: int, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': - """Constructs the privacy loss distribution of Randomized Response. - - The Randomized Response over k buckets with noise parameter p takes in an - input which is one of the k buckets. With probability 1 - p, it simply - outputs the input bucket. Otherwise, with probability p, it outputs a bucket - drawn uniformly at random from the k buckets. - - This function calculates the privacy loss distribution for the - aforementioned Randomized Response with a given number of buckets, and a - given noise parameter. - - Specifically, suppose that the original input is x and it is changed to x'. 
- Recall that the privacy loss distribution of the Randomized Response - mechanism is generated as follows: first pick o according to R(x), where - R(x) denote the output distribution of the Randomized Response mechanism - on input x. Then, the privacy loss is ln(Pr[R(x) = o] / Pr[R(x') = o]). - There are three cases here: - - When o = x, ln(Pr[R(x) = o] / Pr[R(x') = o]) = - ln(Pr[R(x) = x] / Pr[R(x') = x]). Here Pr[R(x) = x] = 1 - p + p / k - and Pr[R(x') = x] = p / k. - - When o = x', ln(Pr[R(x) = o] / Pr[R(x') = o]) = - ln(Pr[R(x') = x'] / Pr[R(x) = x']), which is just the negation of the - previous privacy loss. - - When o != x, x', the privacy loss is zero. - - This class method will be deprecated shortly. Use factory method - from_randomized_response() instead. - - Args: - noise_parameter: the probability that the Randomized Response outputs a - completely random bucket. - num_buckets: the total number of possible input values (which is equal to - the total number of possible output values). - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence - computation gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - - Returns: - The privacy loss distribution constructed as specified. - """ - _deprecation_warning('from_randomized_response') - return from_randomized_response(noise_parameter, num_buckets, - pessimistic_estimate, - value_discretization_interval) - - @classmethod - def from_laplace_mechanism( - cls, - parameter: float, - sensitivity: float = 1, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': - """Computes the privacy loss distribution of the Laplace mechanism. - - This class method will be deprecated shortly. Use factory method - from_laplace_mechanism() instead. - - Args: - parameter: the parameter of the Laplace distribution. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence - computation gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the Laplace mechanism with - given parameters. 
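
As a quick numeric check of the three randomized-response cases described earlier, a standalone sketch with toy values p = 0.2 and k = 4 (chosen only for illustration):

import math

p, k = 0.2, 4
pr_same = 1 - p + p / k   # Pr[R(x) = x]
pr_cross = p / k          # Pr[R(x') = x] for x' != x

loss_at_x = math.log(pr_same / pr_cross)  # o = x: ln(0.85 / 0.05) ~ 2.833
loss_at_x_prime = -loss_at_x              # o = x': the negation
loss_elsewhere = 0.0                      # o not in {x, x'}
print(loss_at_x, loss_at_x_prime, loss_elsewhere)
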
- """ - _deprecation_warning('from_laplace_mechanism') - return from_laplace_mechanism(parameter, sensitivity, pessimistic_estimate, - value_discretization_interval, sampling_prob) - - @classmethod - def from_gaussian_mechanism( - cls, - standard_deviation: float, - sensitivity: float = 1, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - log_mass_truncation_bound: float = -50, - sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': - """Creates the privacy loss distribution of the Gaussian mechanism. - - This class method will be deprecated shortly. Use factory method - from_gaussian_mechanism() instead. - - Args: - standard_deviation: the standard_deviation of the Gaussian distribution. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence - computation gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - log_mass_truncation_bound: the ln of the probability mass that might be - discarded from the noise distribution. The larger this number, the more - error it may introduce in divergence calculations. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the Gaussian mechanism with - given parameters. - """ - _deprecation_warning('from_gaussian_mechanism') - return from_gaussian_mechanism(standard_deviation, sensitivity, - pessimistic_estimate, - value_discretization_interval, - log_mass_truncation_bound, - sampling_prob) - - @classmethod - def from_discrete_laplace_mechanism( - cls, - parameter: float, - sensitivity: int = 1, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': - """Computes the privacy loss distribution of the Discrete Laplace mechanism. - - This class method will be deprecated shortly. Use factory method - from_discrete_laplace_mechanism() instead. - - Args: - parameter: the parameter of the discrete Laplace distribution. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence - computation gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the Discrete Laplace - mechanism with given parameters. 
- """ - _deprecation_warning('from_discrete_laplace_mechanism') - return from_discrete_laplace_mechanism(parameter, sensitivity, - pessimistic_estimate, - value_discretization_interval, - sampling_prob) - - @classmethod - def from_discrete_gaussian_mechanism( - cls, - sigma: float, - sensitivity: int = 1, - truncation_bound: Optional[int] = None, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': - """Creates the privacy loss distribution of the discrete Gaussian mechanism. - - This class method will be deprecated shortly. Use factory method - from_discrete_gaussian_mechanism() instead. - - Args: - sigma: the parameter of the discrete Gaussian distribution. Note that - unlike the (continuous) Gaussian distribution this is not equal to the - standard deviation of the noise. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - truncation_bound: bound for truncating the noise, i.e. the noise will only - have a support in [-truncation_bound, truncation_bound]. When not - specified, truncation_bound will be chosen in such a way that the mass - of the noise outside of this range is at most 1e-30. - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence - computation gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the discrete Gaussian - mechanism with given parameters. - """ - _deprecation_warning('from_discrete_gaussian_mechanism') - return from_discrete_gaussian_mechanism(sigma, sensitivity, - truncation_bound, - pessimistic_estimate, - value_discretization_interval, - sampling_prob) - - @classmethod - def from_privacy_parameters( - cls, - privacy_parameters: common.DifferentialPrivacyParameters, - value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': - """Constructs pessimistic PLD from epsilon and delta parameters. - - When the mechanism is (epsilon, delta)-differentially private, the following - is a pessimistic estimate of its privacy loss distribution (see Section 3.5 - of the supplementary material for more explanation): - - infinity with probability delta. - - epsilon with probability (1 - delta) / (1 + exp(-eps)) - - -epsilon with probability (1 - delta) / (1 + exp(eps)) - - This class method will be deprecated shortly. Use factory method - from_privacy_parameters() instead. - - Args: - privacy_parameters: the privacy guarantee of the mechanism. - value_discretization_interval: the length of the dicretization interval - for the privacy loss distribution. The values will be rounded up/down to - be integer multiples of this number. Smaller value results in more - accurate estimates of the privacy loss, at the cost of increased - run-time / memory usage. - - Returns: - The privacy loss distribution constructed as specified. 
- """ - _deprecation_warning('from_privacy_parameters') - return from_privacy_parameters(privacy_parameters, - value_discretization_interval) - - def get_delta_for_epsilon(self, epsilon: float) -> float: - """Computes the epsilon-hockey stick divergence between mu_upper, mu_lower. - - When this privacy loss distribution corresponds to a mechanism, the - epsilon-hockey stick divergence gives the value of delta for which the - mechanism is (epsilon, delta)-differentially private. (See Observation 1 in - the supplementary material.) - - Args: - epsilon: the epsilon in epsilon-hockey stick divergence. - - Returns: - A non-negative real number which is the epsilon-hockey stick divergence - between the upper (mu_upper) and the lower (mu_lower) distributions - corresponding to this privacy loss distribution. - """ - delta_remove = self._pmf_remove.get_delta_for_epsilon(epsilon) - if self._symmetric: - return delta_remove - delta_add = self._pmf_add.get_delta_for_epsilon(epsilon) - return max(delta_remove, delta_add) - - def get_epsilon_for_delta(self, delta: float) -> float: - """Computes epsilon for which hockey stick divergence is at most delta. - - This function computes the smallest non-negative epsilon for which the - epsilon-hockey stick divergence between mu_upper, mu_lower is at most delta. - - When this privacy loss distribution corresponds to a mechanism and the - rounding is pessimistic, the returned value corresponds to an epsilon for - which the mechanism is (epsilon, delta)-differentially private. (See - Observation 1 in the supplementary material.) - - Args: - delta: the target epsilon-hockey stick divergence. - - Returns: - A non-negative real number which is the smallest epsilon such that the - epsilon-hockey stick divergence between the upper (mu_upper) and the - lower (mu_lower) distributions is at most delta. When no such finite - epsilon exists, return math.inf. - """ - epsilon_remove = self._pmf_remove.get_epsilon_for_delta(delta) - if self._symmetric: - return epsilon_remove - epsilon_add = self._pmf_add.get_epsilon_for_delta(delta) - return max(epsilon_remove, epsilon_add) - - def validate_composable(self, - privacy_loss_distribution: 'PrivacyLossDistribution'): - """Verifies that a given PLD can be composed with this PLD. - - The two privacy loss distributions must have the same discretization - interval and estimate type for the composition to be allowed. - - Args: - privacy_loss_distribution: the privacy loss distribution to be composed - with the current privacy loss distribution. - - Raises: - ValueError if the value_discretization_interval or estimate_type of the - two PLDs are different. - """ - self._pmf_remove.validate_composable(privacy_loss_distribution._pmf_remove) # pylint:disable=protected-access - - def compose( - self, - privacy_loss_distribution: 'PrivacyLossDistribution', - tail_mass_truncation: float = 1e-15, - ) -> 'PrivacyLossDistribution': - """Computes a privacy loss distribution resulting from composing two PLDs. - - Args: - privacy_loss_distribution: the privacy loss distribution to be composed - with the current privacy loss distribution. The two must have the same - value_discretization_interval. - tail_mass_truncation: an upper bound on the tails of the probability mass - of the PLD that might be truncated. - - Returns: - A privacy loss distribution which is the result of composing the two. 
- """ - # pylint:disable=protected-access - pld_pmf_remove = pld_pmf.compose_pmfs(self._pmf_remove, - privacy_loss_distribution._pmf_remove, - tail_mass_truncation) - if self._symmetric and privacy_loss_distribution._symmetric: - return PrivacyLossDistribution(pld_pmf_remove) - pld_pmf_add = pld_pmf.compose_pmfs(self._pmf_add, - privacy_loss_distribution._pmf_add, - tail_mass_truncation) - # pylint:enable=protected-access - return PrivacyLossDistribution(pld_pmf_remove, pld_pmf_add) - - def get_delta_for_epsilon_for_composed_pld( - self, privacy_loss_distribution: 'PrivacyLossDistribution', - epsilon: float) -> float: - """Computes delta for given epsilon for the result of composing this PLD and a given PLD. - - The output of this function should be the same as first composing this PLD - and privacy_loss_distribution, and then call get_delta_for_epsilon on the - resulting PLD. The main advantage is that this function is faster. - - Args: - privacy_loss_distribution: the privacy loss distribution to be composed - with the current privacy loss distribution. The two must have the same - value_discretization_interval. - epsilon: the epsilon in epsilon-hockey stick divergence. - - Returns: - A non-negative real number which is the epsilon-hockey stick divergence - of the privacy loss distribution which is the result of composing this PLD - with privacy_loss_distribution. - """ - # pylint:disable=protected-access - delta_remove = self._pmf_remove.get_delta_for_epsilon_for_composed_pld( - privacy_loss_distribution._pmf_remove, epsilon) - if self._symmetric and privacy_loss_distribution._symmetric: - return delta_remove - delta_add = self._pmf_add.get_delta_for_epsilon_for_composed_pld( - privacy_loss_distribution._pmf_add, epsilon) - # pylint:enable=protected-access - return max(delta_remove, delta_add) - - def self_compose( - self, - num_times: int, - tail_mass_truncation: float = 1e-15) -> 'PrivacyLossDistribution': - """Computes PLD resulting from repeated composing the PLD with itself. - - Args: - num_times: the number of times to compose this PLD with itself. - tail_mass_truncation: an upper bound on the tails of the probability mass - of the PLD that might be truncated. Currently only supports for - pessimistic estimates. - - Returns: - A privacy loss distribution which is the result of the composition. - """ - pmf_remove = self._pmf_remove.self_compose(num_times, tail_mass_truncation) - if self._symmetric: - return PrivacyLossDistribution(pmf_remove) - pmf_add = self._pmf_add.self_compose(num_times, tail_mass_truncation) - return PrivacyLossDistribution(pmf_remove, pmf_add) - - -def identity( - value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: - """Constructs an identity privacy loss distribution. - - Args: - value_discretization_interval: the dicretization interval for the privacy - loss distribution. The values will be rounded up/down to be integer - multiples of this number. Smaller value results in more accurate estimates - of the privacy loss, at the cost of increased run-time / memory usage. - - Returns: - The privacy loss distribution corresponding to an algorithm with no - privacy leak (i.e. output is independent of input). 
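A short usage sketch of the query and composition API above (import path assumed as in the earlier sketch): two PLDs with matching discretization intervals are combined with compose, repeated applications with self_compose, and the result is queried for (epsilon, delta) guarantees.

    from fedml.core.dp.budget_accountant.pld import (
        privacy_loss_distribution as pld_lib)

    laplace_pld = pld_lib.from_laplace_mechanism(parameter=1.0)
    gaussian_pld = pld_lib.from_gaussian_mechanism(standard_deviation=2.0)

    # One Laplace query followed by ten Gaussian queries; both PLDs use the
    # default value_discretization_interval, so they are composable.
    total = laplace_pld.compose(gaussian_pld.self_compose(10))

    delta = total.get_delta_for_epsilon(epsilon=1.0)
    epsilon = total.get_epsilon_for_delta(delta=1e-6)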
- """ - return PrivacyLossDistribution.create_from_rounded_probability( - {0: 1}, 0, value_discretization_interval) - - -def from_two_probability_mass_functions( - log_probability_mass_function_lower: Mapping[Any, float], - log_probability_mass_function_upper: Mapping[Any, float], - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - log_mass_truncation_bound: float = -math.inf, - symmetric: bool = True) -> PrivacyLossDistribution: - """Constructs a privacy loss distribution from mu_lower and mu_upper. - - Args: - log_probability_mass_function_lower: the probability mass function of - mu_lower represented as a dictionary where each key is an outcome o of - mu_lower and the corresponding value is the natural log of the - probability mass of mu_lower at o. - log_probability_mass_function_upper: the probability mass function of - mu_upper represented as a dictionary where each key is an outcome o of - mu_upper and the corresponding value is the natural log of the - probability mass of mu_upper at o. - pessimistic_estimate: whether the rounding is done in such a way that the - resulting epsilon-hockey stick divergence computation gives an upper - estimate to the real value. - value_discretization_interval: the dicretization interval for the privacy - loss distribution. The values will be rounded up/down to be integer - multiples of this number. Smaller value results in more accurate estimates - of the privacy loss, at the cost of increased run-time / memory usage. - log_mass_truncation_bound: when the log of the probability mass of the upper - distribution is below this bound, it is either (i) included in - infinity_mass in the case of pessimistic estimate or (ii) discarded - completely in the case of optimistic estimate. The larger - log_mass_truncation_bound is, the more error it may introduce in - divergence calculations. - symmetric: if True it creates a symmetric PrivacyLossDistribution. - - Returns: - The privacy loss distribution constructed as specified. - """ - - def _create_rounded_probability_mass_function( - log_probability_mass_function_lower: Mapping[Any, float], - log_probability_mass_function_upper: Mapping[Any, float] - ) -> Tuple[float, Mapping[int, float]]: - """Helper function for creating rounded pmf.""" - infinity_mass = 0 - for outcome in log_probability_mass_function_upper: - if log_probability_mass_function_lower.get(outcome, - -math.inf) == -math.inf: - # When an outcome only appears in the upper distribution but not in the - # lower distribution, then it must be counted in infinity_mass as such - # an outcome contributes to the hockey stick divergence. - infinity_mass += math.exp(log_probability_mass_function_upper[outcome]) - # Compute the (non-discretized) probability mass function for the privacy - # loss distribution. - probability_mass_function = {} - for outcome in log_probability_mass_function_lower: - if log_probability_mass_function_lower[outcome] == -math.inf: - # This outcome never occurs in mu_lower. This case was already included - # as infinity_mass above. - continue - elif (log_probability_mass_function_upper.get(outcome, -math.inf) > - log_mass_truncation_bound): - # When the probability mass of mu_upper at the outcome is greater than - # the threshold, add it to the distribution. 
- privacy_loss_value = ( - log_probability_mass_function_upper[outcome] - - log_probability_mass_function_lower[outcome]) - probability_mass_function[privacy_loss_value] = ( - probability_mass_function.get(privacy_loss_value, 0) + - math.exp(log_probability_mass_function_upper[outcome])) - else: - if pessimistic_estimate: - # When the probability mass of mu_upper at the outcome is no more than - # the threshold and we would like to get a pessimistic estimate, - # account for this in infinity_mass. - infinity_mass += math.exp( - log_probability_mass_function_upper.get(outcome, -math.inf)) - # Discretize the probability mass so that the values are integer multiples - # of value_discretization_interval - rounded_probability_mass_function = collections.defaultdict(lambda: 0) - round_fn = math.ceil if pessimistic_estimate else math.floor - for val in probability_mass_function: - rounded_probability_mass_function[round_fn( - val / - value_discretization_interval)] += probability_mass_function[val] - return infinity_mass, rounded_probability_mass_function - - infinity_mass, rounded_probability_mass_function = _create_rounded_probability_mass_function( - log_probability_mass_function_lower, log_probability_mass_function_upper) - - if symmetric: - return PrivacyLossDistribution.create_from_rounded_probability( - rounded_probability_mass_function, - infinity_mass, - value_discretization_interval, - pessimistic_estimate=pessimistic_estimate) - - infinity_mass_add, rounded_probability_mass_function_add = _create_rounded_probability_mass_function( - log_probability_mass_function_lower=log_probability_mass_function_upper, - log_probability_mass_function_upper=log_probability_mass_function_lower) - return PrivacyLossDistribution.create_from_rounded_probability( - rounded_probability_mass_function, infinity_mass, - value_discretization_interval, pessimistic_estimate, - rounded_probability_mass_function_add, infinity_mass_add) - - -def _create_pld_pmf_from_additive_noise( - additive_noise_privacy_loss: - 'privacy_loss_mechanism.AdditiveNoisePrivacyLoss', - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4) -> pld_pmf.PLDPmf: - """Constructs the privacy loss distribution of an additive noise mechanism. - - An additive noise mechanism for computing a scalar-valued function f is a - mechanism that outputs the sum of the true value of the function and a noise - drawn from a certain distribution mu. This function calculates the privacy - loss distribution for such an additive noise mechanism. - - Args: - additive_noise_privacy_loss: the privacy loss representation of the - mechanism. - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - - Returns: - The privacy loss distribution constructed as specified. 
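To illustrate from_two_probability_mass_functions, here is a toy sketch (import path assumed as before) with two-outcome distributions mu_upper = (0.6, 0.4) and mu_lower = (0.4, 0.6), so the privacy loss takes the values ±ln(1.5); at epsilon = 0 the hockey stick divergence equals the total variation distance, 0.2, up to pessimistic rounding.

    import math

    from fedml.core.dp.budget_accountant.pld import (
        privacy_loss_distribution as pld_lib)

    log_pmf_upper = {'a': math.log(0.6), 'b': math.log(0.4)}
    log_pmf_lower = {'a': math.log(0.4), 'b': math.log(0.6)}

    toy_pld = pld_lib.from_two_probability_mass_functions(
        log_probability_mass_function_lower=log_pmf_lower,
        log_probability_mass_function_upper=log_pmf_upper)

    print(toy_pld.get_delta_for_epsilon(0.0))  # ~0.2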
- """ - round_fn = math.ceil if pessimistic_estimate else math.floor - - tail_pld = additive_noise_privacy_loss.privacy_loss_tail() - - rounded_probability_mass_function = collections.defaultdict(lambda: 0) - infinity_mass = tail_pld.tail_probability_mass_function.get(math.inf, 0) - for privacy_loss in tail_pld.tail_probability_mass_function: - if privacy_loss != math.inf: - rounded_probability_mass_function[round_fn( - privacy_loss / value_discretization_interval - )] += tail_pld.tail_probability_mass_function[privacy_loss] - - if additive_noise_privacy_loss.discrete_noise: - xs = list( - range( - math.ceil(tail_pld.lower_x_truncation) - 1, - math.floor(tail_pld.upper_x_truncation) + 1)) - - # Compute PMF for the x's. Note that a vectorized call to mu_upper_cdf can - # be much faster than many scalar calls. - cdf_values = additive_noise_privacy_loss.mu_upper_cdf(xs) - probability_mass = cdf_values[1:] - cdf_values[:-1] - - for x, prob in zip(xs[1:], probability_mass): - rounded_probability_mass_function[round_fn( - additive_noise_privacy_loss.privacy_loss(x) / - value_discretization_interval)] += prob - else: - lower_x = tail_pld.lower_x_truncation - rounded_down_value = math.floor( - additive_noise_privacy_loss.privacy_loss(lower_x) / - value_discretization_interval) - upper_x_privacy_loss = additive_noise_privacy_loss.privacy_loss( - tail_pld.upper_x_truncation) - - # Compute discretization intervals for PLD approximation. - xs, rounded_values = [lower_x], [] - x = lower_x - while x < tail_pld.upper_x_truncation: - if (value_discretization_interval * rounded_down_value <= - upper_x_privacy_loss): - x = tail_pld.upper_x_truncation - else: - x = additive_noise_privacy_loss.inverse_privacy_loss( - value_discretization_interval * rounded_down_value) - - xs.append(x) - rounded_values.append(round_fn(rounded_down_value + 0.5)) - rounded_down_value -= 1 - - # Compute PLD for discretization intervals. Note that a vectorized call to - # mu_upper_cdf is much faster than many scalar calls. - cdf_values = additive_noise_privacy_loss.mu_upper_cdf(xs) - probability_mass = cdf_values[1:] - cdf_values[:-1] - - # Each x in [lower_x, upper_x] results in privacy loss that lies in - # [value_discretization_interval * rounded_down_value, - # value_discretization_interval * (rounded_down_value + 1)] - for rounded_value, prob in zip(rounded_values, probability_mass): - rounded_probability_mass_function[rounded_value] += prob - - return pld_pmf.create_pmf( - dict(rounded_probability_mass_function), - value_discretization_interval, - infinity_mass, - pessimistic_estimate=pessimistic_estimate) - - -def create_from_cdf( - cdf: Callable[[float], float], - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - tail_mass_truncation: float = 1e-15) -> PrivacyLossDistribution: - """Constructs the privacy loss distribution from its cumulative density function. - - Args: - cdf: the cumulative density function of the privacy loss distribution. - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. 
- tail_mass_truncation: an upper bound on the tails of the probability mass of - the PLD that might be truncated. - - Returns: - The privacy loss distribution constructed as specified. - """ - rounded_probability_mass_function = {} - - # Construct the distribution for value greater than or equal to zero. - rounded_value = 1 if pessimistic_estimate else 0 - value = 0 - while cdf(value) < 1 - tail_mass_truncation / 2: - rounded_probability_mass_function[rounded_value] = ( - cdf(value + value_discretization_interval) - cdf(value)) - rounded_value += 1 - value += value_discretization_interval - - # Construct the distribution for value less than zero. - rounded_value = 0 if pessimistic_estimate else -1 - value = 0 - while cdf(value) > tail_mass_truncation / 2: - rounded_probability_mass_function[rounded_value] = ( - cdf(value) - cdf(value - value_discretization_interval)) - rounded_value -= 1 - value -= value_discretization_interval - - return PrivacyLossDistribution.create_from_rounded_probability( - rounded_probability_mass_function, - tail_mass_truncation if pessimistic_estimate else 0, - value_discretization_interval, - pessimistic_estimate=pessimistic_estimate) - - -def from_randomized_response( - noise_parameter: float, - num_buckets: int, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4 -) -> PrivacyLossDistribution: - """Constructs the privacy loss distribution of Randomized Response. - - The Randomized Response over k buckets with noise parameter p takes in an - input which is one of the k buckets. With probability 1 - p, it simply - outputs the input bucket. Otherwise, with probability p, it outputs a bucket - drawn uniformly at random from the k buckets. - - This function calculates the privacy loss distribution for the - aforementioned Randomized Response with a given number of buckets, and a - given noise parameter. - - Specifically, suppose that the original input is x and it is changed to x'. - Recall that the privacy loss distribution of the Randomized Response - mechanism is generated as follows: first pick o according to R(x), where - R(x) denote the output distribution of the Randomized Response mechanism - on input x. Then, the privacy loss is ln(Pr[R(x) = o] / Pr[R(x') = o]). - There are three cases here: - - When o = x, ln(Pr[R(x) = o] / Pr[R(x') = o]) = - ln(Pr[R(x) = x] / Pr[R(x') = x]). Here Pr[R(x) = x] = 1 - p + p / k - and Pr[R(x') = x] = p / k. - - When o = x', ln(Pr[R(x) = o] / Pr[R(x') = o]) = - ln(Pr[R(x') = x'] / Pr[R(x) = x']), which is just the negation of the - previous privacy loss. - - When o != x, x', the privacy loss is zero. - - Args: - noise_parameter: the probability that the Randomized Response outputs a - completely random bucket. - num_buckets: the total number of possible input values (which is equal to - the total number of possible output values). - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - - Returns: - The privacy loss distribution constructed as specified. 
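A quick numeric check of the three cases listed in the docstring above, with illustrative values p = 0.5 and k = 4: the privacy loss is ln(5) with probability 0.625, -ln(5) with probability 0.125, and 0 with probability 0.25.

    import math

    p, k = 0.5, 4
    prob_same = 1 - p + p / k   # Pr[R(x) = x]  = 0.625
    prob_other = p / k          # Pr[R(x') = x] = 0.125

    loss_up = math.log(prob_same / prob_other)   # ln(5) ~ 1.609
    loss_down = -loss_up                         # -ln(5)
    prob_zero = (k - 2) * prob_other             # outcomes o != x, x'

    # The three masses form a probability distribution.
    assert abs(prob_same + prob_other + prob_zero - 1.0) < 1e-12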
- """ - - if noise_parameter <= 0 or noise_parameter >= 1: - raise ValueError(f'Noise parameter must be strictly between 0 and 1: ' - f'{noise_parameter}') - - if num_buckets <= 1: - raise ValueError( - f'Number of buckets must be strictly greater than 1: {num_buckets}') - - round_fn = math.ceil if pessimistic_estimate else math.floor - - rounded_probability_mass_function = collections.defaultdict(lambda: 0) - - # Probability that the output is equal to the input, i.e., Pr[R(x) = x] - probability_output_equal_input = ((1 - noise_parameter) + - noise_parameter / num_buckets) - # Probability that the output is equal to a specific bucket that is not the - # input, i.e., Pr[R(x') = x] for x' != x. - probability_output_not_input = noise_parameter / num_buckets - - # Add privacy loss for the case o = x - rounded_value = round_fn( - math.log(probability_output_equal_input / probability_output_not_input) - / value_discretization_interval) - rounded_probability_mass_function[ - rounded_value] += probability_output_equal_input - - # Add privacy loss for the case o = x' - rounded_value = round_fn( - math.log(probability_output_not_input / probability_output_equal_input) - / value_discretization_interval) - rounded_probability_mass_function[ - rounded_value] += probability_output_not_input - - # Add privacy loss for the case o != x, x' - rounded_probability_mass_function[0] += ( - probability_output_not_input * (num_buckets - 2)) - - return PrivacyLossDistribution.create_from_rounded_probability( - rounded_probability_mass_function, - 0, - value_discretization_interval, - pessimistic_estimate=pessimistic_estimate) - - -def _pld_for_subsampled_mechanism( - single_pld_pmf: Callable[[privacy_loss_mechanism.AdjacencyType], - pld_pmf.PLDPmf], - sampling_prob: float = 1.0) -> PrivacyLossDistribution: - """Computes the privacy loss distribution for subsampled mechanisms. - - It is assumed that when sub-sampling probability is 1, the privacy loss - distributions corresponding to ADD and REMOVE adjacencies are identical. - - Args: - single_pld_pmf: method for computing the privacy loss distributions with - respect to ADD and REMOVE adjacency types. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - A symmetric privacy loss distribution when sampling_prob = 1; An - asymmetric privacy loss distribution corresponding to ADD and REMOVE - adjacency types when sampling_prob < 1. - """ - pmf_remove = single_pld_pmf(privacy_loss_mechanism.AdjacencyType.REMOVE) - if sampling_prob == 1.0: - return PrivacyLossDistribution(pmf_remove) - - pmf_add = single_pld_pmf(privacy_loss_mechanism.AdjacencyType.ADD) - return PrivacyLossDistribution(pmf_remove, pmf_add) - - -def from_laplace_mechanism( - parameter: float, - sensitivity: float = 1, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: - """Computes the privacy loss distribution of the Laplace mechanism. - - Args: - parameter: the parameter of the Laplace distribution. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. 
The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the Laplace mechanism with - given parameters. - """ - - def single_laplace_pld( - adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: - return _create_pld_pmf_from_additive_noise( - privacy_loss_mechanism.LaplacePrivacyLoss( - parameter, - sensitivity=sensitivity, - sampling_prob=sampling_prob, - adjacency_type=adjacency_type), - pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) - - return _pld_for_subsampled_mechanism(single_laplace_pld, sampling_prob) - - -def from_gaussian_mechanism( - standard_deviation: float, - sensitivity: float = 1, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - log_mass_truncation_bound: float = -50, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: - """Creates the privacy loss distribution of the Gaussian mechanism. - - Args: - standard_deviation: the standard_deviation of the Gaussian distribution. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - log_mass_truncation_bound: the ln of the probability mass that might be - discarded from the noise distribution. The larger this number, the more - error it may introduce in divergence calculations. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the Gaussian mechanism with - given parameters. - """ - - def single_gaussian_pld( - adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: - return _create_pld_pmf_from_additive_noise( - privacy_loss_mechanism.GaussianPrivacyLoss( - standard_deviation, - sensitivity=sensitivity, - pessimistic_estimate=pessimistic_estimate, - log_mass_truncation_bound=log_mass_truncation_bound, - sampling_prob=sampling_prob, - adjacency_type=adjacency_type), - pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) - - return _pld_for_subsampled_mechanism(single_gaussian_pld, sampling_prob) - - -def from_discrete_laplace_mechanism( - parameter: float, - sensitivity: int = 1, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: - """Computes the privacy loss distribution of the Discrete Laplace mechanism. - - Args: - parameter: the parameter of the discrete Laplace distribution. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) 
- pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the Discrete Laplace - mechanism with given parameters. - """ - - def single_discrete_laplace_pld( - adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: - return _create_pld_pmf_from_additive_noise( - privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( - parameter, - sensitivity=sensitivity, - sampling_prob=sampling_prob, - adjacency_type=adjacency_type), - pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) - - return _pld_for_subsampled_mechanism(single_discrete_laplace_pld, - sampling_prob) - - -def from_discrete_gaussian_mechanism( - sigma: float, - sensitivity: int = 1, - truncation_bound: Optional[int] = None, - pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: - """Creates the privacy loss distribution of the discrete Gaussian mechanism. - - Args: - sigma: the parameter of the discrete Gaussian distribution. Note that - unlike the (continuous) Gaussian distribution this is not equal to the - standard deviation of the noise. - sensitivity: the sensitivity of function f. (i.e. the maximum absolute - change in f when an input to a single user changes.) - truncation_bound: bound for truncating the noise, i.e. the noise will only - have a support in [-truncation_bound, truncation_bound]. When not - specified, truncation_bound will be chosen in such a way that the mass - of the noise outside of this range is at most 1e-30. - pessimistic_estimate: a value indicating whether the rounding is done in - such a way that the resulting epsilon-hockey stick divergence computation - gives an upper estimate to the real value. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - sampling_prob: sub-sampling probability, a value in (0,1]. - - Returns: - The privacy loss distribution corresponding to the discrete Gaussian - mechanism with given parameters. 
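The factory functions above combine with self_compose for iterative accounting, e.g. a DP-SGD-style schedule. A sketch (import path assumed as before; the noise multiplier, sampling probability, and step count are illustrative only):

    from fedml.core.dp.budget_accountant.pld import (
        privacy_loss_distribution as pld_lib)

    # One step of a subsampled Gaussian mechanism. With sampling_prob < 1
    # the PLD is asymmetric, tracking ADD and REMOVE adjacencies separately.
    step_pld = pld_lib.from_gaussian_mechanism(
        standard_deviation=1.1, sampling_prob=0.01)

    # 1000 composed steps, converted to an epsilon guarantee at delta = 1e-5.
    total_pld = step_pld.self_compose(1000)
    print(total_pld.get_epsilon_for_delta(1e-5))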
- """ - - def single_discrete_gaussian_pld( - adjacency_type: privacy_loss_mechanism.AdjacencyType) -> pld_pmf.PLDPmf: - return _create_pld_pmf_from_additive_noise( - privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( - sigma, - sensitivity=sensitivity, - truncation_bound=truncation_bound, - sampling_prob=sampling_prob, - adjacency_type=adjacency_type), - pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) - - return _pld_for_subsampled_mechanism(single_discrete_gaussian_pld, - sampling_prob) - - -def from_privacy_parameters( - privacy_parameters: common.DifferentialPrivacyParameters, - value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: - """Constructs pessimistic PLD from epsilon and delta parameters. - - When the mechanism is (epsilon, delta)-differentially private, the following - is a pessimistic estimate of its privacy loss distribution (see Section 3.5 - of the supplementary material for more explanation): - - infinity with probability delta. - - epsilon with probability (1 - delta) / (1 + exp(-eps)) - - -epsilon with probability (1 - delta) / (1 + exp(eps)) - - Args: - privacy_parameters: the privacy guarantee of the mechanism. - value_discretization_interval: the length of the dicretization interval for - the privacy loss distribution. The values will be rounded up/down to be - integer multiples of this number. Smaller value results in more accurate - estimates of the privacy loss, at the cost of increased run-time / memory - usage. - - Returns: - The privacy loss distribution constructed as specified. - """ - delta = privacy_parameters.delta - epsilon = privacy_parameters.epsilon - - rounded_probability_mass_function = collections.defaultdict(lambda: 0) - - rounded_probability_mass_function[math.ceil( - epsilon / - value_discretization_interval)] = (1 - delta) / (1 + math.exp(-epsilon)) - rounded_probability_mass_function[math.ceil( - -epsilon / - value_discretization_interval)] += (1 - delta) / (1 + math.exp(epsilon)) - - return PrivacyLossDistribution.create_from_rounded_probability( - rounded_probability_mass_function, privacy_parameters.delta, - value_discretization_interval) diff --git a/python/fedml/core/dp/budget_accountant/rdp/__init__.py b/python/fedml/core/dp/budget_accountant/rdp/__init__.py new file mode 100644 index 0000000000..1a143a0bbb --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/rdp/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2022 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""RDP Accounting package."""
+
+from fedml.core.dp.budget_accountant.rdp.rdp_privacy_accountant import compute_epsilon
+from fedml.core.dp.budget_accountant.rdp.rdp_privacy_accountant import RdpAccountant
diff --git a/python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant.py b/python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant.py
new file mode 100644
index 0000000000..e722eafc4f
--- /dev/null
+++ b/python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant.py
@@ -0,0 +1,901 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Privacy accountant that uses Renyi differential privacy."""
+
+import math
+from typing import Callable, Optional, Sequence, Tuple, Union
+
+import numpy as np
+from scipy import special
+
+# dp_event and privacy_accountant are vendored into this package; import them
+# from the fedml tree rather than the upstream dp_accounting package.
+from fedml.core.dp.budget_accountant import dp_event
+from fedml.core.dp.budget_accountant import privacy_accountant
+
+NeighborRel = privacy_accountant.NeighboringRelation
+
+
+def _log_add(logx: float, logy: float) -> float:
+  """Adds two numbers in the log space."""
+  a, b = min(logx, logy), max(logx, logy)
+  if a == -np.inf:  # adding 0
+    return b
+  # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
+  return math.log1p(math.exp(a - b)) + b  # log1p(x) = log(x + 1)
+
+
+def _log_sub(logx: float, logy: float) -> float:
+  """Subtracts two numbers in the log space. Answer must be non-negative."""
+  if logx < logy:
+    raise ValueError('The result of subtraction must be non-negative.')
+  if logy == -np.inf:  # subtracting 0
+    return logx
+  if logx == logy:
+    return -np.inf  # 0 is represented as -np.inf in the log space.
+
+  try:
+    # Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
+    return math.log(math.expm1(logx - logy)) + logy  # expm1(x) = exp(x) - 1
+  except OverflowError:
+    return logx
+
+
+def _log_sub_sign(logx: float, logy: float) -> Tuple[bool, float]:
+  """Returns log(exp(logx)-exp(logy)) and its sign."""
+  if logx > logy:
+    s = True
+    mag = logx + np.log(1 - np.exp(logy - logx))
+  elif logx < logy:
+    s = False
+    mag = logy + np.log(1 - np.exp(logx - logy))
+  else:
+    s = True
+    mag = -np.inf
+
+  return s, mag
+
+
+def _log_comb(n: int, k: int) -> float:
+  """Computes log of binomial coefficient."""
+  return (special.gammaln(n + 1) - special.gammaln(k + 1) -
+          special.gammaln(n - k + 1))
+
+
+def _compute_log_a_int(q: float, sigma: float, alpha: int) -> float:
+  """Computes log(A_alpha) for integer alpha, 0 < q < 1."""
+
+  # Initialize with 0 in the log space.
+  log_a = -np.inf
+
+  for i in range(alpha + 1):
+    log_coef_i = (
+        _log_comb(alpha, i) + i * math.log(q) + (alpha - i) * math.log(1 - q))
+
+    s = log_coef_i + (i * i - i) / (2 * (sigma ** 2))
+    log_a = _log_add(log_a, s)
+
+  return float(log_a)
+
+
+def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float:
+  """Computes log(A_alpha) for fractional alpha, 0 < q < 1."""
+  # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
+  # initialized to 0 in the log space:
+  log_a0, log_a1 = -np.inf, -np.inf
+  i = 0
+
+  z0 = sigma ** 2 * math.log(1 / q - 1) + .5
+
+  while True:  # do ... until loop
+    coef = special.binom(alpha, i)
+    log_coef = math.log(abs(coef))
+    j = alpha - i
+
+    log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q)
+    log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q)
+
+    log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
+    log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
+
+    log_s0 = log_t0 + (i * i - i) / (2 * (sigma ** 2)) + log_e0
+    log_s1 = log_t1 + (j * j - j) / (2 * (sigma ** 2)) + log_e1
+
+    if coef > 0:
+      log_a0 = _log_add(log_a0, log_s0)
+      log_a1 = _log_add(log_a1, log_s1)
+    else:
+      log_a0 = _log_sub(log_a0, log_s0)
+      log_a1 = _log_sub(log_a1, log_s1)
+
+    i += 1
+    if max(log_s0, log_s1) < -30:
+      break
+
+  return _log_add(log_a0, log_a1)
+
+
+def _log_erfc(x: float) -> float:
+  """Computes log(erfc(x)) with high accuracy for large x."""
+  try:
+    return math.log(2) + special.log_ndtr(-x * 2 ** .5)
+  except NameError:
+    # If log_ndtr is not available, approximate as follows:
+    r = special.erfc(x)
+    if r == 0.0:
+      # Using the Laurent series at infinity for the tail of the erfc function:
+      # erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
+      # To verify in Mathematica:
+      #   Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
+      return (-math.log(math.pi) / 2 - math.log(x) - x ** 2 - .5 * x ** -2 +
+              .625 * x ** -4 - 37. / 24. * x ** -6 + 353. / 64. * x ** -8)
+    else:
+      return math.log(r)
+
+
+def compute_delta(orders: Sequence[float], rdp: Sequence[float],
+                  epsilon: float) -> Tuple[float, int]:
+  """Computes delta given a list of RDP values and target epsilon.
+
+  Args:
+    orders: An array of orders.
+    rdp: An array of RDP guarantees.
+    epsilon: The target epsilon.
+
+  Returns:
+    2-tuple containing optimal delta and the optimal order.
+
+  Raises:
+    ValueError: If input is malformed.
+
+  """
+  if epsilon < 0:
+    raise ValueError(f'Epsilon cannot be negative. Found {epsilon}.')
+  if len(orders) != len(rdp):
+    raise ValueError('Input lists must have the same length.')
+
+  # Basic bound (see https://arxiv.org/abs/1702.07476 Proposition 3 in v3):
+  #   delta = min( np.exp((rdp - epsilon) * (orders - 1)) )
+
+  # Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4):
+  logdeltas = []  # work in log space to avoid overflows
+  for (a, r) in zip(orders, rdp):
+    if a < 1:
+      raise ValueError(f'Renyi divergence order must be at least 1. Found {a}.')
+    if r < 0:
+      raise ValueError(f'Renyi divergence cannot be negative. Found {r}.')
+    # For small alpha, we are better off with the bound via KL divergence:
+    # delta <= sqrt(1-exp(-KL)).
+    # Take a min of the two bounds.
+    if r == 0:
+      logdelta = -np.inf
+    else:
+      logdelta = 0.5 * math.log1p(-math.exp(-r))
+    if a > 1.01:
+      # This bound is not numerically stable as alpha->1.
+      # Thus we have a min value for alpha.
+      # The bound is also not useful for small alpha, so doesn't matter.
+      rdp_bound = (a - 1) * (r - epsilon + math.log1p(-1 / a)) - math.log(a)
+      logdelta = min(logdelta, rdp_bound)
+
+    logdeltas.append(logdelta)
+
+  optimal_index = np.argmin(logdeltas)
+  return min(math.exp(logdeltas[optimal_index]), 1.), orders[optimal_index]
+
+
+def compute_epsilon(orders: Sequence[float], rdp: Sequence[float],
+                    delta: float) -> Tuple[float, int]:
+  """Computes epsilon given a list of RDP values and target delta.
+
+  Args:
+    orders: An array of orders.
+    rdp: An array of RDP guarantees.
+    delta: The target delta. Must be >= 0.
+
+  Returns:
+    2-tuple containing optimal epsilon and the optimal order.
+
+  Raises:
+    ValueError: If input is malformed.
+
+  """
+  if delta < 0:
+    raise ValueError(f'Delta cannot be negative. Found {delta}.')
+
+  if delta == 0:
+    if all(r == 0 for r in rdp):
+      return 0, 0
+    else:
+      return np.inf, 0
+
+  if len(orders) != len(rdp):
+    raise ValueError('Input lists must have the same length.')
+
+  # Basic bound (see https://arxiv.org/abs/1702.07476 Proposition 3 in v3):
+  #   epsilon = min( rdp - math.log(delta) / (orders - 1) )
+
+  # Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4).
+  # Also appears in https://arxiv.org/abs/2001.05990 Equation 20 (in v1).
+  eps = []
+  for (a, r) in zip(orders, rdp):
+    if a < 1:
+      raise ValueError(f'Renyi divergence order must be at least 1. Found {a}.')
+    if r < 0:
+      raise ValueError(f'Renyi divergence cannot be negative. Found {r}.')
+
+    if delta ** 2 + math.expm1(-r) > 0:
+      # In this case, we can simply bound via KL divergence:
+      # delta <= sqrt(1-exp(-KL)).
+      epsilon = 0  # No need to try further computation if we have epsilon = 0.
+    elif a > 1.01:
+      # This bound is not numerically stable as alpha->1.
+      # Thus we have a min value of alpha.
+      # The bound is also not useful for small alpha, so doesn't matter.
+      epsilon = r + math.log1p(-1 / a) - math.log(delta * a) / (a - 1)
+    else:
+      # In this case we can't do anything. E.g., asking for delta = 0.
+      epsilon = np.inf
+    eps.append(epsilon)
+
+  optimal_index = np.argmin(eps)
+  return max(0, eps[optimal_index]), orders[optimal_index]
+
+
+def _stable_inplace_diff_in_log(vec: np.ndarray,
+                                signs: np.ndarray,
+                                n: Optional[int] = None):
+  """Replaces the first n-1 dims of vec with the log of abs difference operator.
+
+  Args:
+    vec: numpy array of floats with size larger than 'n'
+    signs: Optional numpy array of bools with the same size as vec in case one
+      needs to compute partial differences. vec and signs jointly describe a
+      vector of real numbers' sign and abs in log scale.
+    n: Optional upper bound on number of differences to compute. If None, all
+      differences are computed.
+
+  Returns:
+    The first n-1 dimension of vec and signs will store the log-abs and sign of
+    the difference.
+
+  Raises:
+    ValueError: If input is malformed.
+  """
+
+  if vec.shape != signs.shape:
+    raise ValueError('Shape of vec and signs do not match.')
+  if signs.dtype != bool:
+    raise ValueError('signs must be of type bool')
+  if n is None:
+    n = np.max(vec.shape) - 1
+  else:
+    assert np.max(vec.shape) >= n + 1
+  for j in range(0, n, 1):
+    if signs[j] == signs[j + 1]:  # When the signs are the same
+      # if the signs are both positive, then we can just use the standard one
+      signs[j], vec[j] = _log_sub_sign(vec[j + 1], vec[j])
+      # otherwise, we do that but toggle the sign
+      if not signs[j + 1]:
+        signs[j] = ~signs[j]
+    else:  # When the signs are different.
+ vec[j] = _log_add(vec[j], vec[j + 1]) + signs[j] = signs[j + 1] + + +def _get_forward_diffs(fun: Callable[[float], float], + n: int) -> Tuple[np.ndarray, np.ndarray]: + """Computes up to nth order forward difference evaluated at 0. + + See Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf + + Args: + fun: Function to compute forward differences of. + n: Number of differences to compute. + + Returns: + Pair (deltas, signs_deltas) of the log deltas and their signs. + """ + func_vec = np.zeros(n + 3) + signs_func_vec = np.ones(n + 3, dtype=bool) + + # ith coordinate of deltas stores log(abs(ith order discrete derivative)) + deltas = np.zeros(n + 2) + signs_deltas = np.zeros(n + 2, dtype=bool) + for i in range(1, n + 3, 1): + func_vec[i] = fun(1.0 * (i - 1)) + for i in range(0, n + 2, 1): + # Diff in log scale + _stable_inplace_diff_in_log(func_vec, signs_func_vec, n=n + 2 - i) + deltas[i] = func_vec[0] + signs_deltas[i] = signs_func_vec[0] + return deltas, signs_deltas + + +def _compute_log_a(q: float, noise_multiplier: float, + alpha: Union[int, float]) -> float: + if float(alpha).is_integer(): + return _compute_log_a_int(q, noise_multiplier, int(alpha)) + else: + return _compute_log_a_frac(q, noise_multiplier, alpha) + + +def _compute_rdp_poisson_subsampled_gaussian( + q: float, noise_multiplier: float, + orders: Sequence[float]) -> Union[float, np.ndarray]: + """Computes RDP of the Poisson sampled Gaussian mechanism. + + Args: + q: The sampling rate. + noise_multiplier: The ratio of the standard deviation of the Gaussian noise + to the l2-sensitivity of the function to which it is added. + orders: An array of RDP orders. + + Returns: + The RDPs at all orders. Can be `np.inf`. + """ + + def compute_one_order(q, alpha): + if np.isinf(alpha) or noise_multiplier == 0: + return np.inf + + if q == 0: + return 0 + + if q == 1.: + return alpha / (2 * noise_multiplier ** 2) + + return _compute_log_a(q, noise_multiplier, alpha) / (alpha - 1) + + return np.array([compute_one_order(q, order) for order in orders]) + + +def _compute_rdp_sample_wor_gaussian( + q: float, noise_multiplier: float, + orders: Sequence[float]) -> Union[float, np.ndarray]: + """Computes RDP of Gaussian mechanism using sampling without replacement. + + This function applies to the following schemes: + 1. Sampling w/o replacement: Sample a uniformly random subset of size m = q*n. + 2. ``Replace one data point'' version of differential privacy, i.e., n is + considered public information. + + Reference: Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf (A strengthened + version applies subsampled-Gaussian mechanism.) + - Wang, Balle, Kasiviswanathan. "Subsampled Renyi Differential Privacy and + Analytical Moments Accountant." AISTATS'2019. + + Args: + q: The sampling proportion = m / n. Assume m is an integer <= n. + noise_multiplier: The ratio of the standard deviation of the Gaussian noise + to the l2-sensitivity of the function to which it is added. + orders: An array of RDP orders. + + Returns: + The RDPs at all orders, can be np.inf. + """ + return np.array([ + _compute_rdp_sample_wor_gaussian_scalar(q, noise_multiplier, order) + for order in orders + ]) + + +def _compute_rdp_sample_wor_gaussian_scalar(q: float, sigma: float, + alpha: Union[float, int]) -> float: + """Computes RDP of the Sampled Gaussian mechanism at order alpha. + + Args: + q: The sampling proportion = m / n. Assume m is an integer <= n. + sigma: The std of the additive Gaussian noise. + alpha: The order at which RDP is computed. 
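As an aside before the remaining helpers: compute_delta and compute_epsilon defined earlier convert an RDP curve into (epsilon, delta) guarantees. A minimal sketch for the unsampled Gaussian mechanism, whose RDP at order alpha is alpha / (2 * sigma**2) as in compute_one_order above (the sigma value and order grid are illustrative):

    # Assumes compute_delta / compute_epsilon from this module are in scope.
    sigma = 2.0
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(11, 64))
    rdp = [order / (2 * sigma ** 2) for order in orders]

    eps, opt_order = compute_epsilon(orders, rdp, delta=1e-6)
    delta, _ = compute_delta(orders, rdp, epsilon=eps)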
+ + Returns: + RDP at alpha, can be np.inf. + """ + + assert (q <= 1) and (q >= 0) and (alpha >= 1) + + if q == 0: + return 0 + + if q == 1.: + return alpha / (2 * sigma ** 2) + + if np.isinf(alpha): + return np.inf + + if float(alpha).is_integer(): + return _compute_rdp_sample_wor_gaussian_int(q, sigma, int(alpha)) / ( + alpha - 1) + else: + # When alpha not an integer, we apply Corollary 10 of [WBK19] to interpolate + # the CGF and obtain an upper bound + alpha_f = math.floor(alpha) + alpha_c = math.ceil(alpha) + + x = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_f) + y = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_c) + t = alpha - alpha_f + return ((1 - t) * x + t * y) / (alpha - 1) + + +def _compute_rdp_sample_wor_gaussian_int(q: float, sigma: float, + alpha: int) -> float: + """Computes log(A_alpha) for integer alpha, subsampling without replacement. + + When alpha is smaller than max_alpha, compute the bound Theorem 27 exactly, + otherwise compute the bound with Stirling approximation. + + Args: + q: The sampling proportion = m / n. Assume m is an integer <= n. + sigma: The std of the additive Gaussian noise. + alpha: The order at which RDP is computed. + + Returns: + RDP at alpha, can be np.inf. + """ + + max_alpha = 256 + + if np.isinf(alpha): + return np.inf + elif alpha == 1: + return 0 + + def cgf(x): + # Return rdp(x+1)*x, the rdp of Gaussian mechanism is alpha/(2*sigma**2) + return x * 1.0 * (x + 1) / (2.0 * sigma ** 2) + + def func(x): + # Return the rdp of Gaussian mechanism + return 1.0 * x / (2.0 * sigma ** 2) + + # Initialize with 1 in the log space. + log_a = 0 + # Calculates the log term when alpha = 2 + log_f2m1 = func(2.0) + np.log(1 - np.exp(-func(2.0))) + if alpha <= max_alpha: + # We need forward differences of exp(cgf) + # The following line is the numerically stable way of implementing it. + # The output is in polar form with logarithmic magnitude + deltas, _ = _get_forward_diffs(cgf, alpha) + # Compute the bound exactly requires book keeping of O(alpha**2) + + for i in range(2, alpha + 1): + if i == 2: + s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum( + np.log(4) + log_f2m1, + func(2.0) + np.log(2)) + elif i > 2: + delta_lo = deltas[int(2 * np.floor(i / 2.0)) - 1] + delta_hi = deltas[int(2 * np.ceil(i / 2.0)) - 1] + s = np.log(4) + 0.5 * (delta_lo + delta_hi) + s = np.minimum(s, np.log(2) + cgf(i - 1)) + s += i * np.log(q) + _log_comb(alpha, i) + log_a = _log_add(log_a, s) + return float(log_a) + else: + # Compute the bound with stirling approximation. Everything is O(x) now. + for i in range(2, alpha + 1): + if i == 2: + s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum( + np.log(4) + log_f2m1, + func(2.0) + np.log(2)) + else: + s = np.log(2) + cgf(i - 1) + i * np.log(q) + _log_comb(alpha, i) + log_a = _log_add(log_a, s) + + return log_a + + +def _effective_gaussian_noise_multiplier( + event: dp_event.DpEvent) -> Optional[float]: + """Determines the effective noise multiplier of nested structure of Gaussians. + + A series of Gaussian queries on the same data can be reexpressed as a single + query with pre- and post- processing. For details, see section 3 of + https://arxiv.org/pdf/1812.06210.pdf. + + Args: + event: A `dp_event.DpEvent`. In order for conversion to be successful it + must consist of a single `dp_event.GaussianDpEvent`, or a nested structure + of `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent` + bottoming out in `dp_event.GaussianDpEvent`s. 
+
+  Returns:
+    The noise multiplier of the equivalent `dp_event.GaussianDpEvent`, or None
+    if the input event was not a `dp_event.GaussianDpEvent` or a nested
+    structure of `dp_event.ComposedDpEvent` and/or
+    `dp_event.SelfComposedDpEvent` bottoming out in `dp_event.GaussianDpEvent`s.
+  """
+  if isinstance(event, dp_event.GaussianDpEvent):
+    return event.noise_multiplier
+  elif isinstance(event, dp_event.ComposedDpEvent):
+    sum_sigma_inv_sq = 0
+    for e in event.events:
+      sigma = _effective_gaussian_noise_multiplier(e)
+      if sigma is None:
+        return None
+      sum_sigma_inv_sq += sigma ** -2
+    return sum_sigma_inv_sq ** -0.5
+  elif isinstance(event, dp_event.SelfComposedDpEvent):
+    sigma = _effective_gaussian_noise_multiplier(event.event)
+    return None if sigma is None else (event.count * sigma ** -2) ** -0.5
+  else:
+    return None
+
+
+def _compute_rdp_single_epoch_tree_aggregation(
+    noise_multiplier: float, step_counts: Union[int, Sequence[int]],
+    orders: Sequence[float]) -> Union[float, np.ndarray]:
+  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
+
+  This function implements the accounting when the tree is periodically
+  restarted and no record occurs twice across all trees. See appendix D of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    noise_multiplier: A non-negative float representing the ratio of the
+      standard deviation of the Gaussian noise to the l2-sensitivity of the
+      function to which it is added.
+    step_counts: A scalar or a list of non-negative integers representing the
+      number of steps per epoch (between two restarts).
+    orders: An array of RDP orders.
+
+  Returns:
+    The RDPs at all orders. Can be `np.inf`.
+  """
+  if noise_multiplier < 0:
+    raise ValueError(
+        f'noise_multiplier must be non-negative. Got {noise_multiplier}.')
+  if noise_multiplier == 0:
+    return np.inf
+
+  if not step_counts:
+    raise ValueError(
+        'step_counts must be a non-empty list, or a non-zero scalar. Got '
+        f'{step_counts}.')
+
+  if np.isscalar(step_counts):
+    step_counts = [step_counts]
+
+  for steps in step_counts:
+    if steps < 0:
+      raise ValueError(f'Steps must be non-negative. Got {step_counts}')
+
+  max_depth = math.ceil(math.log2(max(step_counts) + 1))
+  return np.array([a * max_depth / (2 * noise_multiplier ** 2) for a in orders])
+
+
+def _expm1_over_x(x: float) -> float:
+  """Computes (exp(x)-1)/x in a numerically stable manner.
+
+  Args:
+    x: float
+
+  Returns:
+    (exp(x)-1)/x
+  """
+  if x < -0.1 or x > 0.1:
+    return math.expm1(x) / x
+  # exp(x) = sum_{k>=0} x^k / k!
+  # (exp(x)-1)/x = sum_{k>=1} x^{k-1} / k!
+  terms = []
+  y = 1  # = x^{k-1}/k!
+  for k in range(1, 100):
+    y = y / k
+    terms.append(y)
+    y = y * x
+  return math.fsum(terms)
+  # Dropped terms: sum_{k>=100} x^{k-1} / k!
+  # Since |x|<= 0.1, this sum is < 10^-100.
+  # Note that 0.9 < (exp(x)-1)/x < 1.1, so this is also a small relative error.
+
+
+def _logx_over_xm1(x: float) -> float:
+  """Computes log(x)/(x-1) in a numerically stable manner.
+
+  Here log is the natural logarithm.
+
+  Args:
+    x: float
+
+  Returns:
+    log(x)/(x-1)
+  """
+  if x < 0.9 or x > 1.1:
+    return math.log(x) / (x - 1)
+  # Denote y = 1-x. Then we have a Taylor series for the natural logarithm:
+  #   log(x) = log(1-y) = - sum_{k>=1} y^k / k
+  # Thus log(x)/(x-1) = -log(1-y)/y = sum_{k>=1} y^{k-1}/k
+  return math.fsum((1 - x) ** (k - 1) / k for k in range(1, 100))
+  # Dropped terms: sum_{k>=100} y^{k-1}/k.
+  # Since |y|<=0.1, this sum is < 10^-100.
+  # Since 0.9 <= log(x)/(x-1) <= 1.1, this is also a small relative error.
+
+
+def _truncated_negative_binomial_mean(gamma: float, shape: float) -> float:
+  """Computes the mean of the truncated negative binomial distribution.
+
+  See Definition 1 in https://arxiv.org/pdf/2110.03620v2.pdf#page=5
+
+  Args:
+    gamma: Halting probability parameter of the distribution. Must be >0 and
+      <=1.
+    shape: Shape parameter of the distribution. Must be >=0.
+
+  Returns:
+    The mean of the distribution.
+  """
+  if shape < 0:
+    raise ValueError(f'shape must be non-negative. Got {shape}')
+  if gamma <= 0 or gamma > 1:
+    raise ValueError(f'gamma must be in (0,1]. Got {gamma}')
+
+  if shape == 1:  # Geometric Distribution
+    return 1 / gamma
+  elif shape == 0:  # Logarithmic distribution
+    # answer = (1 - 1 / gamma) / log(gamma)
+    #        = 1/(gamma*log(gamma)/(gamma-1))
+    return 1 / (gamma * _logx_over_xm1(gamma))
+  else:  # Truncated Negative Binomial
+    # answer = shape * (1 / gamma - 1) / (1 - gamma**shape)
+    #        = 1 / ( (exp(shape*log(gamma))-1)/(shape*log(gamma)) *
+    #          log(gamma)/(gamma-1) * gamma)
+    a = _expm1_over_x(shape * math.log(gamma))
+    b = _logx_over_xm1(gamma)
+    return 1 / (gamma * a * b)
+
+
+def _gamma_truncated_negative_binomial(shape: float,
+                                       mean: float,
+                                       tolerance: float = 1e-9) -> float:
+  """Computes gamma parameter of truncated negative binomial from mean.
+
+  Args:
+    shape: shape parameter of truncated negative binomial distribution. Must be
+      >= 0.
+    mean: the expectation of the distribution.
+    tolerance: relative (i.e. multiplicative) accuracy bound for gamma. Default
+      tolerance is 10^-9.
+
+  Returns:
+    The gamma parameter = success probability of the distribution.
+  """
+  if shape < 0:
+    raise ValueError(f'shape must be non-negative. Got {shape}')
+  if mean < 1:
+    raise ValueError(f'mean must be at least 1. Got {mean}')
+
+  if shape == 1:
+    return 1 / mean  # Geometric distribution
+  # Otherwise we invert the _truncated_negative_binomial_mean function.
+  gamma_min = 0  # gamma=0 corresponds to mean=infinity.
+  gamma_max = min(1, 2 * (shape + 1) / mean)  # gamma=1 corresponds to mean=1.
+  # Also max{shape,1/ln(1/gamma)}*(1/gamma-1) <= mean <= 2*(shape+1)/gamma,
+  # which implies gamma <= 2*(shape+1)/mean
+  while gamma_max > gamma_min * (1 + tolerance):
+    gamma = (gamma_min + gamma_max) / 2
+    gamma_mean = _truncated_negative_binomial_mean(gamma, shape)
+    if gamma_mean < mean:
+      gamma_max = gamma
+    else:
+      gamma_min = gamma
+  return gamma_min  # The conservative estimate is returned.
+
+
+def _compute_rdp_repeat_and_select(orders: Sequence[float],
+                                   rdp: Sequence[float], mean: float,
+                                   shape: float) -> Sequence[float]:
+  # pyformat: disable
+  """Computes RDP of repeating and selecting best run.
+
+  Inputs orders & rdp represent RDP of a single run.
+  Output represents RDP of running multiple times and returning the best run;
+  outputs of other runs are not returned.
+
+  The total number of runs is randomized and drawn from a distribution
+  with the given parameters. Poisson (shape=infinity), Geometric (shape=1),
+  Logarithmic (shape=0), or Truncated Negative binomial (0 < shape < infinity).
+
+  Args:
+    orders: List of Renyi orders to consider. Each order must be >= 1.
+    rdp: List of RDPs for a single run of the mechanism.
+    mean: The mean of the distribution of the random number of repetitions.
+    shape: Shape/type of the distribution. Should be >= 0.
+      * shape == 0 is the logarithmic distribution.
+      * shape == 1 is the geometric distribution.
+      * shape == infinity is the Poisson Distribution
+      * shape in (0, infinity) is the truncated negative binomial.
+
+  Returns:
+    The RDPs at all orders.
+ """ + # pyformat: enable + if math.isnan(shape) or shape < 0: + raise ValueError(f'Distribution of repetitions must be >=0. Got {shape}.') + if math.isnan(mean) or mean < 1: + raise ValueError(f'Mean of number of repetitions must be >=1. Got {mean}.') + if len(orders) != len(rdp): + raise ValueError( + f'orders and rdp must be same length, got {len(orders)} & {len(rdp)}.') + + orders = np.asarray(orders) + rdp_out = np.zeros_like(orders, dtype=np.float64) # This will be the output. + rdp_out += np.inf # Initialize to infinity. + + if shape == np.inf: # Poisson Distribution + for i in range(len(orders)): + # orders[i]=lambda and rdp[i]=epsilon in the language of + # Theorem 6 of https://arxiv.org/pdf/2110.03620v2.pdf#page=7 + if orders[i] <= 1: + continue # Our formula is not applicable in this case. + epshat = math.log1p(1 / (orders[i] - 1)) + deltahat, _ = compute_delta(orders, rdp, epshat) + rdp_out[i] = rdp[i] + mean * deltahat + math.log(mean) / (orders[i] - 1) + else: # Truncated Negative Binomial (includes Logarithmic & Geometric) + # First we map mean parameter to gamma parameter of TNB. + gamma = _gamma_truncated_negative_binomial(shape, mean) + + # Next we apply the formula. + # Theorem 2 of https://arxiv.org/pdf/2110.03620v2.pdf#page=5 + # orders[i] = lambda, rdp[i] = epsilon, + # orders[j] = lambdahat, rdp[j] = epsilonhat + # First compute constant term + c = (1 + shape) * np.min((1 - 1 / orders) * rdp - math.log(gamma) / orders) + for i in range(len(orders)): + if orders[i] > 1: # Otherwise our formula is invalid. + rdp_out[i] = rdp[i] + math.log(mean) / (orders[i] - 1) + c + + # Finally we apply monotonicity of Renyi DP + # i.e. if orders[i] < orders[j], then rdp[i] < rdp[j]. + # We can use this to bound rdp for low orders. + for i in range(len(orders)): + rdp_out[i] = min( + rdp_out[j] for j in range(len(orders)) if orders[i] <= orders[j]) + return rdp_out + + +# Default orders chosen to give good coverage for Gaussian mechanism in +# the privacy regime of interest. +DEFAULT_RDP_ORDERS = ([1 + x / 10. for x in range(1, 100)] + + list(range(11, 64)) + [128, 256, 512, 1024]) + + +class RdpAccountant(privacy_accountant.PrivacyAccountant): + """Privacy accountant that uses Renyi differential privacy.""" + + def __init__( + self, + orders: Optional[Sequence[float]] = None, + neighboring_relation: NeighborRel = NeighborRel.ADD_OR_REMOVE_ONE, + ): + super().__init__(neighboring_relation) + if orders is None: + orders = DEFAULT_RDP_ORDERS + self._orders = np.array(orders) + self._rdp = np.zeros_like(orders, dtype=np.float64) + + def supports(self, event: dp_event.DpEvent) -> bool: + return self._maybe_compose(event, 0, False) + + def _compose(self, event: dp_event.DpEvent, count: int = 1): + self._maybe_compose(event, count, True) + + def _maybe_compose(self, event: dp_event.DpEvent, count: int, + do_compose: bool) -> bool: + """Traverses `event` and performs composition if `do_compose` is True. + + If `do_compose` is False, can be used to check whether composition is + supported. + + Args: + event: A `DpEvent` to process. + count: The number of times to compose the event. + do_compose: Whether to actually perform the composition. + + Returns: + True if event is supported, otherwise False. 
+ """ + + if isinstance(event, dp_event.NoOpDpEvent): + return True + elif isinstance(event, dp_event.NonPrivateDpEvent): + if do_compose: + self._rdp += np.inf + return True + elif isinstance(event, dp_event.SelfComposedDpEvent): + return self._maybe_compose(event.event, event.count * count, do_compose) + elif isinstance(event, dp_event.ComposedDpEvent): + return all( + self._maybe_compose(e, count, do_compose) for e in event.events) + elif isinstance(event, dp_event.GaussianDpEvent): + if do_compose: + self._rdp += count * _compute_rdp_poisson_subsampled_gaussian( + q=1.0, noise_multiplier=event.noise_multiplier, orders=self._orders) + return True + elif isinstance(event, dp_event.PoissonSampledDpEvent): + if self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE: + return False + gaussian_noise_multiplier = _effective_gaussian_noise_multiplier( + event.event) + if gaussian_noise_multiplier is None: + return False + if do_compose: + self._rdp += count * _compute_rdp_poisson_subsampled_gaussian( + q=event.sampling_probability, + noise_multiplier=gaussian_noise_multiplier, + orders=self._orders) + return True + elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent): + if self._neighboring_relation is not NeighborRel.REPLACE_ONE: + return False + gaussian_noise_multiplier = _effective_gaussian_noise_multiplier( + event.event) + if gaussian_noise_multiplier is None: + return False + if do_compose: + self._rdp += count * _compute_rdp_sample_wor_gaussian( + q=event.sample_size / event.source_dataset_size, + noise_multiplier=gaussian_noise_multiplier, + orders=self._orders) + return True + elif isinstance(event, dp_event.SingleEpochTreeAggregationDpEvent): + if self._neighboring_relation is not NeighborRel.REPLACE_SPECIAL: + return False + if do_compose: + self._rdp += count * _compute_rdp_single_epoch_tree_aggregation( + event.noise_multiplier, event.step_counts, self._orders) + return True + elif isinstance(event, dp_event.LaplaceDpEvent): + if do_compose: + # Laplace satisfies eps-DP with eps = 1 / event.noise_multiplier + # eps-DP implies (alpha, min(eps,alpha*eps^2/2))-RDP for all alpha. + eps = 1 / event.noise_multiplier + rho = 0.5 * eps * eps + self._rdp += count * np.array( + [min(eps, rho * order) for order in self._orders]) + return True + elif isinstance(event, dp_event.RepeatAndSelectDpEvent): + if do_compose: + # Save the RDP values from already composed DPEvents. These will + # be added back after we process this RepeatAndSelectDpEvent. + # Zero out self._rdp before computing the RDP of the underlying + # DP event. + save_rdp = self._rdp + self._rdp = np.zeros_like(self._orders, dtype=np.float64) + can_compose = self._maybe_compose(event.event, 1, do_compose) + if can_compose and do_compose: + self._rdp = count * _compute_rdp_repeat_and_select( + self._orders, self._rdp, event.mean, event.shape) + save_rdp + return can_compose + else: + # Unsupported event (including `UnsupportedDpEvent`). 
+ return False + + def get_epsilon_and_optimal_order(self, + target_delta: float) -> Tuple[float, int]: + return compute_epsilon(self._orders, self._rdp, target_delta) + + def get_epsilon(self, target_delta: float) -> float: + return compute_epsilon(self._orders, self._rdp, target_delta)[0] + + def get_delta_and_optimal_order(self, + target_epsilon: float) -> Tuple[float, int]: + return compute_delta(self._orders, self._rdp, target_epsilon) + + def get_delta(self, target_epsilon: float) -> float: + return compute_delta(self._orders, self._rdp, target_epsilon)[0] diff --git a/python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant_test.py b/python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant_test.py new file mode 100644 index 0000000000..f9549a5ce7 --- /dev/null +++ b/python/fedml/core/dp/budget_accountant/rdp/rdp_privacy_accountant_test.py @@ -0,0 +1,712 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for rdp_privacy_accountant.""" + +import math +import sys + +from absl.testing import absltest +from absl.testing import parameterized +import mpmath +import numpy as np + +from dp_accounting import dp_event +from dp_accounting import privacy_accountant +from dp_accounting import privacy_accountant_test +from dp_accounting.rdp import rdp_privacy_accountant + + +def _get_test_rdp(event, count=1): + accountant = rdp_privacy_accountant.RdpAccountant(orders=[2.71828]) + accountant.compose(event, count) + return accountant._rdp[0] + + +def _log_float_mp(x): + # Convert multi-precision input to float log space. 
+ if x >= sys.float_info.min: + return float(mpmath.log(x)) + else: + return -np.inf + + +def _compute_a_mp(sigma, q, alpha): + """Compute A_alpha for arbitrary alpha by numerical integration.""" + + def mu0(x): + return mpmath.npdf(x, mu=0, sigma=sigma) + + def _mu_over_mu0(x, q, sigma): + return (1 - q) + q * mpmath.exp((2 * x - 1) / (2 * sigma ** 2)) + + def a_alpha_fn(z): + return mu0(z) * _mu_over_mu0(z, q, sigma) ** alpha + + bounds = (-mpmath.inf, mpmath.inf) + a_alpha, _ = mpmath.quad(a_alpha_fn, bounds, error=True, maxdegree=8) + return a_alpha + + +def _compose_trees(noise_multiplier, step_counts, orders): + accountant = rdp_privacy_accountant.RdpAccountant( + orders, privacy_accountant.NeighboringRelation.REPLACE_SPECIAL) + accountant.compose( + dp_event.ComposedDpEvent([ + dp_event.SingleEpochTreeAggregationDpEvent(noise_multiplier, + step_count) + for step_count in step_counts + ])) + return accountant + + +def _compose_trees_single_epoch(noise_multiplier, step_counts, orders): + accountant = rdp_privacy_accountant.RdpAccountant( + orders, privacy_accountant.NeighboringRelation.REPLACE_SPECIAL) + accountant.compose( + dp_event.SingleEpochTreeAggregationDpEvent(noise_multiplier, step_counts)) + return accountant + + +class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, + parameterized.TestCase): + + def _make_test_accountants(self): + return [ + rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE), + rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.REPLACE_ONE), + rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.REPLACE_SPECIAL) + ] + + def test_supports(self): + aor_accountant = rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE) + ro_accountant = rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.REPLACE_ONE) + + event = dp_event.GaussianDpEvent(1.0) + self.assertTrue(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(1.0), 6) + self.assertTrue(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.ComposedDpEvent( + [dp_event.GaussianDpEvent(1.0), + dp_event.GaussianDpEvent(2.0)]) + self.assertTrue(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.PoissonSampledDpEvent(0.1, dp_event.GaussianDpEvent(1.0)) + self.assertTrue(aor_accountant.supports(event)) + self.assertFalse(ro_accountant.supports(event)) + + composed_gaussian = dp_event.ComposedDpEvent( + [dp_event.GaussianDpEvent(1.0), + dp_event.GaussianDpEvent(2.0)]) + event = dp_event.PoissonSampledDpEvent(0.1, composed_gaussian) + self.assertTrue(aor_accountant.supports(event)) + self.assertFalse(ro_accountant.supports(event)) + + event = dp_event.SampledWithoutReplacementDpEvent( + 1000, 10, dp_event.GaussianDpEvent(1.0)) + self.assertFalse(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.SampledWithoutReplacementDpEvent(1000, 10, + composed_gaussian) + self.assertFalse(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.SampledWithReplacementDpEvent( + 1000, 10, dp_event.GaussianDpEvent(1.0)) + self.assertFalse(aor_accountant.supports(event)) + self.assertFalse(ro_accountant.supports(event)) + 
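For orientation, a minimal usage sketch of the accountant this patch adds (illustrative only, not part of the diff; the import paths follow the test file, and would map to the vendored fedml.core.dp.budget_accountant modules):

    from dp_accounting import dp_event
    from dp_accounting.rdp import rdp_privacy_accountant

    # DP-SGD-style accounting: 1000 steps, each a Poisson-subsampled
    # Gaussian with sampling probability 0.01 and noise multiplier 1.1.
    step = dp_event.PoissonSampledDpEvent(0.01, dp_event.GaussianDpEvent(1.1))
    accountant = rdp_privacy_accountant.RdpAccountant()  # default RDP orders
    accountant.compose(dp_event.SelfComposedDpEvent(step, 1000))
    print(accountant.get_epsilon(target_delta=1e-5))  # epsilon at delta = 1e-5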
+ def test_rdp_composition(self): + base_event = dp_event.GaussianDpEvent(3.14159) + base_rdp = _get_test_rdp(base_event) + + rdp_with_count = _get_test_rdp(base_event, count=6) + self.assertAlmostEqual(rdp_with_count, base_rdp * 6) + + rdp_with_self_compose = _get_test_rdp( + dp_event.SelfComposedDpEvent(base_event, 6)) + self.assertAlmostEqual(rdp_with_self_compose, base_rdp * 6) + + rdp_with_self_compose_and_count = _get_test_rdp( + dp_event.SelfComposedDpEvent(base_event, 2), count=3) + self.assertAlmostEqual(rdp_with_self_compose_and_count, base_rdp * 6) + + rdp_with_compose = _get_test_rdp(dp_event.ComposedDpEvent([base_event] * 6)) + self.assertAlmostEqual(rdp_with_compose, base_rdp * 6) + + rdp_with_compose_and_self_compose = _get_test_rdp( + dp_event.ComposedDpEvent([ + dp_event.SelfComposedDpEvent(base_event, 1), + dp_event.SelfComposedDpEvent(base_event, 2), + dp_event.SelfComposedDpEvent(base_event, 3) + ])) + self.assertAlmostEqual(rdp_with_compose_and_self_compose, base_rdp * 6) + + base_event_2 = dp_event.GaussianDpEvent(1.61803) + base_rdp_2 = _get_test_rdp(base_event_2) + rdp_with_heterogeneous_compose = _get_test_rdp( + dp_event.ComposedDpEvent([base_event, base_event_2])) + self.assertAlmostEqual(rdp_with_heterogeneous_compose, + base_rdp + base_rdp_2) + + def test_zero_poisson_sample(self): + accountant = rdp_privacy_accountant.RdpAccountant([3.14159]) + accountant.compose( + dp_event.PoissonSampledDpEvent(0, dp_event.GaussianDpEvent(1.0))) + self.assertEqual(accountant.get_epsilon(1e-10), 0) + self.assertEqual(accountant.get_delta(1e-10), 0) + + def test_zero_fixed_batch_sample(self): + accountant = rdp_privacy_accountant.RdpAccountant( + [3.14159], privacy_accountant.NeighboringRelation.REPLACE_ONE) + accountant.compose( + dp_event.SampledWithoutReplacementDpEvent( + 1000, 0, dp_event.GaussianDpEvent(1.0))) + self.assertEqual(accountant.get_epsilon(1e-10), 0) + self.assertEqual(accountant.get_delta(1e-10), 0) + + def test_epsilon_non_private_gaussian(self): + accountant = rdp_privacy_accountant.RdpAccountant([3.14159]) + accountant.compose(dp_event.GaussianDpEvent(0)) + self.assertEqual(accountant.get_epsilon(1e-1), np.inf) + + def test_compute_rdp_gaussian(self): + alpha = 3.14159 + sigma = 2.71828 + event = dp_event.GaussianDpEvent(sigma) + accountant = rdp_privacy_accountant.RdpAccountant(orders=[alpha]) + accountant.compose(event) + self.assertAlmostEqual(accountant._rdp[0], alpha / (2 * sigma ** 2)) + + def test_compute_rdp_multi_gaussian(self): + alpha = 3.14159 + sigma1, sigma2 = 2.71828, 6.28319 + + rdp1 = alpha / (2 * sigma1 ** 2) + rdp2 = alpha / (2 * sigma2 ** 2) + rdp = rdp1 + rdp2 + + accountant = rdp_privacy_accountant.RdpAccountant(orders=[alpha]) + accountant.compose( + dp_event.PoissonSampledDpEvent( + 1.0, + dp_event.ComposedDpEvent([ + dp_event.GaussianDpEvent(sigma1), + dp_event.GaussianDpEvent(sigma2) + ]))) + self.assertAlmostEqual(accountant._rdp[0], rdp) + + def test_effective_gaussian_noise_multiplier(self): + np.random.seed(0xBAD5EED) + sigmas = np.random.uniform(size=(4,)) + + event = dp_event.ComposedDpEvent([ + dp_event.GaussianDpEvent(sigmas[0]), + dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(sigmas[1]), 3), + dp_event.ComposedDpEvent([ + dp_event.GaussianDpEvent(sigmas[2]), + dp_event.GaussianDpEvent(sigmas[3]) + ]) + ]) + + sigma = rdp_privacy_accountant._effective_gaussian_noise_multiplier(event) + multi_sigmas = list(sigmas) + [sigmas[1]] * 2 + expected = sum(s ** -2 for s in multi_sigmas) ** -0.5 + 
self.assertAlmostEqual(sigma, expected) + + def test_compute_rdp_poisson_sampled_gaussian(self): + orders = [1.5, 2.5, 5, 50, 100, np.inf] + noise_multiplier = 2.5 + sampling_probability = 0.01 + count = 50 + event = dp_event.SelfComposedDpEvent( + dp_event.PoissonSampledDpEvent( + sampling_probability, dp_event.GaussianDpEvent(noise_multiplier)), + count) + accountant = rdp_privacy_accountant.RdpAccountant(orders=orders) + accountant.compose(event) + self.assertTrue( + np.allclose( + accountant._rdp, [ + 6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02, 1.6742e+02, + np.inf + ], + rtol=1e-4)) + + def test_compute_epsilon_delta_pure_dp(self): + orders = range(2, 33) + rdp = [1.1 for o in orders] # Constant corresponds to pure DP. + + epsilon, optimal_order = rdp_privacy_accountant.compute_epsilon( + orders, rdp, delta=1e-5) + # Compare with epsilon computed by hand. + self.assertAlmostEqual(epsilon, 1.32783806176) + self.assertEqual(optimal_order, 32) + + delta, optimal_order = rdp_privacy_accountant.compute_delta( + orders, rdp, epsilon=1.32783806176) + self.assertAlmostEqual(delta, 1e-5) + self.assertEqual(optimal_order, 32) + + def test_compute_epsilon_delta_gaussian(self): + orders = [0.001 * i for i in range(1000, 100000)] + + # noise multiplier is chosen to obtain exactly (1,1e-6)-DP. + rdp = rdp_privacy_accountant._compute_rdp_poisson_subsampled_gaussian( + 1, 4.530877117, orders) + + eps = rdp_privacy_accountant.compute_epsilon(orders, rdp, delta=1e-6)[0] + self.assertAlmostEqual(eps, 1) + + delta = rdp_privacy_accountant.compute_delta(orders, rdp, epsilon=1)[0] + self.assertAlmostEqual(delta, 1e-6) + + params = ({ + 'q': 1e-7, + 'sigma': .1, + 'order': 1.01 + }, { + 'q': 1e-6, + 'sigma': .1, + 'order': 256 + }, { + 'q': 1e-5, + 'sigma': .1, + 'order': 256.1 + }, { + 'q': 1e-6, + 'sigma': 1, + 'order': 27 + }, { + 'q': 1e-4, + 'sigma': 1., + 'order': 1.5 + }, { + 'q': 1e-3, + 'sigma': 1., + 'order': 2 + }, { + 'q': .01, + 'sigma': 10, + 'order': 20 + }, { + 'q': .1, + 'sigma': 100, + 'order': 20.5 + }, { + 'q': .99, + 'sigma': .1, + 'order': 256 + }, { + 'q': .999, + 'sigma': 100, + 'order': 256.1 + }) + + # pylint:disable=undefined-variable + @parameterized.parameters(p for p in params) + def test_compute_log_a_equals_mp(self, q, sigma, order): + # Compare the cheap computation of log(A) with an expensive, multi-precision + # computation. + log_a = rdp_privacy_accountant._compute_log_a(q, sigma, order) + log_a_mp = _log_float_mp(_compute_a_mp(sigma, q, order)) + np.testing.assert_allclose(log_a, log_a_mp, rtol=1e-4) + + def test_delta_bounds_gaussian(self): + # Compare the optimal bound for Gaussian with the one derived from RDP. + # Also compare the RDP upper bound with the "standard" upper bound. + orders = [0.1 * x for x in range(10, 505)] + eps_vec = [0.1 * x for x in range(500)] + rdp = rdp_privacy_accountant._compute_rdp_poisson_subsampled_gaussian( + 1, 1, orders) + for eps in eps_vec: + delta = rdp_privacy_accountant.compute_delta(orders, rdp, epsilon=eps)[0] + # For comparison, we compute the optimal guarantee for Gaussian + # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2). + delta0 = math.erfc((eps - .5) / math.sqrt(2)) / 2 + delta0 = delta0 - math.exp(eps) * math.erfc((eps + .5) / math.sqrt(2)) / 2 + self.assertLessEqual(delta0, delta + 1e-300) # need tolerance 10^-300 + + # Compute the "standard" upper bound, which should be an upper bound. + # Note, if orders is too sparse, this will NOT be an upper bound. 
+ if eps >= 0.5: + delta1 = math.exp(-0.5 * (eps - 0.5) ** 2) + else: + delta1 = 1 + self.assertLessEqual(delta, delta1 + 1e-300) + + def test_epsilon_delta_consistency(self): + orders = range(2, 50) # Large range of orders (helps test for overflows). + for q in [0, 0.01, 0.1, 0.8, 1.]: + for multiplier in [0.0, 0.1, 1., 10., 100.]: + event = dp_event.PoissonSampledDpEvent( + q, dp_event.GaussianDpEvent(multiplier)) + accountant = rdp_privacy_accountant.RdpAccountant(orders) + accountant.compose(event) + for delta in [.99, .9, .1, .01, 1e-3, 1e-5, 1e-9, 1e-12]: + epsilon = accountant.get_epsilon(delta) + delta2 = accountant.get_delta(epsilon) + if np.isposinf(epsilon): + self.assertEqual(delta2, 1.0) + elif epsilon == 0: + self.assertLessEqual(delta2, delta) + else: + self.assertAlmostEqual(delta, delta2) + + @parameterized.named_parameters( + ('add_remove', privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE), + ('replace', privacy_accountant.NeighboringRelation.REPLACE_ONE)) + def test_tree_wrong_neighbor_rel(self, neighboring_relation): + event = dp_event.SingleEpochTreeAggregationDpEvent(1.0, 1) + accountant = rdp_privacy_accountant.RdpAccountant( + neighboring_relation=neighboring_relation) + self.assertFalse(accountant.supports(event)) + + @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04)) + def test_compute_eps_tree(self, noise_multiplier, eps): + orders = [1 + x / 10 for x in range(1, 100)] + list(range(12, 64)) + # This test is based on the StackOverflow setting in "Practical and + # Private (Deep) Learning without Sampling or Shuffling". The calculated + # epsilon could be better as the method in this package keeps improving. + step_counts, target_delta = 1600, 1e-6 + new_eps = _compose_trees_single_epoch(noise_multiplier, step_counts, + orders).get_epsilon(target_delta) + self.assertLess(new_eps, eps) + + @parameterized.named_parameters( + ('restart4', [400] * 4), + ('restart2', [800] * 2), + ('adaptive', [10, 400, 400, 400, 390]), + ) + def test_compose_tree_rdp(self, step_counts): + noise_multiplier, orders = 0.1, [1] + + def get_rdp(step_count): + return _compose_trees_single_epoch(noise_multiplier, [step_count], + orders)._rdp[0] + + rdp_summed = sum(get_rdp(step_count) for step_count in step_counts) + rdp_composed = _compose_trees(noise_multiplier, step_counts, orders)._rdp[0] + self.assertTrue(np.allclose(rdp_composed, rdp_summed, rtol=1e-12)) + + def test_single_epoch_multi_tree_rdp(self): + noise_multiplier, orders = 0.1, [1] + step_counts = [10, 40, 30, 20] + single_rdp = _compose_trees_single_epoch(noise_multiplier, step_counts, + orders)._rdp[0] + + max_rdp = max( + _compose_trees_single_epoch(noise_multiplier, step_count, + orders)._rdp[0] + for step_count in step_counts) + + self.assertEqual(single_rdp, max_rdp) + + @parameterized.named_parameters( + ('restart4', [400] * 4), + ('restart2', [800] * 2), + ('adaptive', [10, 400, 400, 400, 390]), + ) + def test_compute_eps_tree_decreasing(self, step_counts): + # Test privacy epsilon decreases with noise multiplier increasing when + # keeping other parameters the same. + orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64)) + target_delta = 1e-6 + prev_eps = np.inf + for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]: + accountant = _compose_trees(noise_multiplier, step_counts, orders) + eps = accountant.get_epsilon(target_delta=target_delta) + self.assertLess(eps, prev_eps) + prev_eps = eps + + @parameterized.named_parameters( + ('negative_noise', -1, [3]), + ('negative_steps', 1, [-3]), + ) + def test_compute_rdp_tree_restart_raise(self, noise_multiplier, step_counts): + with self.assertRaisesRegex(ValueError, 'non-negative'): + _compose_trees(noise_multiplier, step_counts, orders=[1]) + + @parameterized.named_parameters( + ('t100n0.1', 100, 0.1), + ('t1000n0.01', 1000, 0.01), + ) + def test_no_tree_no_sampling(self, total_steps, noise_multiplier): + orders = [1 + x / 10 for x in range(1, 100)] + list(range(12, 64)) + tree_rdp = _compose_trees(noise_multiplier, [1] * total_steps, orders)._rdp + accountant = rdp_privacy_accountant.RdpAccountant(orders) + event = dp_event.SelfComposedDpEvent( + dp_event.GaussianDpEvent(noise_multiplier), total_steps) + accountant.compose(event) + base_rdp = accountant._rdp + self.assertTrue(np.allclose(tree_rdp, base_rdp, rtol=1e-12)) + + @parameterized.named_parameters( + ('small_eps', 0.01, 1), + ('medium_eps', 1.0, 1), + ('large_eps', 100.0, 1), + ('repetition', 1.0, 100) + ) + def test_laplace(self, eps, count): + event = dp_event.LaplaceDpEvent(1 / eps) + if count != 1: + event = dp_event.SelfComposedDpEvent(event, count) + # Simulate Pure DP by using a large Renyi order. + accountant = rdp_privacy_accountant.RdpAccountant(orders=[1.0, 1e10]) + accountant.compose(event) + # Check basic composition by having small delta. + self.assertAlmostEqual(accountant.get_epsilon(1e-10), eps * count) + # Check KL divergence, a.k.a. expected privacy loss, a.k.a. order=1. + self.assertAlmostEqual(accountant._rdp[0], min(eps, eps * eps / 2) * count) + + # The function _truncated_negative_binomial_mean computes the mean in + # multiple ways to ensure numerical stability. + # This test checks that those different ways of computing are consistent. + @parameterized.named_parameters( + ('gamma_shape0', 0.9, 0, 0.9 - 1e-9, 0), + ('gamma_shape2', 0.9, 2, 0.9 - 1e-9, 2), + ('gamma_shape_0.5', 0.9, 0.5, 0.9 - 1e-9, 0.5), + ('x_shape2', math.exp(-0.05), 2, math.exp(-0.05) - 1e-9, + 2), # x = shape * math.log(gamma) = -0.1 + ('x_shape0.5', math.exp(-0.2), 0.5, math.exp(-0.2) - 1e-9, + 0.5), # x = shape * math.log(gamma) = -0.1 + ('shape_0', 0.6, 0, 0.6, 1e-9), + ('shape_1', 0.6, 1, 0.6, 1 + 1e-9)) + def test_truncated_negative_binomial_mean(self, gamma1, shape1, gamma2, + shape2): + mean1 = rdp_privacy_accountant._truncated_negative_binomial_mean( + gamma1, shape1) + mean2 = rdp_privacy_accountant._truncated_negative_binomial_mean( + gamma2, shape2) + self.assertAlmostEqual(mean1, mean2) + + @parameterized.named_parameters(('1e-7', 1e-7), ('.1', 0.1), + ('0.999999', 1 - 1e-6), ('1', 1)) + def test_truncated_negative_binomial_mean2(self, gamma): + # Test this function by simply applying the non-numerically stable formula. 
+    # logarithmic distribution
+    mean = rdp_privacy_accountant._truncated_negative_binomial_mean(gamma, 0)
+    if gamma == 1:
+      ans = 1
+    else:
+      ans = (1 - 1 / gamma) / math.log(gamma)
+    self.assertAlmostEqual(mean, ans)
+
+    # geometric distribution
+    mean = rdp_privacy_accountant._truncated_negative_binomial_mean(gamma, 1)
+    self.assertAlmostEqual(mean, 1 / gamma)
+
+    # general TNB distribution
+    for shape in [0.01, 0.5, 0.99, 1.01, 2, 10]:
+      mean = rdp_privacy_accountant._truncated_negative_binomial_mean(
+          gamma, shape)
+      if gamma == 1:
+        ans = 1
+      else:
+        ans = shape * (1 / gamma - 1) / (1 - gamma ** shape)
+      self.assertAlmostEqual(mean, ans)
+
+  # _gamma_truncated_negative_binomial is meant to be the inverse of
+  # _truncated_negative_binomial_mean, so we test this.
+  @parameterized.named_parameters(
+      ('shape0a', 0.1, 0),
+      ('shape0.5a', 0.1, 0.5),
+      ('shape1a', 0.1, 1),
+      ('shape2a', 0.1, 2),
+      ('shape0b', 0.0001, 0),
+      ('shape0.5b', 0.0001, 0.5),
+      ('shape1b', 0.0001, 1),
+      ('shape2b', 0.0001, 2),
+      ('shape0c', 1, 0),
+      ('shape0.5c', 1, 0.5),
+      ('shape1c', 1, 1),
+      ('shape2c', 1, 2),
+      ('shape0', 0.999, 0),
+      ('shape0.5', 0.999, 0.5),
+      ('shape1', 0.999, 1),
+      ('shape2', 0.999, 2)
+  )
+  def test_gamma_truncated_negative_binomial(self, gamma, shape):
+    mean = rdp_privacy_accountant._truncated_negative_binomial_mean(
+        gamma, shape)
+    g = rdp_privacy_accountant._gamma_truncated_negative_binomial(shape, mean)
+    self.assertAlmostEqual(g, gamma)
+
+  @parameterized.named_parameters(
+      ('logarithmic', 1, 1000, 0),
+      ('geometric', 1, 1000, 1),
+      ('negative binomial 0.5', 1, 1000, 0.5),
+      ('negative binomial 2', 1, 100, 2),
+      ('negative binomial 5', 1, 1000, 5),
+  )
+  def test_repeat_select_pure_negative_binomial(self, eps, mean, shape):
+    # Test the Repeat and Select DP event in the almost-pure DP case.
+    event = dp_event.LaplaceDpEvent(1 / eps)
+    event = dp_event.RepeatAndSelectDpEvent(event, mean, shape)
+    # Use single large order to simulate pure DP.
+    accountant = rdp_privacy_accountant.RdpAccountant(orders=[1e10])
+    accountant.compose(event)
+    # Correct answer is given by Corollary 3 https://arxiv.org/abs/2110.03620
+    self.assertAlmostEqual(accountant._rdp[0], eps * (2 + shape))
+    self.assertAlmostEqual(accountant.get_epsilon(1e-10), eps * (2 + shape))
+
+  @parameterized.named_parameters(('shape0', 0, 1), ('shape0.5', 0.5, 10),
+                                  ('shape1', 1, 0.1), ('shape2', 2, 1))
+  def test_repeat_select_trivial(self, shape, sigma):
+    # Test the repeat and select function in the trivial mean=1 case.
+    orders = [1, 1 + 1e-6,  # We include 1, as otherwise this test fails.
+              2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 20, 24, 28, 32, 48, 64,
+              128, 256, 512, 1024
+              ]
+    event1 = dp_event.GaussianDpEvent(sigma)
+    accountant1 = rdp_privacy_accountant.RdpAccountant(orders=orders)
+    accountant1.compose(event1)
+    event2 = dp_event.RepeatAndSelectDpEvent(event1, 1, shape)
+    accountant2 = rdp_privacy_accountant.RdpAccountant(orders=orders)
+    accountant2.compose(event2)
+    for i in range(len(accountant1._orders)):
+      if orders[i] > 1:  # Otherwise our formula doesn't work.
+        self.assertAlmostEqual(accountant1._rdp[i], accountant2._rdp[i])
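The mean computation and its inverse tested above admit a compact standalone check. A sketch using the direct, less numerically stable formulas (not the patch's code; the helper names here are hypothetical):

    import math

    def tnb_mean(gamma, shape):
        # Mean of the truncated negative binomial, Definition 1 of
        # https://arxiv.org/abs/2110.03620 (gamma in (0, 1], shape >= 0).
        if gamma == 1:
            return 1.0
        if shape == 0:  # logarithmic distribution
            return (1 - 1 / gamma) / math.log(gamma)
        if shape == 1:  # geometric distribution
            return 1 / gamma
        return shape * (1 / gamma - 1) / (1 - gamma ** shape)

    def gamma_from_mean(mean, shape, tol=1e-12):
        # The mean is strictly decreasing in gamma, so invert by bisection,
        # mirroring _gamma_truncated_negative_binomial in the patch.
        lo, hi = 1e-12, 1.0
        while hi - lo > tol:
            mid = (lo + hi) / 2
            if tnb_mean(mid, shape) < mean:
                hi = mid  # mean too small -> gamma too large
            else:
                lo = mid
        return lo

    g = gamma_from_mean(1000.0, 2.0)
    print(g, tnb_mean(g, 2.0))  # round-trips to a mean of ~1000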
+
+  @parameterized.named_parameters(
+      ('small0', 0.01, 0.01, 0), ('med0', 1, 0.1, 0), ('large0', 10, 0.99, 0),
+      ('small0.5', 0.01, 0.01, 0.5), ('med0.5', 1, 0.1, 0.5),
+      ('large0.5', 10, 0.99, 0.5), ('small1', 0.01, 0.01, 1),
+      ('med1', 1, 0.1, 1), ('large1', 10, 0.99, 1), ('small5', 0.01, 0.01, 5),
+      ('med5', 1, 0.1, 5), ('large5', 10, 0.99, 5))
+  def test_repeat_select_gaussian_negative_binomial(self, rho, gamma, shape):
+    # Test the Repeat and Select DP event in the Gaussian case.
+    # Correct answer is given by Corollary 4 https://arxiv.org/abs/2110.03620
+    mean = rdp_privacy_accountant._truncated_negative_binomial_mean(
+        gamma, shape)
+    rho = min(rho, -math.log(gamma))  # We need rho<=log(1/gamma).
+    self.assertGreater(rho, 0)  # Otherwise we get division by zero.
+    orders = [
+        1, 1.1, 2,
+        math.sqrt(-math.log(gamma) / rho),
+        1 + math.sqrt(math.log(mean) / rho),
+        3, 5, 10, 100, 1000, 10000
+    ]
+    event = dp_event.GaussianDpEvent(math.sqrt(0.5 / rho))
+    event = dp_event.RepeatAndSelectDpEvent(event, mean, shape)
+    accountant = rdp_privacy_accountant.RdpAccountant(orders=orders)
+    accountant.compose(event)
+    for i in range(len(orders)):
+      order = accountant._orders[i]
+      rdp = accountant._rdp[i]
+      if order <= 1 + math.sqrt(math.log(mean) / rho):
+        eps = 2 * math.sqrt(rho * math.log(mean)) + 2 * (1 + shape) * math.sqrt(
+            -rho * math.log(gamma)) - shape * rho
+      else:
+        eps = rho * (order - 1) + math.log(mean) / (order - 1) + 2 * (
+            1 + shape) * math.sqrt(-rho * math.log(gamma)) - shape * rho
+      self.assertAlmostEqual(rdp, eps, msg='order=' + str(order))
+
+  @parameterized.named_parameters(
+      ('mean1', 1, 1),
+      ('mean2', 0.1, 2),
+      ('mean10', 10, 10),
+      ('mean100', 0.001, 100),
+      ('mean10^4', 2, 1000),
+      ('mean10^10', 1, 1e10)
+  )
+  def test_repeat_and_select_pure_poisson(self, eps, mean):
+    event = dp_event.LaplaceDpEvent(1 / eps)
+    event = dp_event.RepeatAndSelectDpEvent(event, mean, np.inf)
+    alpha = 1 + 1 / math.expm1(eps)
+    orders = [alpha, 1e10, 1e100, 1e1000]
+    accountant = rdp_privacy_accountant.RdpAccountant(orders=orders)
+    accountant.compose(event)
+    ans = min(eps, alpha * eps ** 2 / 2) + math.log(mean) * math.expm1(eps)
+    self.assertAlmostEqual(accountant._orders[0], alpha)
+    self.assertAlmostEqual(accountant._rdp[0], ans)
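For intuition about what the remaining tests check, a hedged end-to-end sketch (not part of the patch; field names follow the dp_event usage above): wrapping a mechanism in RepeatAndSelectDpEvent models releasing only the best of a random number of runs, which costs extra privacy relative to a single run:

    from dp_accounting import dp_event
    from dp_accounting.rdp import rdp_privacy_accountant

    base = dp_event.GaussianDpEvent(2.0)  # noise multiplier 2.0
    # Best of ~10 runs; shape=1 draws the run count geometrically.
    best_of = dp_event.RepeatAndSelectDpEvent(base, 10.0, 1.0)

    for ev in (base, best_of):
        acct = rdp_privacy_accountant.RdpAccountant()
        acct.compose(ev)
        print(acct.get_epsilon(target_delta=1e-6))
    # The second epsilon is larger: selecting the best run is not free.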
+
+  @parameterized.named_parameters(
+      ('small_small', 0.001, 1),
+      ('small_med', 0.001, 1000),
+      ('small_large', 0.001, 1e9),
+      ('med_small', 1, 1),
+      ('med_med', 1, 1000),
+      ('med_large', 1, 1e9),
+      ('large_small', 1000, 1),
+      ('large_med', 1000, 1000),
+      ('large_large', 1000, 1e9)
+  )
+  def test_repeat_and_select_gaussian_poisson(self, sigma, mean):
+    event = dp_event.GaussianDpEvent(sigma)
+    event = dp_event.RepeatAndSelectDpEvent(event, mean, np.inf)
+    accountant = rdp_privacy_accountant.RdpAccountant()
+    accountant.compose(event)
+    orders = accountant._orders
+    rdp = []
+    for order in orders:
+      if order <= 1:  # Avoid division by zero.
+        rdp.append(np.inf)
+        continue
+      eps = math.log1p(1 / (order - 1))
+      x = (eps * sigma - 0.5 / sigma) / math.sqrt(2)
+      y = (eps * sigma + 0.5 / sigma) / math.sqrt(2)
+      delta = math.erfc(x) / 2 - math.exp(eps) * math.erfc(y) / 2
+      rdp.append(order * 0.5 / (sigma ** 2) + mean * delta +
+                 math.log(mean) / (order - 1))
+    for i in range(len(orders)):
+      lb = min(rdp[j] for j in range(len(orders)) if orders[j] >= orders[i])
+      self.assertLessEqual(lb, accountant._rdp[i])
+
+  @parameterized.named_parameters(
+      ('all_0', 1, 1, 1, 0),  # Compose before and after.
+      ('all_1', 2, 3, 4, 1),
+      ('all_2', 0.1, 0.2, 0.3, 2),
+      ('all_inf', 1.1, 1.2, 2.1, np.inf),
+      ('pre_0', 1, 2, 0, 0),  # Compose before, but not after.
+      ('pre_1', 1, 0.5, 0, 1),
+      ('pre_2', 2, 1, 0, 2),
+      ('pre_inf', 10, 0.1, 0, np.inf),
+      ('post_0', 1, 0, 2, 0),  # Compose after, but not before.
+      ('post_1', 10, 0, 2, 1),
+      ('post_half', 0.1, 0, 12, 0.5),
+      ('post_inf', 6, 0, 0.2, np.inf)
+  )
+  def test_repeat_and_select_composition(self, sigma, sigma1, sigma2, shape):
+    pre_event = dp_event.GaussianDpEvent(sigma1)
+    post_event = dp_event.GaussianDpEvent(sigma2)
+    event = dp_event.GaussianDpEvent(sigma)
+    event = dp_event.RepeatAndSelectDpEvent(event, 1, shape)
+    accountant = rdp_privacy_accountant.RdpAccountant()
+    rho = 0.5 / (sigma ** 2)
+    if sigma1 > 0:
+      rho += 0.5 / (sigma1 ** 2)
+      accountant.compose(pre_event)
+    accountant.compose(event)
+    if sigma2 > 0:
+      rho += 0.5 / (sigma2 ** 2)
+      accountant.compose(post_event)
+    for i in range(len(accountant._orders)):
+      self.assertAlmostEqual(accountant._rdp[i], accountant._orders[i] * rho)
+
+
+if __name__ == '__main__':
+  absltest.main()
From 6b70b8999616cc7c625a7353a14e4a43ebfb401e Mon Sep 17 00:00:00 2001
From: alexliang
Date: Sun, 9 Oct 2022 17:28:39 +0800
Subject: [PATCH 4/5] add web3 storage and theta edge store, update bootstrap
 in the examples.

---
 .gitignore                                    |  11 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   4 +-
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   5 +-
 .../one_line/config/bootstrap.bat             |  12 +
 .../one_line/config/bootstrap.sh              |   7 +
 .../config/bootstrap.bat                      |  12 +
 .../custom_data_and_model/config/bootstrap.sh |   3 +-
 .../one_line/config/bootstrap.bat             |  12 +
 .../step_by_step/config/bootstrap.bat         |  12 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   1 -
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   7 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   7 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   1 -
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   5 +-
 .../config/bootstrap.bat                      |  12 +
 .../config/bootstrap.sh                       |   1 -
 .../one_line/config/bootstrap.bat             |  12 +
 .../config/bootstrap.bat                      |  12 +
 .../one_line/config/bootstrap.bat             |  12 +
 .../one_line/config/bootstrap.sh              |   7 +
 .../config/bootstrap.bat                      |  12 +
 .../one_line/config/bootstrap.bat             |  12 +
 .../one_line/config/bootstrap.sh              |  12 -
 .../step_by_step/config/bootstrap.bat         |  12 +
 .../step_by_step/config/bootstrap.sh          |   1 -
 .../__init__.py                               |   0
 .../custom_data_and_model/README.md           |  40 ++
 .../custom_data_and_model/__init__.py         |   0
 .../custom_data_and_model/build_mlops_pkg.sh  |  12 +
 .../config/bootstrap.bat                      |  12 +
 .../custom_data_and_model/config/bootstrap.sh |   7 +
 .../config/fedml_config.yaml                  |  58 +++
 .../custom_data_and_model/run_client.sh       |   3 +
 .../custom_data_and_model/run_server.sh       |   3 +
 .../custom_data_and_model/torch_client.py     |  75 ++++
 .../custom_data_and_model/torch_server.py     |  74 ++++
 .../one_line/README.md                        |  42 ++
 .../one_line/build_mlops_pkg.sh               |  12 +
 .../one_line/client/__init__.py               |   0
 .../one_line/client/torch_client.py           |   5 +
 .../one_line/config/bootstrap.bat             |  12 +
 .../one_line/config/bootstrap.sh              |   7 +
 .../one_line/config/fedml_config.yaml         |  55 +++
 .../one_line/config/gpu_mapping.yaml          |   2 +
 .../one_line/run_client.bat                   |   2 +
 .../one_line/run_client.sh                    |   3 +
 .../one_line/run_server.bat                   |   1 +
 .../one_line/run_server.sh                    |   2 +
 .../one_line/server/__init__.py               |   0
 .../one_line/server/torch_server.py           |   5 +
.../step_by_step/README.md | 40 ++ .../step_by_step/__init__.py | 0 .../step_by_step/config/bootstrap.bat | 12 + .../step_by_step/config/bootstrap.sh | 7 + .../step_by_step/config/fedml_config.yaml | 54 +++ .../step_by_step/run_client.sh | 4 + .../step_by_step/run_server.sh | 3 + .../step_by_step/torch_client.py | 18 + .../step_by_step/torch_server.py | 18 + .../__init__.py | 0 .../custom_data_and_model/README.md | 40 ++ .../custom_data_and_model/__init__.py | 0 .../custom_data_and_model/build_mlops_pkg.sh | 12 + .../config/bootstrap.bat | 12 + .../custom_data_and_model/config/bootstrap.sh | 6 + .../config/fedml_config.yaml | 59 +++ .../custom_data_and_model/run_client.sh | 3 + .../custom_data_and_model/run_server.sh | 3 + .../custom_data_and_model/torch_client.py | 75 ++++ .../custom_data_and_model/torch_server.py | 74 ++++ .../one_line/README.md | 42 ++ .../one_line/build_mlops_pkg.sh | 12 + .../one_line/client/__init__.py | 0 .../one_line/client/torch_client.py | 5 + .../one_line/config/bootstrap.bat | 12 + .../one_line/config/bootstrap.sh | 6 + .../one_line/config/fedml_config.yaml | 56 +++ .../one_line/config/gpu_mapping.yaml | 2 + .../one_line/run_client.bat | 2 + .../one_line/run_client.sh | 3 + .../one_line/run_server.bat | 1 + .../one_line/run_server.sh | 2 + .../one_line/server/__init__.py | 0 .../one_line/server/torch_server.py | 5 + .../step_by_step/README.md | 40 ++ .../step_by_step/__init__.py | 0 .../step_by_step/config/bootstrap.bat | 12 + .../step_by_step/config/bootstrap.sh | 6 + .../step_by_step/config/fedml_config.yaml | 55 +++ .../step_by_step/run_client.sh | 4 + .../step_by_step/run_server.sh | 3 + .../step_by_step/torch_client.py | 18 + .../step_by_step/torch_server.py | 18 + .../config/bootstrap.bat | 12 + .../config/bootstrap.sh | 1 - .../config/bootstrap.bat | 12 + .../config/bootstrap.sh | 1 - .../one_line/config/bootstrap.bat | 12 + .../communication/mqtt_ipfs/__init__.py | 3 - .../communication/mqtt_thetastore/__init__.py | 3 + .../mqtt_thetastore_comm_manager.py | 362 ++++++++++++++++++ .../communication/mqtt_web3/__init__.py | 3 + .../mqtt_web3_comm_manager.py} | 62 +-- .../fedml/core/distributed/crypto/README.md | 0 .../fedml/core/distributed/crypto/__init__.py | 0 .../ipfs_crypto.py => crypto/crypto_api.py} | 0 .../distributed/distributed_storage/README.md | 12 + .../distributed_storage/__init__.py | 0 .../theta_storage/__init__.py | 0 .../theta_storage/theta_storage.py | 174 +++++++++ .../web3_storage/__init__.py | 0 .../web3_storage/web3_storage.py} | 10 +- .../core/distributed/fedml_comm_manager.py | 57 ++- python/fedml/core/mlops/mlops_configs.py | 36 +- .../client/fedml_client_master_manager.py | 2 +- .../quick_start/beehive/config/bootstrap.bat | 12 + .../quick_start/beehive/config/bootstrap.sh | 5 +- .../quick_start/octopus/config/bootstrap.bat | 12 + 128 files changed, 2201 insertions(+), 90 deletions(-) create mode 100755 python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.bat create mode 100755 python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat create mode 100644 python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.sh create mode 100755 python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat create mode 100755 python/examples/cross_silo/grpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat 
create mode 100755 python/examples/cross_silo/grpc_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat create mode 100755 python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/light_sec_agg_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.sh create mode 100755 python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.sh create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_manual_mnist_lr_example/one_line/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.sh create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/__init__.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/README.md create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/__init__.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh create mode 100755 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/README.md create mode 100644 
python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/__init__.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/torch_client.py create mode 100755 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.bat create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.bat create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/__init__.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/torch_server.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/README.md create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/__init__.py create mode 100755 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_client.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_server.sh create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_client.py create mode 100644 python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_server.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/__init__.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/README.md create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/__init__.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh create mode 100755 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat create mode 100755 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh create mode 100644 
python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/README.md create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/__init__.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/torch_client.py create mode 100755 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.bat create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.bat create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/__init__.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/torch_server.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/README.md create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/__init__.py create mode 100755 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_client.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_server.sh create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_client.py create mode 100644 python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_server.py create mode 100755 python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat create mode 100755 python/examples/cross_silo/trpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat delete mode 100755 python/fedml/core/distributed/communication/mqtt_ipfs/__init__.py create mode 100755 python/fedml/core/distributed/communication/mqtt_thetastore/__init__.py create mode 100755 
python/fedml/core/distributed/communication/mqtt_thetastore/mqtt_thetastore_comm_manager.py create mode 100755 python/fedml/core/distributed/communication/mqtt_web3/__init__.py rename python/fedml/core/distributed/communication/{mqtt_ipfs/mqtt_ipfs_comm_manager.py => mqtt_web3/mqtt_web3_comm_manager.py} (87%) create mode 100644 python/fedml/core/distributed/crypto/README.md create mode 100644 python/fedml/core/distributed/crypto/__init__.py rename python/fedml/core/distributed/{communication/mqtt_ipfs/ipfs_crypto.py => crypto/crypto_api.py} (100%) create mode 100644 python/fedml/core/distributed/distributed_storage/__init__.py create mode 100644 python/fedml/core/distributed/distributed_storage/theta_storage/__init__.py create mode 100644 python/fedml/core/distributed/distributed_storage/theta_storage/theta_storage.py create mode 100644 python/fedml/core/distributed/distributed_storage/web3_storage/__init__.py rename python/fedml/core/distributed/{communication/mqtt_ipfs/ipfs_storage.py => distributed_storage/web3_storage/web3_storage.py} (94%) create mode 100755 python/quick_start/beehive/config/bootstrap.bat create mode 100755 python/quick_start/octopus/config/bootstrap.bat diff --git a/.gitignore b/.gitignore index 33bfde48a6..74c17e7ba5 100644 --- a/.gitignore +++ b/.gitignore @@ -179,3 +179,14 @@ doc/en/_build /swap/actions-runner/ *.txt python/examples/cross_silo/light_sec_agg_example/mpi_host_file +/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/mlops/dist-packages/ +/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/mlops/dist-packages/ +/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/custom_data_and_model/mlops/dist-packages/ +/python/quick_start/parrot/fedml_data/ +/python/tests/smoke_test/simulation_sp/mnist/__MACOSX/MNIST/ +/python/tests/smoke_test/simulation_sp/mnist/MNIST/ +/swap/ +/.github/workflows/build_wheels_and_releases副本.yml +/devops/dockerfile/device-image/Dockerfile-Local +/FedML-dev-v0.7.0.iml +/python/fedml/cli/debug-cli.py diff --git a/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.bat b/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.sh b/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.sh index 29c482e7d2..3d969974b0 100644 --- a/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.sh +++ b/python/examples/cross_device/mqtt_s3_fedavg_cifar10_resnet20_example/config/bootstrap.sh @@ -2,8 +2,6 @@ # pip install fedml==0.7.15 #pip install --upgrade fedml -# login to wandb - ### don't modify this part ### echo "[FedML]Bootstrap Finished" -############################## \ No newline at end of file +############################## diff --git 
a/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.bat b/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.sh b/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.sh index f0f2910615..3d969974b0 100644 --- a/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.sh +++ b/python/examples/cross_device/mqtt_s3_fedavg_mnist_lenet_example/config/bootstrap.sh @@ -1,9 +1,6 @@ # pip install fedml==0.7.15 -pip install --upgrade fedml - -# login to wandb -# wandb login ee0b5f53d949c84cee7decbe7a629e63fb2f8408 +#pip install --upgrade fedml ### don't modify this part ### echo "[FedML]Bootstrap Finished" diff --git a/python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.sh b/python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/cuda_rpc_fedavg_mnist_lr_example/one_line/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh 
b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh index 6322c8643f..3d969974b0 100644 --- a/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh +++ b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh @@ -1,8 +1,7 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml ### don't modify this part ### echo "[FedML]Bootstrap Finished" -############################## \ No newline at end of file +############################## diff --git a/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/grpc_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh index 1f0287a7b6..3d969974b0 100644 --- a/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh +++ b/python/examples/cross_silo/jax_haiku_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh @@ -1,4 +1,3 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml diff --git a/python/examples/cross_silo/light_sec_agg_example/config/bootstrap.bat b/python/examples/cross_silo/light_sec_agg_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ 
b/python/examples/cross_silo/light_sec_agg_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/mpi_customized_fedavg_mnist_lr_example/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/mpi_fedavg_mnist_lr_example/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_defense_cifar10_resnet56_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir 
%DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.sh index 1f0287a7b6..3d969974b0 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.sh +++ b/python/examples/cross_silo/mqtt_s3_fedavg_attack_mnist_lr_example/config/bootstrap.sh @@ -1,4 +1,3 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_cdp_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.sh index 7630aa29ca..3d969974b0 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.sh +++ b/python/examples/cross_silo/mqtt_s3_fedavg_cifar10_lr_example/config/bootstrap.sh @@ -1,4 +1,7 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.bat 
b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.sh index 1f0287a7b6..3d969974b0 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.sh +++ b/python/examples/cross_silo/mqtt_s3_fedavg_defense_mnist_lr_example/config/bootstrap.sh @@ -1,4 +1,3 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_manual_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_manual_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_manual_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_hierarchical_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git 
a/python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_horizontal_mnist_lr_example/one_line/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_ldp_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.sh b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.sh index 90b3b70a2c..5584eaa1af 100644 --- a/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.sh +++ b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/one_line/config/bootstrap.sh @@ -1,15 +1,3 @@ - - -#echo "bootstrap out" -#mkdir -p ~/fednlp_data -# -#pip3 install -r ./requirements.txt -# -#bash ./download_data.sh -#bash ./download_partition.sh -# -#exit 0 - # pip install fedml==0.7.15 #pip install --upgrade fedml diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh 
b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh
index 3d969974b0..5584eaa1af 100644
--- a/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh
+++ b/python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh
@@ -1,4 +1,3 @@
-
 # pip install fedml==0.7.15
 #pip install --upgrade fedml
 
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/__init__.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/README.md b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/README.md
new file mode 100644
index 0000000000..3c6e789f93
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/README.md
@@ -0,0 +1,40 @@
+## Training Script
+
+At the client side, the client ID (a.k.a. rank) starts from 1.
+Please also modify config/fedml_config.yaml, setting `worker_num` to the number of clients you plan to run.
+
+If you want to use your own MQTT or Theta EdgeStore server as the training backend, uncomment and set the following lines.
+#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
+#customized_training_thetastore_config: {'store_home_dir': '~/edge-store-playground', 'upload_uri': 'http://localhost:19888/rpc', 'download_uri': 'http://localhost:19888/rpc'}
+
+Theta EdgeStore serves as the distributed storage where federated learning reads and writes models.
+Set up Theta EdgeStore following https://docs.thetatoken.org/docs/theta-edge-store-setup and set the config parameters
+store_home_dir, upload_uri, and download_uri.
+If you want to encrypt models with a secret key, set it by calling Context().add("ipfs_secret_key", "your secret key").
+
+At the server side, run the following script:
+```
+bash run_server.sh your_run_id
+```
+
+For client 1, run the following script:
+```
+bash run_client.sh 1 your_run_id
+```
+For client 2, run the following script:
+```
+bash run_client.sh 2 your_run_id
+```
+Note: please run the server first.
+
+## A Better User Experience with FedML MLOps (open.fedml.ai)
+To reduce the difficulty and complexity of these CLI commands, we recommend our MLOps platform (open.fedml.ai).
+FedML MLOps provides: +- Install Client Agent and Login +- Inviting Collaborators and group management +- Project Management +- Experiment Tracking (visualizing training results) +- monitoring device status +- visualizing system performance (including profiling flow chart) +- distributed logging +- model serving \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/__init__.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh new file mode 100644 index 0000000000..6464fea83e --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh @@ -0,0 +1,12 @@ +SOURCE=. +ENTRY=torch_client.py +CONFIG=config +DEST=./mlops +fedml build -t client -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST + + +SOURCE=. +ENTRY=torch_server.py +CONFIG=config +DEST=./mlops +fedml build -t server -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml new file mode 100644 index 0000000000..29d6a8ead3 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml @@ -0,0 +1,58 @@ +common_args: + training_type: "cross_silo" + scenario: "horizontal" + using_mlops: false + random_seed: 0 + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "mnist" + data_cache_dir: ~/fedml_data + partition_method: "hetero" + partition_alpha: 0.5 + +model_args: + model: "lr" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + 
global_model_file_path: "./model_file_cache/global_model.pt"
+
+train_args:
+  federated_optimizer: "FedAvg"
+  client_id_list:
+  client_num_in_total: 1
+  client_num_per_round: 1
+  comm_round: 10
+  epochs: 1
+  batch_size: 10
+  client_optimizer: sgd
+  learning_rate: 0.03
+  weight_decay: 0.001
+
+validation_args:
+  frequency_of_the_test: 1
+
+device_args:
+  worker_num: 1
+  using_gpu: false
+  gpu_mapping_file: config/gpu_mapping.yaml
+  gpu_mapping_key: mapping_default
+
+comm_args:
+  backend: "MQTT_THETASTORE"
+  mqtt_config_path: config/mqtt_config.yaml
+  # If you want to use your own MQTT or Theta EdgeStore server as the training backend, uncomment and set the following lines.
+  #customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
+  customized_training_thetastore_config: {'store_home_dir': '~/edge-store-playground', 'upload_uri': 'http://localhost:19888/rpc', 'download_uri': 'http://localhost:19888/rpc'}
+
+tracking_args:
+  # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/
+  enable_wandb: false
+  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
+  wandb_project: fedml
+  wandb_name: fedml_torch_fedavg_mnist_lr
+
+#lsa_args:
+#  prime_number: 2 ** 15 - 19
+#  precision_parameter: 10
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh
new file mode 100644
index 0000000000..18d3cea9fe
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+RANK=$1
+python3 torch_client.py --cf config/fedml_config.yaml --rank $RANK --role client
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh
new file mode 100644
index 0000000000..08007b7e81
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py
new file mode 100644
index 0000000000..00c8253b86
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py
@@ -0,0 +1,75 @@
+import torch
+
+import fedml
+from fedml import FedMLRunner
+from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
+
+
+def load_data(args):
+    download_mnist(args.data_cache_dir)
+    fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
+
+    """
+    Please read through the data loader to see how to customize the dataset for the FedML framework.
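+    (Any customized loader should return the same tuple that load_partition_data_mnist returns below.)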
+    """
+    (
+        client_num,
+        train_data_num,
+        test_data_num,
+        train_data_global,
+        test_data_global,
+        train_data_local_num_dict,
+        train_data_local_dict,
+        test_data_local_dict,
+        class_num,
+    ) = load_partition_data_mnist(
+        args,
+        args.batch_size,
+        train_path=args.data_cache_dir + "/MNIST/train",
+        test_path=args.data_cache_dir + "/MNIST/test",
+    )
+    """
+    For shallow NN or linear models,
+    we uniformly sample a fraction of clients each round (as in the original FedAvg paper)
+    """
+    args.client_num_in_total = client_num
+    dataset = [
+        train_data_num,
+        test_data_num,
+        train_data_global,
+        test_data_global,
+        train_data_local_num_dict,
+        train_data_local_dict,
+        test_data_local_dict,
+        class_num,
+    ]
+    return dataset, class_num
+
+
+class LogisticRegression(torch.nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(LogisticRegression, self).__init__()
+        self.linear = torch.nn.Linear(input_dim, output_dim)
+
+    def forward(self, x):
+        outputs = torch.sigmoid(self.linear(x))
+        return outputs
+
+
+if __name__ == "__main__":
+    # init FedML framework
+    args = fedml.init()
+
+    # init device
+    device = fedml.device.get_device(args)
+
+    # load data
+    dataset, output_dim = load_data(args)
+
+    # load model (an MNIST image is 28 x 28)
+    model = LogisticRegression(28 * 28, output_dim)
+
+    # start training
+    fedml_runner = FedMLRunner(args, device, dataset, model)
+    fedml_runner.run()
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py
new file mode 100644
index 0000000000..14eb9b96b7
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py
@@ -0,0 +1,75 @@
+import torch
+
+import fedml
+from fedml import FedMLRunner
+from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
+
+
+def load_data(args):
+    download_mnist(args.data_cache_dir)
+    fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
+
+    """
+    Please read through the data loader to see how to customize the dataset for the FedML framework.
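+    (Any customized loader should return the same tuple that load_partition_data_mnist returns below.)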
+    """
+    (
+        client_num,
+        train_data_num,
+        test_data_num,
+        train_data_global,
+        test_data_global,
+        train_data_local_num_dict,
+        train_data_local_dict,
+        test_data_local_dict,
+        class_num,
+    ) = load_partition_data_mnist(
+        args,
+        args.batch_size,
+        train_path=args.data_cache_dir + "/MNIST/train",
+        test_path=args.data_cache_dir + "/MNIST/test",
+    )
+    """
+    For shallow NN or linear models,
+    we uniformly sample a fraction of clients each round (as in the original FedAvg paper)
+    """
+    args.client_num_in_total = client_num
+    dataset = [
+        train_data_num,
+        test_data_num,
+        train_data_global,
+        test_data_global,
+        train_data_local_num_dict,
+        train_data_local_dict,
+        test_data_local_dict,
+        class_num,
+    ]
+    return dataset, class_num
+
+
+class LogisticRegression(torch.nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(LogisticRegression, self).__init__()
+        self.linear = torch.nn.Linear(input_dim, output_dim)
+
+    def forward(self, x):
+        outputs = torch.sigmoid(self.linear(x))
+        return outputs
+
+
+if __name__ == "__main__":
+    # init FedML framework
+    args = fedml.init()
+
+    # init device
+    device = fedml.device.get_device(args)
+
+    # load data
+    dataset, output_dim = load_data(args)
+
+    # load model (an MNIST image is 28 x 28)
+    model = LogisticRegression(28 * 28, output_dim)
+
+    # start training
+    fedml_runner = FedMLRunner(args, device, dataset, model)
+    fedml_runner.run()
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/README.md b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/README.md
new file mode 100644
index 0000000000..38c68cddd2
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/README.md
@@ -0,0 +1,42 @@
+## Training Script
+
+At the client side, the client ID (a.k.a. rank) starts from 1.
+Please also modify config/fedml_config.yaml, setting `worker_num` to the number of clients you plan to run.
+
+If you want to use your own MQTT or Theta EdgeStore server as the training backend, uncomment and set the following lines.
+#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
+#customized_training_thetastore_config: {'store_home_dir': '~/edge-store-playground', 'upload_uri': 'http://localhost:19888/rpc', 'download_uri': 'http://localhost:19888/rpc'}
+
+Theta EdgeStore serves as the distributed storage where federated learning reads and writes models.
+Set up Theta EdgeStore following https://docs.thetatoken.org/docs/theta-edge-store-setup and set the config parameters
+store_home_dir, upload_uri, and download_uri.
+If you want to encrypt models with a secret key, set it by calling Context().add("ipfs_secret_key", "your secret key").
+
+At the server side, run the following script:
+```
+bash run_server.sh your_run_id
+```
+
+For client 1, run the following script:
+```
+bash run_client.sh 1 your_run_id
+```
+For client 2, run the following script:
+```
+bash run_client.sh 2 your_run_id
+```
+Note:
+1. Please change the run_id in the run_client/run_server scripts to your own.
+2. For Windows users, please use the *.bat scripts.
+
+## A Better User Experience with FedML MLOps (open.fedml.ai)
+To reduce the difficulty and complexity of these CLI commands, we recommend our MLOps platform (open.fedml.ai).
+FedML MLOps provides: +- Install Client Agent and Login +- Inviting Collaborators and group management +- Project Management +- Experiment Tracking (visualizing training results) +- monitoring device status +- visualizing system performance (including profiling flow chart) +- distributed logging +- model serving \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh new file mode 100644 index 0000000000..f2a1ce9105 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh @@ -0,0 +1,12 @@ +SOURCE=client +ENTRY=torch_client.py +CONFIG=config +DEST=./mlops +fedml build -t client -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST + + +SOURCE=server +ENTRY=torch_server.py +CONFIG=config +DEST=./mlops +fedml build -t server -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/__init__.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/torch_client.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/torch_client.py new file mode 100644 index 0000000000..2385f640c6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/client/torch_client.py @@ -0,0 +1,5 @@ +import fedml + + +if __name__ == "__main__": + fedml.run_cross_silo_client() diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml new file mode 100644 index 0000000000..646f2d1ce6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml @@ -0,0 +1,55 @@ +common_args: + training_type: "cross_silo" + scenario: "horizontal" 
+ using_mlops: false + random_seed: 0 + config_version: release + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "mnist" + data_cache_dir: ~/fedml_data + partition_method: "hetero" + partition_alpha: 0.5 + +model_args: + model: "lr" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + global_model_file_path: "./model_file_cache/global_model.pt" + +train_args: + federated_optimizer: "FedAvg" + # for CLI running, this can be None; in MLOps deployment, `client_id_list` will be replaced with real-time selected devices + client_id_list: + client_num_in_total: 1 + client_num_per_round: 1 + comm_round: 10 + epochs: 1 + batch_size: 10 + client_optimizer: sgd + learning_rate: 0.03 + weight_decay: 0.001 + +validation_args: + frequency_of_the_test: 1 + +device_args: + using_gpu: false + gpu_mapping_file: config/gpu_mapping.yaml + gpu_mapping_key: mapping_default + +comm_args: + backend: "MQTT_THETASTORE" + mqtt_config_path: config/mqtt_config.yaml + # If you want to use your customized MQTT or theta edgestore server as training backends, you should uncomment and set the following lines. + #customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'} + customized_training_thetastore_config: {'store_home_dir': '~/edge-store-playground', 'upload_uri': 'http://localhost:19888/rpc', 'download_uri': 'http://localhost:19888/rpc'} + +tracking_args: + # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/ + enable_wandb: false + wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408 + wandb_project: fedml + wandb_name: fedml_torch_fedavg_mnist_lr \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml new file mode 100644 index 0000000000..fe6fa86137 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml @@ -0,0 +1,2 @@ +mapping_default: + node_1 : [1,1,1] \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.bat b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.bat new file mode 100644 index 0000000000..48b355571b --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.bat @@ -0,0 +1,2 @@ + +python client/torch_client.py --cf config/fedml_config.yaml --rank %1 diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.sh new file mode 100644 index 0000000000..ce34f604d5 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_client.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +RANK=$1 +python client/torch_client.py --cf config/fedml_config.yaml --rank $RANK --run_id 123 diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.bat b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.bat new file mode 100644 index 0000000000..bc6d81b890 --- /dev/null +++ 
b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.bat
@@ -0,0 +1 @@
+python server/torch_server.py --cf config/fedml_config.yaml --rank 0
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.sh
new file mode 100644
index 0000000000..72ea5b47a6
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/run_server.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+python server/torch_server.py --cf config/fedml_config.yaml --rank 0 --run_id 123
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/__init__.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/torch_server.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/torch_server.py
new file mode 100644
index 0000000000..f61e360d5a
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/one_line/server/torch_server.py
@@ -0,0 +1,5 @@
+import fedml
+
+
+if __name__ == "__main__":
+    fedml.run_cross_silo_server()
diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/README.md b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/README.md
new file mode 100644
index 0000000000..3c6e789f93
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/README.md
@@ -0,0 +1,40 @@
+## Training Script
+
+At the client side, the client ID (a.k.a. rank) starts from 1.
+Please also modify config/fedml_config.yaml, setting `worker_num` to the number of clients you plan to run.
+
+If you want to use your own MQTT or Theta EdgeStore server as the training backend, uncomment and set the following lines.
+#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
+#customized_training_thetastore_config: {'store_home_dir': '~/edge-store-playground', 'upload_uri': 'http://localhost:19888/rpc', 'download_uri': 'http://localhost:19888/rpc'}
+
+Theta EdgeStore serves as the distributed storage where federated learning reads and writes models.
+Set up Theta EdgeStore following https://docs.thetatoken.org/docs/theta-edge-store-setup and set the config parameters
+store_home_dir, upload_uri, and download_uri.
+If you want to encrypt models with a secret key, set it by calling Context().add("ipfs_secret_key", "your secret key").
+
+At the server side, run the following script:
+```
+bash run_server.sh your_run_id
+```
+
+For client 1, run the following script:
+```
+bash run_client.sh 1 your_run_id
+```
+For client 2, run the following script:
+```
+bash run_client.sh 2 your_run_id
+```
+Note: please run the server first.
+
+## A Better User Experience with FedML MLOps (open.fedml.ai)
+To reduce the difficulty and complexity of these CLI commands, we recommend our MLOps platform (open.fedml.ai).
+FedML MLOps provides: +- Install Client Agent and Login +- Inviting Collaborators and group management +- Project Management +- Experiment Tracking (visualizing training results) +- monitoring device status +- visualizing system performance (including profiling flow chart) +- distributed logging +- model serving \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/__init__.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh new file mode 100644 index 0000000000..3d969974b0 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh @@ -0,0 +1,7 @@ + +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml new file mode 100644 index 0000000000..d5f6405379 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml @@ -0,0 +1,54 @@ +common_args: + training_type: "cross_silo" + scenario: "horizontal" + using_mlops: false + random_seed: 0 + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "mnist" + data_cache_dir: ~/fedml_data + partition_method: "hetero" + partition_alpha: 0.5 + +model_args: + model: "lr" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + global_model_file_path: "./model_file_cache/global_model.pt" + +train_args: + federated_optimizer: "FedAvg" + client_id_list: + client_num_in_total: 1000 + client_num_per_round: 2 + comm_round: 50 + epochs: 1 + batch_size: 10 + client_optimizer: sgd + learning_rate: 0.03 + weight_decay: 0.001 + +validation_args: + frequency_of_the_test: 5 + +device_args: + worker_num: 2 + using_gpu: false + gpu_mapping_file: config/gpu_mapping.yaml + gpu_mapping_key: mapping_default + +comm_args: + backend: "MQTT_THETASTORE" + mqtt_config_path: config/mqtt_config.yaml + # If you want to use your customized MQTT or theta edgestore server as training backends, you should uncomment and set the following lines. 
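+  # (Illustrative placeholders only: replace the broker and storage values below with your own deployment's settings; 1883 is the standard unencrypted MQTT port.)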
+ #customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'} + customized_training_thetastore_config: {'store_home_dir': '~/edge-store-playground', 'upload_uri': 'http://localhost:19888/rpc', 'download_uri': 'http://localhost:19888/rpc'} + +tracking_args: + # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/ + enable_wandb: false + wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408 + wandb_project: fedml + wandb_name: fedml_torch_fedavg_mnist_lr \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_client.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_client.sh new file mode 100644 index 0000000000..cd9945fcb1 --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_client.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +RANK=$1 +RUN_ID=$2 +python3 torch_client.py --cf config/fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_server.sh b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_server.sh new file mode 100644 index 0000000000..e5d6ce46de --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/run_server.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +RUN_ID=$1 +python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server --run_id $RUN_ID \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_client.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_client.py new file mode 100644 index 0000000000..9085c85ebe --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_client.py @@ -0,0 +1,18 @@ +import fedml +from fedml import FedMLRunner + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = fedml.data.load(args) + + # load model + model = fedml.model.create(args, output_dim) + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_server.py b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_server.py new file mode 100644 index 0000000000..9085c85ebe --- /dev/null +++ b/python/examples/cross_silo/mqtt_thetastore_fedavg_mnist_lr_example/step_by_step/torch_server.py @@ -0,0 +1,18 @@ +import fedml +from fedml import FedMLRunner + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = fedml.data.load(args) + + # load model + model = fedml.model.create(args, output_dim) + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/__init__.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/README.md b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/README.md
new file mode 100644
index 0000000000..c4492da506
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/README.md
@@ -0,0 +1,40 @@
+## Training Script
+
+At the client side, the client ID (a.k.a. rank) starts from 1.
+Please also modify config/fedml_config.yaml, setting `worker_num` to the number of clients you plan to run.
+
+If you want to use your own MQTT or Web3.Storage server as the training backend, uncomment and set the following lines.
+#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
+#customized_training_web3_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'}
+
+Web3.Storage serves as the distributed storage where federated learning reads and writes models.
+Register an account at https://web3.storage and set the config parameters
+token, upload_uri, and download_uri.
+If you want to encrypt models with a secret key, set it by calling Context().add("ipfs_secret_key", "your secret key").
+
+At the server side, run the following script:
+```
+bash run_server.sh your_run_id
+```
+
+For client 1, run the following script:
+```
+bash run_client.sh 1 your_run_id
+```
+For client 2, run the following script:
+```
+bash run_client.sh 2 your_run_id
+```
+Note: please run the server first.
+
+## A Better User Experience with FedML MLOps (open.fedml.ai)
+To reduce the difficulty and complexity of these CLI commands, we recommend our MLOps platform (open.fedml.ai).
+FedML MLOps provides:
+- Install Client Agent and Login
+- Inviting Collaborators and group management
+- Project Management
+- Experiment Tracking (visualizing training results)
+- monitoring device status
+- visualizing system performance (including profiling flow chart)
+- distributed logging
+- model serving
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/__init__.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh
new file mode 100644
index 0000000000..6464fea83e
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/build_mlops_pkg.sh
@@ -0,0 +1,12 @@
+SOURCE=.
+ENTRY=torch_client.py
+CONFIG=config
+DEST=./mlops
+fedml build -t client -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST
+
+
+SOURCE=.
+ENTRY=torch_server.py +CONFIG=config +DEST=./mlops +fedml build -t server -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh new file mode 100755 index 0000000000..5584eaa1af --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/bootstrap.sh @@ -0,0 +1,6 @@ +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml new file mode 100644 index 0000000000..5aa5b3541e --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml @@ -0,0 +1,59 @@ +common_args: + training_type: "cross_silo" + scenario: "horizontal" + using_mlops: false + random_seed: 0 + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "mnist" + data_cache_dir: ~/fedml_data + partition_method: "hetero" + partition_alpha: 0.5 + +model_args: + model: "lr" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + global_model_file_path: "./model_file_cache/global_model.pt" + +train_args: + federated_optimizer: "FedAvg" + client_id_list: + client_num_in_total: 1 + client_num_per_round: 1 + comm_round: 10 + epochs: 1 + batch_size: 10 + client_optimizer: sgd + learning_rate: 0.03 + weight_decay: 0.001 + +validation_args: + frequency_of_the_test: 1 + +device_args: + worker_num: 1 + using_gpu: false + gpu_mapping_file: config/gpu_mapping.yaml + gpu_mapping_key: mapping_default + +comm_args: + backend: "MQTT_WEB3" + mqtt_config_path: config/mqtt_config.yaml + # If you want to use your customized MQTT or web3storage server as training backends, you should uncomment and set the following lines. 
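+  # (Illustrative placeholders only: the token is the API token issued by your web3.storage account, as described in the README.)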
+  #customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
+  #customized_training_web3_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'}
+  customized_training_web3_config: {'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkaWQ6ZXRocjoweDEwMjdFNzE2MDdmNzkzQTNmRjVDODIzZTAwQzcyQ2RERDcxODYwRUQiLCJpc3MiOiJ3ZWIzLXN0b3JhZ2UiLCJpYXQiOjE2NjUxOTk2ODQwNDIsIm5hbWUiOiJmZWRtbC10ZXN0In0.UejyT2d3N9wCD1cNqOei77rgn8Q7or3jTj7ucBAsBtQ', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'}
+
+tracking_args:
+  # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/
+  enable_wandb: false
+  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
+  wandb_project: fedml
+  wandb_name: fedml_torch_fedavg_mnist_lr
+
+#lsa_args:
+#  prime_number: 2 ** 15 - 19
+#  precision_parameter: 10
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh
new file mode 100644
index 0000000000..18d3cea9fe
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_client.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+RANK=$1
+python3 torch_client.py --cf config/fedml_config.yaml --rank $RANK --role client
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh
new file mode 100644
index 0000000000..08007b7e81
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/run_server.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server
\ No newline at end of file
diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py
new file mode 100644
index 0000000000..00c8253b86
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py
@@ -0,0 +1,75 @@
+import torch
+
+import fedml
+from fedml import FedMLRunner
+from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
+
+
+def load_data(args):
+    download_mnist(args.data_cache_dir)
+    fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
+
+    """
+    Please read through the data loader to see how to customize the dataset for the FedML framework.
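+    (Any customized loader should return the same tuple that load_partition_data_mnist returns below.)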
+    """
+    (
+        client_num,
+        train_data_num,
+        test_data_num,
+        train_data_global,
+        test_data_global,
+        train_data_local_num_dict,
+        train_data_local_dict,
+        test_data_local_dict,
+        class_num,
+    ) = load_partition_data_mnist(
+        args,
+        args.batch_size,
+        train_path=args.data_cache_dir + "/MNIST/train",
+        test_path=args.data_cache_dir + "/MNIST/test",
+    )
+    """
+    For shallow NN or linear models,
+    we uniformly sample a fraction of clients each round (as in the original FedAvg paper)
+    """
+    args.client_num_in_total = client_num
+    dataset = [
+        train_data_num,
+        test_data_num,
+        train_data_global,
+        test_data_global,
+        train_data_local_num_dict,
+        train_data_local_dict,
+        test_data_local_dict,
+        class_num,
+    ]
+    return dataset, class_num
+
+
+class LogisticRegression(torch.nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(LogisticRegression, self).__init__()
+        self.linear = torch.nn.Linear(input_dim, output_dim)
+
+    def forward(self, x):
+        outputs = torch.sigmoid(self.linear(x))
+        return outputs
+
+
+if __name__ == "__main__":
+    # init FedML framework
+    args = fedml.init()
+
+    # init device
+    device = fedml.device.get_device(args)
+
+    # load data
+    dataset, output_dim = load_data(args)
+
+    # load model (an MNIST image is 28 x 28)
+    model = LogisticRegression(28 * 28, output_dim)
+
+    # start training
+    fedml_runner = FedMLRunner(args, device, dataset, model)
+    fedml_runner.run()
diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py
new file mode 100644
index 0000000000..14eb9b96b7
--- /dev/null
+++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py
@@ -0,0 +1,75 @@
+import torch
+
+import fedml
+from fedml import FedMLRunner
+from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
+
+
+def load_data(args):
+    download_mnist(args.data_cache_dir)
+    fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
+
+    """
+    Please read through the data loader to see how to customize the dataset for the FedML framework.
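+    (Any customized loader should return the same tuple that load_partition_data_mnist returns below.)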
+ """ + ( + client_num, + train_data_num, + test_data_num, + train_data_global, + test_data_global, + train_data_local_num_dict, + train_data_local_dict, + test_data_local_dict, + class_num, + ) = load_partition_data_mnist( + args, + args.batch_size, + train_path=args.data_cache_dir + "/MNIST/train", + test_path=args.data_cache_dir + "/MNIST/test", + ) + """ + For shallow NN or linear models, + we uniformly sample a fraction of clients each round (as the original FedAvg paper) + """ + args.client_num_in_total = client_num + dataset = [ + train_data_num, + test_data_num, + train_data_global, + test_data_global, + train_data_local_num_dict, + train_data_local_dict, + test_data_local_dict, + class_num, + ] + return dataset, class_num + + +class LogisticRegression(torch.nn.Module): + def __init__(self, input_dim, output_dim): + super(LogisticRegression, self).__init__() + self.linear = torch.nn.Linear(input_dim, output_dim) + + def forward(self, x): + outputs = torch.sigmoid(self.linear(x)) + return outputs + + +if __name__ == "__main__": + # init FedML framework + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = load_data(args) + + # load model (the size of MNIST image is 28 x 28) + model = LogisticRegression(28 * 28, output_dim) + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/README.md b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/README.md new file mode 100644 index 0000000000..528d21bf54 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/README.md @@ -0,0 +1,42 @@ +## Training Script + +At the client side, the client ID (a.k.a rank) starts from 1. +Please also modify config/fedml_config.yaml, changing the `worker_num` the as the number of clients you plan to run. + +If you want to use your customized MQTT or web3 storage server as training backends, you should uncomment and set the following lines. +#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'} +#customized_training_web3_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'} + +Use Web3 storage as federated learning distributed storage for reading and writing models. +You should register account at https://web3.storage and set config parameters: +token, upload_uri, download_uri. +If you want to use secret key to encrypt models, you should set secret key by calling Context().add("ipfs_secret_key", "your secret key") + +At the server side, run the following script: +``` +bash run_server.sh your_run_id +``` + +For client 1, run the following script: +``` +bash run_client.sh 1 your_run_id +``` +For client 2, run the following script: +``` +bash run_client.sh 2 your_run_id +``` +Note: +1. please change the run_id in run_client/run_server scripts to your own. +2. For Windows users, please use *.bat scripts. + +## A Better User-experience with FedML MLOps (open.fedml.ai) +To reduce the difficulty and complexity of these CLI commands. We recommend you to use our MLOps (open.fedml.ai). 
+FedML MLOps provides: +- Install Client Agent and Login +- Inviting Collaborators and group management +- Project Management +- Experiment Tracking (visualizing training results) +- monitoring device status +- visualizing system performance (including profiling flow chart) +- distributed logging +- model serving \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh new file mode 100644 index 0000000000..f2a1ce9105 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/build_mlops_pkg.sh @@ -0,0 +1,12 @@ +SOURCE=client +ENTRY=torch_client.py +CONFIG=config +DEST=./mlops +fedml build -t client -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST + + +SOURCE=server +ENTRY=torch_server.py +CONFIG=config +DEST=./mlops +fedml build -t server -sf $SOURCE -ep $ENTRY -cf $CONFIG -df $DEST \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/__init__.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/torch_client.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/torch_client.py new file mode 100644 index 0000000000..2385f640c6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/client/torch_client.py @@ -0,0 +1,5 @@ +import fedml + + +if __name__ == "__main__": + fedml.run_cross_silo_client() diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.sh new file mode 100644 index 0000000000..5584eaa1af --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/bootstrap.sh @@ -0,0 +1,6 @@ +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml new file mode 100644 index 0000000000..f95187299f --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/fedml_config.yaml @@ -0,0 +1,56 @@ +common_args: + training_type: "cross_silo" + 
scenario: "horizontal" + using_mlops: false + random_seed: 0 + config_version: release + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "mnist" + data_cache_dir: ~/fedml_data + partition_method: "hetero" + partition_alpha: 0.5 + +model_args: + model: "lr" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + global_model_file_path: "./model_file_cache/global_model.pt" + +train_args: + federated_optimizer: "FedAvg" + # for CLI running, this can be None; in MLOps deployment, `client_id_list` will be replaced with real-time selected devices + client_id_list: + client_num_in_total: 1 + client_num_per_round: 1 + comm_round: 10 + epochs: 1 + batch_size: 10 + client_optimizer: sgd + learning_rate: 0.03 + weight_decay: 0.001 + +validation_args: + frequency_of_the_test: 1 + +device_args: + using_gpu: false + gpu_mapping_file: config/gpu_mapping.yaml + gpu_mapping_key: mapping_default + +comm_args: + backend: "MQTT_WEB3" + mqtt_config_path: config/mqtt_config.yaml + # If you want to use your customized MQTT or web3storage server as training backends, you should uncomment and set the following lines. + #customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'} + #customized_training_web3_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'} + customized_training_web3_config: { 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkaWQ6ZXRocjoweDEwMjdFNzE2MDdmNzkzQTNmRjVDODIzZTAwQzcyQ2RERDcxODYwRUQiLCJpc3MiOiJ3ZWIzLXN0b3JhZ2UiLCJpYXQiOjE2NjUxOTk2ODQwNDIsIm5hbWUiOiJmZWRtbC10ZXN0In0.UejyT2d3N9wCD1cNqOei77rgn8Q7or3jTj7ucBAsBtQ', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link' } + +tracking_args: + # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/ + enable_wandb: false + wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408 + wandb_project: fedml + wandb_name: fedml_torch_fedavg_mnist_lr \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml new file mode 100644 index 0000000000..fe6fa86137 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/config/gpu_mapping.yaml @@ -0,0 +1,2 @@ +mapping_default: + node_1 : [1,1,1] \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.bat b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.bat new file mode 100644 index 0000000000..48b355571b --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.bat @@ -0,0 +1,2 @@ + +python client/torch_client.py --cf config/fedml_config.yaml --rank %1 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.sh new file mode 100644 index 0000000000..ce34f604d5 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_client.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +RANK=$1 +python client/torch_client.py 
--cf config/fedml_config.yaml --rank $RANK --run_id 123 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.bat b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.bat new file mode 100644 index 0000000000..bc6d81b890 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.bat @@ -0,0 +1 @@ +python server/torch_server.py --cf config/fedml_config.yaml --rank 0 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.sh new file mode 100644 index 0000000000..72ea5b47a6 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/run_server.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +python server/torch_server.py --cf config/fedml_config.yaml --rank 0 --run_id 123 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/__init__.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/torch_server.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/torch_server.py new file mode 100644 index 0000000000..f61e360d5a --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/one_line/server/torch_server.py @@ -0,0 +1,5 @@ +import fedml + + +if __name__ == "__main__": + fedml.run_cross_silo_server() diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/README.md b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/README.md new file mode 100644 index 0000000000..c4492da506 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/README.md @@ -0,0 +1,40 @@ +## Training Script + +At the client side, the client ID (a.k.a. rank) starts from 1. +Please also modify config/fedml_config.yaml, changing `worker_num` to the number of clients you plan to run. + +If you want to use your customized MQTT or Web3 storage server as the training backend, you should uncomment and set the following lines. +#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'} +#customized_training_web3_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'} + +Web3 storage is used as the federated learning distributed storage for reading and writing models. +You should register an account at https://web3.storage and set the config parameters: +token, upload_uri, download_uri. +If you want to use a secret key to encrypt models, set it by calling Context().add("ipfs_secret_key", "your secret key") + +At the server side, run the following script: +``` +bash run_server.sh your_run_id
``` + +For client 1, run the following script: +``` +bash run_client.sh 1 your_run_id +``` +For client 2, run the following script: +``` +bash run_client.sh 2 your_run_id +``` +Note: please run the server first.
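+
+If you want to try the secret-key encryption end to end, a minimal sketch is shown below. It assumes the `Context` class exported from `fedml.core.alg_frame.context` (the object this patch's Web3 storage layer reads `ipfs_secret_key` from); the key value here is only a placeholder:
+
+```
+from fedml.core.alg_frame.context import Context
+
+# Register the secret key before FedMLRunner starts exchanging models;
+# the Web3 storage layer encrypts/decrypts the pickled model with it.
+Context().add("ipfs_secret_key", "your secret key")
+```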
+ +## A Better User-experience with FedML MLOps (open.fedml.ai) +To reduce the difficulty and complexity of these CLI commands, we recommend using our MLOps platform (open.fedml.ai). +FedML MLOps provides: +- Install Client Agent and Login +- Inviting Collaborators and group management +- Project Management +- Experiment Tracking (visualizing training results) +- monitoring device status +- visualizing system performance (including profiling flow chart) +- distributed logging +- model serving \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/__init__.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh new file mode 100644 index 0000000000..5584eaa1af --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/bootstrap.sh @@ -0,0 +1,6 @@ +# pip install fedml==0.7.15 +#pip install --upgrade fedml + +### don't modify this part ### +echo "[FedML]Bootstrap Finished" +############################## diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml new file mode 100644 index 0000000000..96248b6cff --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/config/fedml_config.yaml @@ -0,0 +1,55 @@ +common_args: + training_type: "cross_silo" + scenario: "horizontal" + using_mlops: false + random_seed: 0 + +environment_args: + bootstrap: config/bootstrap.sh + +data_args: + dataset: "mnist" + data_cache_dir: ~/fedml_data + partition_method: "hetero" + partition_alpha: 0.5 + +model_args: + model: "lr" + model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically + global_model_file_path: "./model_file_cache/global_model.pt" + +train_args: + federated_optimizer: "FedAvg" + client_id_list: + client_num_in_total: 1000 + client_num_per_round: 2 + comm_round: 50 + epochs: 1 + batch_size: 10 + client_optimizer: sgd + learning_rate: 0.03 + weight_decay: 0.001 + +validation_args: + frequency_of_the_test: 5 + +device_args: + worker_num: 2 + using_gpu: false + gpu_mapping_file: config/gpu_mapping.yaml + gpu_mapping_key: mapping_default + +comm_args: + backend: "MQTT_WEB3" + mqtt_config_path: config/mqtt_config.yaml + # If you want to use your customized MQTT or web3storage
server as training backends, you should uncomment and set the following lines. + #customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'} + #customized_training_web3_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link'} + customized_training_web3_config: { 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkaWQ6ZXRocjoweDEwMjdFNzE2MDdmNzkzQTNmRjVDODIzZTAwQzcyQ2RERDcxODYwRUQiLCJpc3MiOiJ3ZWIzLXN0b3JhZ2UiLCJpYXQiOjE2NjUxOTk2ODQwNDIsIm5hbWUiOiJmZWRtbC10ZXN0In0.UejyT2d3N9wCD1cNqOei77rgn8Q7or3jTj7ucBAsBtQ', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link' } + +tracking_args: + # the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/ + enable_wandb: false + wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408 + wandb_project: fedml + wandb_name: fedml_torch_fedavg_mnist_lr \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_client.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_client.sh new file mode 100644 index 0000000000..cd9945fcb1 --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_client.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +RANK=$1 +RUN_ID=$2 +python3 torch_client.py --cf config/fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_server.sh b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_server.sh new file mode 100644 index 0000000000..e5d6ce46de --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/run_server.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +RUN_ID=$1 +python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server --run_id $RUN_ID \ No newline at end of file diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_client.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_client.py new file mode 100644 index 0000000000..9085c85ebe --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_client.py @@ -0,0 +1,18 @@ +import fedml +from fedml import FedMLRunner + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, output_dim = fedml.data.load(args) + + # load model + model = fedml.model.create(args, output_dim) + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_server.py b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_server.py new file mode 100644 index 0000000000..9085c85ebe --- /dev/null +++ b/python/examples/cross_silo/mqtt_web3storage_fedavg_mnist_lr_example/step_by_step/torch_server.py @@ -0,0 +1,18 @@ +import fedml +from fedml import FedMLRunner + +if __name__ == "__main__": + args = fedml.init() + + # init device + device = fedml.device.get_device(args) + + # load data + dataset, 
output_dim = fedml.data.load(args) + + # load model + model = fedml.model.create(args, output_dim) + + # start training + fedml_runner = FedMLRunner(args, device, dataset, model) + fedml_runner.run() diff --git a/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh index 1f0287a7b6..3d969974b0 100644 --- a/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh +++ b/python/examples/cross_silo/mxnet_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh @@ -1,4 +1,3 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml diff --git a/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat b/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh b/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh index 1f0287a7b6..3d969974b0 100644 --- a/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh +++ b/python/examples/cross_silo/tf_mqtt_s3_fedavg_mnist_lr_example/config/bootstrap.sh @@ -1,4 +1,3 @@ -#!/bin/bash # pip install fedml==0.7.15 #pip install --upgrade fedml diff --git a/python/examples/cross_silo/trpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat b/python/examples/cross_silo/trpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat new file mode 100755 index 0000000000..fb0dd54d6d --- /dev/null +++ b/python/examples/cross_silo/trpc_fedavg_mnist_lr_example/one_line/config/bootstrap.bat @@ -0,0 +1,12 @@ +:: ### don't modify this part ### +:: ############################## + + +:: ### please customize your script in this region #### +set DATA_PATH=%userprofile%\fedml_data +if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH% + + +:: ### don't modify this part ### +echo [FedML]Bootstrap Finished +:: ############################## \ No newline at end of file diff --git a/python/fedml/core/distributed/communication/mqtt_ipfs/__init__.py b/python/fedml/core/distributed/communication/mqtt_ipfs/__init__.py deleted file mode 100755 index 
754f424527..0000000000 --- a/python/fedml/core/distributed/communication/mqtt_ipfs/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -__all__ = ["MqttIpfsCommManager"] - -from .mqtt_ipfs_comm_manager import MqttIpfsCommManager diff --git a/python/fedml/core/distributed/communication/mqtt_thetastore/__init__.py b/python/fedml/core/distributed/communication/mqtt_thetastore/__init__.py new file mode 100755 index 0000000000..536cad9be8 --- /dev/null +++ b/python/fedml/core/distributed/communication/mqtt_thetastore/__init__.py @@ -0,0 +1,3 @@ +__all__ = ["MqttThetastoreCommManager"] + +from .mqtt_thetastore_comm_manager import MqttThetastoreCommManager diff --git a/python/fedml/core/distributed/communication/mqtt_thetastore/mqtt_thetastore_comm_manager.py b/python/fedml/core/distributed/communication/mqtt_thetastore/mqtt_thetastore_comm_manager.py new file mode 100755 index 0000000000..46842a9abc --- /dev/null +++ b/python/fedml/core/distributed/communication/mqtt_thetastore/mqtt_thetastore_comm_manager.py @@ -0,0 +1,362 @@ +# -*-coding:utf-8-*- +import json +import logging +import traceback +import uuid +from typing import List +from fedml.core.mlops.mlops_profiler_event import MLOpsProfilerEvent +import paho.mqtt.client as mqtt +import yaml + +from ..constants import CommunicationConstants +from ..mqtt.mqtt_manager import MqttManager +from ...distributed_storage.theta_storage.theta_storage import ThetaStorage +from ..base_com_manager import BaseCommunicationManager +from ..message import Message +from ..observer import Observer +from .....core.alg_frame.context import Context +import time + + +class MqttThetastoreCommManager(BaseCommunicationManager): + def __init__( + self, + config_path, + thetastore_config_path, + topic="fedml", + client_rank=0, + client_num=0, + args=None + ): + self.broker_port = None + self.broker_host = None + self.mqtt_user = None + self.mqtt_pwd = None + self.keepalive_time = 180 + client_objects_str = str(args.client_id_list).replace('"', '"') + client_objects_str = client_objects_str.replace("'", "") + logging.info("origin client object " + str(args.client_id_list)) + logging.info("client object " + client_objects_str) + self.client_id_list = json.loads(client_objects_str) + + self._topic = "fedml_" + str(topic) + "_" + self.theta_storage = ThetaStorage(thetastore_config_path) + self.client_real_ids = [] + if args.client_id_list is not None: + logging.info( + "MqttThetastoreCommManager args client_id_list: " + str(args.client_id_list) + ) + self.client_real_ids = json.loads(args.client_id_list) + + self.group_server_id_list = None + if hasattr(args, "group_server_id_list") and args.group_server_id_list is not None: + self.group_server_id_list = args.group_server_id_list + + if args.rank == 0: + if hasattr(args, "server_id"): + self.edge_id = args.server_id + self.server_id = args.server_id + else: + self.edge_id = 0 + self.server_id = 0 + else: + if hasattr(args, "server_id"): + self.server_id = args.server_id + else: + self.server_id = 0 + + if hasattr(args, "client_id"): + self.edge_id = args.client_id + else: + if len(self.client_real_ids) == 1: + self.edge_id = self.client_real_ids[0] + else: + self.edge_id = 0 + + self._observers: List[Observer] = [] + + if client_rank is None: + self._client_id = mqtt.base62(uuid.uuid4().int, padding=22) + else: + self._client_id = client_rank + self.client_num = client_num + logging.info("mqtt_thetastore.init: client_num = %d" % client_num) + + self.set_config_from_file(config_path) + self.set_config_from_objects(config_path) + + 
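+        # Track the most recent active/offline status reported by each edge client over MQTT; the status callbacks below keep this map up to date.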
self.client_active_list = dict() + self.top_active_msg = CommunicationConstants.CLIENT_TOP_ACTIVE_MSG + self.topic_last_will_msg = CommunicationConstants.CLIENT_TOP_LAST_WILL_MSG + if args.rank == 0: + self.top_active_msg = CommunicationConstants.SERVER_TOP_ACTIVE_MSG + self.topic_last_will_msg = CommunicationConstants.SERVER_TOP_LAST_WILL_MSG + self.last_will_msg = json.dumps({"ID": self.edge_id, "status": CommunicationConstants.MSG_CLIENT_STATUS_OFFLINE}) + self.mqtt_mgr = MqttManager(self.broker_host, self.broker_port, self.mqtt_user, self.mqtt_pwd, + self.keepalive_time, + self._client_id, self.topic_last_will_msg, + self.last_will_msg) + self.mqtt_mgr.add_connected_listener(self.on_connected) + self.mqtt_mgr.add_disconnected_listener(self.on_disconnected) + self.mqtt_mgr.connect() + + self.is_connected = False + + @property + def client_id(self): + return self._client_id + + @property + def topic(self): + return self._topic + + def run_loop_forever(self): + self.mqtt_mgr.loop_forever() + + def on_connected(self, mqtt_client_object): + """ + [server] + sending message topic (publish): serverID_clientID + receiving message topic (subscribe): clientID + + [client] + sending message topic (publish): clientID + receiving message topic (subscribe): serverID_clientID + + """ + if self.is_connected: + return + self.mqtt_mgr.add_message_passthrough_listener(self._on_message) + + # Subscribe one topic + if self.client_id == 0: + # server + self.subscribe_client_status_message() + + # logging.info("self.client_real_ids = {}".format(self.client_real_ids)) + for client_rank in range(0, self.client_num): + real_topic = self._topic + str(self.client_real_ids[client_rank]) + result, mid = mqtt_client_object.subscribe(real_topic, 0) + + # logging.info( + # "mqtt_thetastore.on_connect: subscribes real_topic = %s, mid = %s, result = %s" + # % (real_topic, mid, str(result)) + # ) + # logging.info("mqtt_thetastore.on_connect: server subscribes") + self._notify_connection_ready() + else: + # client + real_topic = self._topic + str(self.server_id) + "_" + str(self.client_real_ids[0]) + result, mid = mqtt_client_object.subscribe(real_topic, 0) + + self._notify_connection_ready() + + # logging.info( + # "mqtt_thetastore.on_connect: client subscribes real_topic = %s, mid = %s, result = %s" + # % (real_topic, mid, str(result)) + # ) + self.is_connected = True + + def on_disconnected(self, mqtt_client_object): + self.is_connected = False + + def add_observer(self, observer: Observer): + self._observers.append(observer) + + def remove_observer(self, observer: Observer): + self._observers.remove(observer) + + def _notify_connection_ready(self): + msg_params = Message() + msg_type = CommunicationConstants.MSG_TYPE_CONNECTION_IS_READY + for observer in self._observers: + observer.receive_message(msg_type, msg_params) + + def _notify(self, msg_obj): + msg_params = Message() + msg_params.init_from_json_object(msg_obj) + msg_type = msg_params.get_type() + logging.info("mqtt_thetastore.notify: msg type = %s" % msg_type) + for observer in self._observers: + observer.receive_message(msg_type, msg_params) + + def _on_message_impl(self, msg): + json_payload = str(msg.payload, encoding="utf-8") + payload_obj = json.loads(json_payload) + sender_id = payload_obj.get(Message.MSG_ARG_KEY_SENDER, "") + receiver_id = payload_obj.get(Message.MSG_ARG_KEY_RECEIVER, "") + thetastore_key_str = payload_obj.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") + thetastore_key_str = str(thetastore_key_str).strip(" ") + + if thetastore_key_str != "": 
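+            # A non-empty key means the MQTT payload carries a Theta EdgeStore key rather than raw weights, so the model is fetched from the store and unpacked below.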
+ logging.info( + "mqtt_thetastore.on_message: use thetastore pack, thetastore message key %s" % thetastore_key_str + ) + + model_params = self.theta_storage.read_model(thetastore_key_str) + Context().add("received_model_cid", thetastore_key_str) + logging.info("Received model cid {}".format(Context().get("received_model_cid"))) + + logging.info( + "mqtt_thetastore.on_message: model params length %d" % len(model_params) + ) + + # replace the thetastore object key with raw model params + payload_obj[Message.MSG_ARG_KEY_MODEL_PARAMS] = model_params + else: + logging.info("mqtt_thetastore.on_message: not use thetastore pack") + + self._notify(payload_obj) + + def _on_message(self, msg): + try: + self._on_message_impl(msg) + except Exception as e: + logging.error("mqtt_thetastore.on_message exception: {}".format(traceback.format_exc())) + + def send_message(self, msg: Message): + """ + [server] + sending message topic (publish): fedml_runid_serverID_clientID + receiving message topic (subscribe): fedml_runid_clientID + + [client] + sending message topic (publish): fedml_runid_clientID + receiving message topic (subscribe): fedml_runid_serverID_clientID + + """ + sender_id = msg.get_sender_id() + receiver_id = msg.get_receiver_id() + if self.client_id == 0: + # topic = "fedml" + "_" + "run_id" + "_0" + "_" + "client_id" + topic = self._topic + str(self.server_id) + "_" + str(receiver_id) + logging.info("mqtt_thetastore.send_message: msg topic = %s" % str(topic)) + + payload = msg.get_params() + model_params_obj = payload.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") + if model_params_obj != "": + # thetastore + logging.info("mqtt_thetastore.send_message: to python client.") + message_key = model_url = self.theta_storage.write_model(model_params_obj) + Context().add("sent_model_cid", model_url) + logging.info("Sent model cid {}".format(Context().get("sent_model_cid"))) + logging.info( + "mqtt_thetastore.send_message: thetastore+MQTT msg sent, thetastore message key = %s" + % message_key + ) + model_params_key_url = { + "key": message_key, + "url": model_url, + "obj": model_params_obj, + } + payload[Message.MSG_ARG_KEY_MODEL_PARAMS] = model_params_key_url["key"] + payload[Message.MSG_ARG_KEY_MODEL_PARAMS_URL] = model_params_key_url[ + "url" + ] + self.mqtt_mgr.send_message(topic, json.dumps(payload)) + else: + # pure MQTT + logging.info("mqtt_thetastore.send_message: MQTT msg sent") + self.mqtt_mgr.send_message(topic, json.dumps(payload)) + + else: + # client + topic = self._topic + str(msg.get_sender_id()) + + payload = msg.get_params() + model_params_obj = payload.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") + if model_params_obj != "": + # thetastore + message_key = model_url = self.theta_storage.write_model(model_params_obj) + Context().add("sent_model_cid", model_url) + logging.info("Sent model cid {}".format(Context().get("sent_model_cid"))) + logging.info( + "mqtt_thetastore.send_message: thetastore+MQTT msg sent, message_key = %s" + % message_key + ) + model_params_key_url = { + "key": message_key, + "url": model_url, + "obj": model_params_obj, + } + payload[Message.MSG_ARG_KEY_MODEL_PARAMS] = model_params_key_url["key"] + payload[Message.MSG_ARG_KEY_MODEL_PARAMS_URL] = model_params_key_url[ + "url" + ] + self.mqtt_mgr.send_message(topic, json.dumps(payload)) + else: + logging.info("mqtt_thetastore.send_message: MQTT msg sent") + self.mqtt_mgr.send_message(topic, json.dumps(payload)) + + def send_message_json(self, topic_name, json_message): + self.mqtt_mgr.send_message_json(topic_name, 
json_message) + + def handle_receive_message(self): + start_listening_time = time.time() + MLOpsProfilerEvent.log_to_wandb({"ListenStart": start_listening_time}) + self.run_loop_forever() + MLOpsProfilerEvent.log_to_wandb({"TotalTime": time.time() - start_listening_time}) + + def stop_receive_message(self): + logging.info("mqtt_thetastore.stop_receive_message: stopping...") + self.mqtt_mgr.loop_stop() + self.mqtt_mgr.disconnect() + + def set_config_from_file(self, config_file_path): + try: + with open(config_file_path, "r") as f: + config = yaml.load(f, Loader=yaml.FullLoader) + self.broker_host = config["BROKER_HOST"] + self.broker_port = config["BROKER_PORT"] + self.mqtt_user = None + self.mqtt_pwd = None + if "MQTT_USER" in config: + self.mqtt_user = config["MQTT_USER"] + if "MQTT_PWD" in config: + self.mqtt_pwd = config["MQTT_PWD"] + except Exception as e: + pass + + def set_config_from_objects(self, mqtt_config): + self.broker_host = mqtt_config["BROKER_HOST"] + self.broker_port = mqtt_config["BROKER_PORT"] + self.mqtt_user = None + self.mqtt_pwd = None + if "MQTT_USER" in mqtt_config: + self.mqtt_user = mqtt_config["MQTT_USER"] + if "MQTT_PWD" in mqtt_config: + self.mqtt_pwd = mqtt_config["MQTT_PWD"] + + def callback_client_last_will_msg(self, topic, payload): + msg = json.loads(payload) + edge_id = msg.get("ID", None) + status = msg.get("status", CommunicationConstants.MSG_CLIENT_STATUS_OFFLINE) + if edge_id is not None and status == CommunicationConstants.MSG_CLIENT_STATUS_OFFLINE: + if self.client_active_list.get(edge_id, None) is not None: + self.client_active_list.pop(edge_id) + + def callback_client_active_msg(self, topic, payload): + msg = json.loads(payload) + edge_id = msg.get("ID", None) + status = msg.get("status", CommunicationConstants.MSG_CLIENT_STATUS_IDLE) + if edge_id is not None: + self.client_active_list[edge_id] = status + + def subscribe_client_status_message(self): + # Set up an MQTT message listener for the last-will message from the client. + self.mqtt_mgr.add_message_listener(CommunicationConstants.CLIENT_TOP_LAST_WILL_MSG, + self.callback_client_last_will_msg) + + # Set up an MQTT message listener for the active-status message from the client.
+ self.mqtt_mgr.add_message_listener(CommunicationConstants.CLIENT_TOP_ACTIVE_MSG, + self.callback_client_active_msg) + + def get_client_status(self, client_id): + return self.client_active_list.get(client_id, CommunicationConstants.MSG_CLIENT_STATUS_OFFLINE) + + def get_client_list_status(self): + return self.client_active_list + + +if __name__ == "__main__": + pass \ No newline at end of file diff --git a/python/fedml/core/distributed/communication/mqtt_web3/__init__.py b/python/fedml/core/distributed/communication/mqtt_web3/__init__.py new file mode 100755 index 0000000000..a39dfd5643 --- /dev/null +++ b/python/fedml/core/distributed/communication/mqtt_web3/__init__.py @@ -0,0 +1,3 @@ +__all__ = ["MqttWeb3CommManager"] + +from .mqtt_web3_comm_manager import MqttWeb3CommManager diff --git a/python/fedml/core/distributed/communication/mqtt_ipfs/mqtt_ipfs_comm_manager.py b/python/fedml/core/distributed/communication/mqtt_web3/mqtt_web3_comm_manager.py similarity index 87% rename from python/fedml/core/distributed/communication/mqtt_ipfs/mqtt_ipfs_comm_manager.py rename to python/fedml/core/distributed/communication/mqtt_web3/mqtt_web3_comm_manager.py index 9d9263f819..cf2cec74b1 100755 --- a/python/fedml/core/distributed/communication/mqtt_ipfs/mqtt_ipfs_comm_manager.py +++ b/python/fedml/core/distributed/communication/mqtt_web3/mqtt_web3_comm_manager.py @@ -10,7 +10,7 @@ from ..constants import CommunicationConstants from ..mqtt.mqtt_manager import MqttManager -from .ipfs_storage import IpfsStorage +from ...distributed_storage.web3_storage.web3_storage import Web3Storage from ..base_com_manager import BaseCommunicationManager from ..message import Message from ..observer import Observer @@ -18,11 +18,11 @@ import time -class MqttIpfsCommManager(BaseCommunicationManager): +class MqttWeb3CommManager(BaseCommunicationManager): def __init__( self, config_path, - ipfs_config_path, + web3_config_path, topic="fedml", client_rank=0, client_num=0, @@ -40,11 +40,11 @@ def __init__( self.client_id_list = json.loads(client_objects_str) self._topic = "fedml_" + str(topic) + "_" - self.ipfs_storage = IpfsStorage(ipfs_config_path) + self.web3_storage = Web3Storage(web3_config_path) self.client_real_ids = [] if args.client_id_list is not None: logging.info( - "MqttIpfsCommManager args client_id_list: " + str(args.client_id_list) + "MqttWeb3CommManager args client_id_list: " + str(args.client_id_list) ) self.client_real_ids = json.loads(args.client_id_list) @@ -80,7 +80,7 @@ def __init__( else: self._client_id = client_rank self.client_num = client_num - logging.info("mqtt_ipfs.init: client_num = %d" % client_num) + logging.info("mqtt_web3.init: client_num = %d" % client_num) self.set_config_from_file(config_path) self.set_config_from_objects(config_path) @@ -139,10 +139,10 @@ def on_connected(self, mqtt_client_object): result, mid = mqtt_client_object.subscribe(real_topic, 0) # logging.info( - # "mqtt_ipfs.on_connect: subscribes real_topic = %s, mid = %s, result = %s" + # "mqtt_web3.on_connect: subscribes real_topic = %s, mid = %s, result = %s" # % (real_topic, mid, str(result)) # ) - # logging.info("mqtt_ipfs.on_connect: server subscribes") + # logging.info("mqtt_web3.on_connect: server subscribes") self._notify_connection_ready() else: # client @@ -152,7 +152,7 @@ def on_connected(self, mqtt_client_object): self._notify_connection_ready() # logging.info( - # "mqtt_ipfs.on_connect: client subscribes real_topic = %s, mid = %s, result = %s" + # "mqtt_web3.on_connect: client subscribes real_topic = %s, mid 
= %s, result = %s" # % (real_topic, mid, str(result)) # ) self.is_connected = True @@ -176,7 +176,7 @@ def _notify(self, msg_obj): msg_params = Message() msg_params.init_from_json_object(msg_obj) msg_type = msg_params.get_type() - logging.info("mqtt_ipfs.notify: msg type = %s" % msg_type) + logging.info("mqtt_web3.notify: msg type = %s" % msg_type) for observer in self._observers: observer.receive_message(msg_type, msg_params) @@ -185,26 +185,26 @@ def _on_message_impl(self, msg): payload_obj = json.loads(json_payload) sender_id = payload_obj.get(Message.MSG_ARG_KEY_SENDER, "") receiver_id = payload_obj.get(Message.MSG_ARG_KEY_RECEIVER, "") - ipfs_key_str = payload_obj.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") - ipfs_key_str = str(ipfs_key_str).strip(" ") + web3_key_str = payload_obj.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") + web3_key_str = str(web3_key_str).strip(" ") - if ipfs_key_str != "": + if web3_key_str != "": logging.info( - "mqtt_ipfs.on_message: use ipfs pack, ipfs message key %s" % ipfs_key_str + "mqtt_web3.on_message: use web3 pack, web3 message key %s" % web3_key_str ) - model_params = self.ipfs_storage.read_model(ipfs_key_str) - Context().add("received_model_cid", ipfs_key_str) + model_params = self.web3_storage.read_model(web3_key_str) + Context().add("received_model_cid", web3_key_str) logging.info("Received model cid {}".format(Context().get("received_model_cid"))) logging.info( - "mqtt_ipfs.on_message: model params length %d" % len(model_params) + "mqtt_web3.on_message: model params length %d" % len(model_params) ) - # replace the IPFS object key with raw model params + # replace the web3 object key with raw model params payload_obj[Message.MSG_ARG_KEY_MODEL_PARAMS] = model_params else: - logging.info("mqtt_ipfs.on_message: not use ipfs pack") + logging.info("mqtt_web3.on_message: not use web3 pack") self._notify(payload_obj) @@ -212,7 +212,7 @@ def _on_message(self, msg): try: self._on_message_impl(msg) except Exception as e: - logging.error("mqtt_ipfs.on_message exception: {}".format(traceback.format_exc())) + logging.error("mqtt_web3.on_message exception: {}".format(traceback.format_exc())) def send_message(self, msg: Message): """ @@ -230,18 +230,18 @@ def send_message(self, msg: Message): if self.client_id == 0: # topic = "fedml" + "_" + "run_id" + "_0" + "_" + "client_id" topic = self._topic + str(self.server_id) + "_" + str(receiver_id) - logging.info("mqtt_ipfs.send_message: msg topic = %s" % str(topic)) + logging.info("mqtt_web3.send_message: msg topic = %s" % str(topic)) payload = msg.get_params() model_params_obj = payload.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") if model_params_obj != "": - # IPFS - logging.info("mqtt_ipfs.send_message: to python client.") - message_key = model_url = self.ipfs_storage.write_model(model_params_obj) + # web3 + logging.info("mqtt_web3.send_message: to python client.") + message_key = model_url = self.web3_storage.write_model(model_params_obj) Context().add("sent_model_cid", model_url) logging.info("Sent model cid {}".format(Context().get("sent_model_cid"))) logging.info( - "mqtt_ipfs.send_message: IPFS+MQTT msg sent, ipfs message key = %s" + "mqtt_web3.send_message: web3+MQTT msg sent, web3 message key = %s" % message_key ) model_params_key_url = { @@ -256,7 +256,7 @@ def send_message(self, msg: Message): self.mqtt_mgr.send_message(topic, json.dumps(payload)) else: # pure MQTT - logging.info("mqtt_ipfs.send_message: MQTT msg sent") + logging.info("mqtt_web3.send_message: MQTT msg sent") self.mqtt_mgr.send_message(topic, 
json.dumps(payload)) else: @@ -266,12 +266,12 @@ def send_message(self, msg: Message): payload = msg.get_params() model_params_obj = payload.get(Message.MSG_ARG_KEY_MODEL_PARAMS, "") if model_params_obj != "": - # IPFS - message_key = model_url = self.ipfs_storage.write_model(model_params_obj) + # web3 + message_key = model_url = self.web3_storage.write_model(model_params_obj) Context().add("sent_model_cid", model_url) logging.info("Sent model cid {}".format(Context().get("sent_model_cid"))) logging.info( - "mqtt_ipfs.send_message: IPFS+MQTT msg sent, message_key = %s" + "mqtt_web3.send_message: web3+MQTT msg sent, message_key = %s" % message_key ) model_params_key_url = { @@ -285,7 +285,7 @@ def send_message(self, msg: Message): ] self.mqtt_mgr.send_message(topic, json.dumps(payload)) else: - logging.info("mqtt_ipfs.send_message: MQTT msg sent") + logging.info("mqtt_web3.send_message: MQTT msg sent") self.mqtt_mgr.send_message(topic, json.dumps(payload)) def send_message_json(self, topic_name, json_message): @@ -298,7 +298,7 @@ def handle_receive_message(self): MLOpsProfilerEvent.log_to_wandb({"TotalTime": time.time() - start_listening_time}) def stop_receive_message(self): - logging.info("mqtt_ipfs.stop_receive_message: stopping...") + logging.info("mqtt_web3.stop_receive_message: stopping...") self.mqtt_mgr.loop_stop() self.mqtt_mgr.disconnect() diff --git a/python/fedml/core/distributed/crypto/README.md b/python/fedml/core/distributed/crypto/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fedml/core/distributed/crypto/__init__.py b/python/fedml/core/distributed/crypto/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fedml/core/distributed/communication/mqtt_ipfs/ipfs_crypto.py b/python/fedml/core/distributed/crypto/crypto_api.py similarity index 100% rename from python/fedml/core/distributed/communication/mqtt_ipfs/ipfs_crypto.py rename to python/fedml/core/distributed/crypto/crypto_api.py diff --git a/python/fedml/core/distributed/distributed_storage/README.md b/python/fedml/core/distributed/distributed_storage/README.md index e69de29bb2..0ec9381fc4 100644 --- a/python/fedml/core/distributed/distributed_storage/README.md +++ b/python/fedml/core/distributed/distributed_storage/README.md @@ -0,0 +1,12 @@ +## Web3 storage +Use Web3 storage as federated learning distributed storage for reading and writing models. +You should register account at https://web3.storage and set config parameters: +token, upload_uri, download_uri. +If you want to use secret key to encrypt models, you should set secret key by calling Context().add("ipfs_secret_key", "your secret key") + +## Theta edge store +Use Theta EdgeStore as federated learning distributed storage for reading and writing models. +You should setup theta edgestore based on https://docs.thetatoken.org/docs/theta-edge-store-setup and set config parameters: +store_home_dir, upload_uri, download_uri. 
+If you want to use a secret key to encrypt models, set it by calling Context().add("ipfs_secret_key", "your secret key") + diff --git a/python/fedml/core/distributed/distributed_storage/__init__.py b/python/fedml/core/distributed/distributed_storage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fedml/core/distributed/distributed_storage/theta_storage/__init__.py b/python/fedml/core/distributed/distributed_storage/theta_storage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fedml/core/distributed/distributed_storage/theta_storage/theta_storage.py b/python/fedml/core/distributed/distributed_storage/theta_storage/theta_storage.py new file mode 100644 index 0000000000..5762a198e3 --- /dev/null +++ b/python/fedml/core/distributed/distributed_storage/theta_storage/theta_storage.py @@ -0,0 +1,174 @@ +import json +import os +import time +import pickle +import uuid + +from fedml.core.mlops.mlops_profiler_event import MLOpsProfilerEvent +from ...crypto import crypto_api +from .....core.alg_frame.context import Context +from os.path import expanduser + +import httpx +import shutil + + +class ThetaStorage: + def __init__( + self, thetastore_config): + self.ipfs_config = thetastore_config + self.store_home_dir = thetastore_config.get("store_home_dir", "~/edge-store-playground") + if str(self.store_home_dir).startswith("~"): + home_dir = expanduser("~") + new_store_dir = str(self.store_home_dir).replace('\\', os.sep).replace('/', os.sep) + strip_dir = new_store_dir.lstrip('~').lstrip(os.sep) + self.store_home_dir = os.path.join(home_dir, strip_dir) + self.ipfs_upload_uri = thetastore_config.get("upload_uri", "http://localhost:19888/rpc") + self.ipfs_download_uri = thetastore_config.get("download_uri", "http://localhost:19888/rpc") + + def write_model(self, model): + pickle_dump_start_time = time.time() + model_pkl = pickle.dumps(model) + secret_key = Context().get("ipfs_secret_key") + if secret_key is not None and secret_key != "": + secret_key = bytes(secret_key, 'UTF-8') + model_pkl = crypto_api.encrypt(secret_key, model_pkl) + MLOpsProfilerEvent.log_to_wandb( + {"PickleDumpsTime": time.time() - pickle_dump_start_time} + ) + ipfs_upload_start_time = time.time() + result, model_url = self.storage_ipfs_upload_file(model_pkl) + MLOpsProfilerEvent.log_to_wandb( + {"Comm/send_delay": time.time() - ipfs_upload_start_time} + ) + return model_url + + def read_model(self, message_key): + message_handler_start_time = time.time() + model_pkl, _ = self.storage_ipfs_download_file(message_key) + secret_key = Context().get("ipfs_secret_key") + if secret_key is not None and secret_key != "": + secret_key = bytes(secret_key, 'UTF-8') + model_pkl = crypto_api.decrypt(secret_key, model_pkl) + MLOpsProfilerEvent.log_to_wandb( + {"Comm/recieve_delay_s3": time.time() - message_handler_start_time} + ) + unpickle_start_time = time.time() + model = pickle.loads(model_pkl) + MLOpsProfilerEvent.log_to_wandb( + {"UnpickleTime": time.time() - unpickle_start_time} + ) + return model + + def storage_ipfs_upload_file(self, file_obj): + """Upload a file to Theta EdgeStore through the edge node RPC.
+ + Args: + file_obj: file-like object in byte mode + + Returns: + Response: (success flag, cid or error message) + """ + # Request: upload a file + # curl -X POST -H 'Content-Type: application/json' --data '{"jsonrpc":"2.0","method":"edgestore.PutFile","params":[{"path": "theta-edge-store-demos/demos/image/data/smiley_explorer.png"}],"id":1}' http://localhost:19888/rpc + # Result + # { + # "jsonrpc": "2.0", + # "id": 1, + # "result": { + # "key": "0xbc0383809da9fb98c5755e3fa4f19f4ebc7e34308ab321246e4bb54e548fad04", + # "relpath": "smiley_explorer.png", + # "success": true + # } + # } + home_dir = expanduser("~") + file_path = os.path.join(home_dir, "thetastore") + if not os.path.exists(file_path): + os.makedirs(file_path) + file_path = os.path.join(file_path, str(uuid.uuid4())) + with open(file_path, "wb") as file_handle: + file_handle.write(file_obj) + + request_data = {"jsonrpc":"2.0", + "method":"edgestore.PutFile", + "params":[{"path": file_path}], + "id":1} + res = httpx.post( + self.ipfs_upload_uri, + headers={"Content-Type": "application/json"}, + data=json.dumps(request_data), + timeout=None, + ) + content = res.json() + result = content.get("result", None) + if result is None: + return False, "Failed to upload file (result is none)." + else: + success = result.get("success", False) + if not success: + return False, "Failed to upload file (success is false)." + file_cid = result.get("key", None) + if file_cid is None: + return False, "Failed to upload file (key is none)." + return True, file_cid + + def storage_ipfs_download_file(self, ipfs_cid, output_path=None): + """Download a file stored in Theta EdgeStore. + + Args: + ipfs_cid (str): string describing the location of the file. + output_path (Optional[str]): if set, the file will be stored at this path. + + Returns: + Response: (content, output_file_obj) + """ + # Request: retrieve a file (the smiley_explorer.png file we uploaded earlier) + # curl -X POST -H 'Content-Type: application/json' --data '{"jsonrpc":"2.0","method":"edgestore.GetFile","params":[{"key": "0xbc0383809da9fb98c5755e3fa4f19f4ebc7e34308ab321246e4bb54e548fad04"}],"id":1}' http://localhost:19888/rpc + # Result + # { + # "jsonrpc": "2.0", + # "id": 1, + # "result": { + # "path": "../data/edgestore/playground/single-node-network/node/storage/file_cache/0xbc0383809da9fb98c5755e3fa4f19f4ebc7e34308ab321246e4bb54e548fad04/smiley_explorer.png" + # } + # } + + request_data = {"jsonrpc":"2.0", + "method":"edgestore.GetFile", + "params":[{"key": ipfs_cid}], + "id":1} + res = httpx.post( + self.ipfs_download_uri, + headers={"Content-Type": "application/json"}, + data=json.dumps(request_data), + timeout=None, + ) + + download_path = None + content = res.json() + result = content.get("result", None) + if result is None: + return False, "Failed to download file (result is none)." + else: + download_path = result.get("path", None) + if download_path is None: + return False, "Failed to download file (path is none)."
+ else: + download_path = os.path.join(self.store_home_dir, download_path) + + output_file_obj = None + file_content = None + try: + if output_path is not None: + shutil.copyfile(download_path, output_path) + output_file_obj = open(output_path, "rb") + except Exception as e: + pass + + try: + download_file_obj = open(download_path, "rb") + file_content = download_file_obj.read() + except Exception as e: + pass + + return file_content, output_file_obj diff --git a/python/fedml/core/distributed/distributed_storage/web3_storage/__init__.py b/python/fedml/core/distributed/distributed_storage/web3_storage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fedml/core/distributed/communication/mqtt_ipfs/ipfs_storage.py b/python/fedml/core/distributed/distributed_storage/web3_storage/web3_storage.py similarity index 94% rename from python/fedml/core/distributed/communication/mqtt_ipfs/ipfs_storage.py rename to python/fedml/core/distributed/distributed_storage/web3_storage/web3_storage.py index 9d8a607586..f5f5b1a299 100644 --- a/python/fedml/core/distributed/communication/mqtt_ipfs/ipfs_storage.py +++ b/python/fedml/core/distributed/distributed_storage/web3_storage/web3_storage.py @@ -3,17 +3,17 @@ import pickle from fedml.core.mlops.mlops_profiler_event import MLOpsProfilerEvent -from . import ipfs_crypto +from ...crypto import crypto_api from .....core.alg_frame.context import Context import httpx -class IpfsStorage: +class Web3Storage: def __init__( self, ipfs_config): self.ipfs_config = ipfs_config - self.ipfs_upload_uri = ipfs_config.get("upload_uri", "https://api.web3.storage/upload2") + self.ipfs_upload_uri = ipfs_config.get("upload_uri", "https://api.web3.storage/upload") self.ipfs_download_uri = ipfs_config.get("download_uri", "ipfs.w3s.link2") def write_model(self, model): @@ -22,7 +22,7 @@ def write_model(self, model): secret_key = Context().get("ipfs_secret_key") if secret_key is not None and secret_key != "": secret_key = bytes(secret_key, 'UTF-8') - model_pkl = ipfs_crypto.encrypt(secret_key, model_pkl) + model_pkl = crypto_api.encrypt(secret_key, model_pkl) MLOpsProfilerEvent.log_to_wandb( {"PickleDumpsTime": time.time() - pickle_dump_start_time} ) @@ -39,7 +39,7 @@ def read_model(self, message_key): secret_key = Context().get("ipfs_secret_key") if secret_key is not None and secret_key != "": secret_key = bytes(secret_key, 'UTF-8') - model_pkl = ipfs_crypto.decrypt(secret_key, model_pkl) + model_pkl = crypto_api.decrypt(secret_key, model_pkl) MLOpsProfilerEvent.log_to_wandb( {"Comm/recieve_delay_s3": time.time() - message_handler_start_time} ) diff --git a/python/fedml/core/distributed/fedml_comm_manager.py b/python/fedml/core/distributed/fedml_comm_manager.py index a30a8cc694..721561e66d 100644 --- a/python/fedml/core/distributed/fedml_comm_manager.py +++ b/python/fedml/core/distributed/fedml_comm_manager.py @@ -92,21 +92,37 @@ def get_training_mqtt_s3_config(self): return mqtt_config, s3_config - def get_training_mqtt_ipfs_config(self): + def get_training_mqtt_web3_config(self): mqtt_config = None - ipfs_config = None + web3_config = None if hasattr(self.args, "customized_training_mqtt_config") and self.args.customized_training_mqtt_config != "": mqtt_config = self.args.customized_training_mqtt_config - if hasattr(self.args, "customized_training_ipfs_config") and self.args.customized_training_ipfs_config != "": - ipfs_config = self.args.customized_training_ipfs_config - if mqtt_config is None or ipfs_config is None: - mqtt_config_from_cloud, 
diff --git a/python/fedml/core/mlops/mlops_configs.py b/python/fedml/core/mlops/mlops_configs.py
index 89aa1af162..4fccdd2e99 100644
--- a/python/fedml/core/mlops/mlops_configs.py
+++ b/python/fedml/core/mlops/mlops_configs.py
@@ -101,9 +101,9 @@ def fetch_configs(self):
             raise Exception("failed to fetch device configurations!")
         return mqtt_config, s3_config
 
-    def fetch_ipfs_configs(self):
+    def fetch_web3_configs(self):
         url, cert_path = self.get_request_params()
-        json_params = {"config_name": ["mqtt_config", "ipfs_config"]}
+        json_params = {"config_name": ["mqtt_config", "web3_config"]}
 
         if cert_path is not None:
             try:
@@ -124,10 +124,38 @@ def fetch_ipfs_configs(self):
         status_code = response.json().get("code")
         if status_code == "SUCCESS":
             mqtt_config = response.json().get("data").get("mqtt_config")
-            ipfs_config = response.json().get("data").get("ipfs_config")
+            web3_config = response.json().get("data").get("web3_config")
         else:
             raise Exception("failed to fetch device configurations!")
-        return mqtt_config, ipfs_config
+        return mqtt_config, web3_config
+
+    def fetch_thetastore_configs(self):
+        url, cert_path = self.get_request_params()
+        json_params = {"config_name": ["mqtt_config", "thetastore_config"]}
+
+        if cert_path is not None:
+            try:
+                requests.session().verify = cert_path
+                response = requests.post(
+                    url, json=json_params, verify=True, headers={"content-type": "application/json", "Connection": "close"}
+                )
+            except requests.exceptions.SSLError as err:
+                MLOpsConfigs.install_root_ca_file()
+                response = requests.post(
+                    url, json=json_params, verify=True, headers={"content-type": "application/json", "Connection": "close"}
+                )
+        else:
+            response = requests.post(
+                url, json=json_params, headers={"content-type": "application/json", "Connection": "close"}
+            )
+
+        status_code = response.json().get("code")
+        if status_code == "SUCCESS":
+            mqtt_config = response.json().get("data").get("mqtt_config")
+            thetastore_config = response.json().get("data").get("thetastore_config")
+        else:
+            raise Exception("failed to fetch device configurations!")
+        return mqtt_config, thetastore_config
 
     def fetch_all_configs(self):
         url, cert_path = self.get_request_params()
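
For clarity, this is the response shape fetch_thetastore_configs() expects from the MLOps endpoint. Only the top-level keys ("code", "data", "mqtt_config", "thetastore_config") are implied by the code above; the nested field names and values here are placeholders:

    expected_response = {
        "code": "SUCCESS",
        "data": {
            "mqtt_config": {"BROKER_HOST": "mqtt.example.com", "BROKER_PORT": 1883},
            "thetastore_config": {
                "upload_uri": "http://localhost:19888/rpc",
                "download_uri": "http://localhost:19888/rpc",
            },
        },
    }
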
diff --git a/python/fedml/cross_silo/client/fedml_client_master_manager.py b/python/fedml/cross_silo/client/fedml_client_master_manager.py
index f39640bfed..2b0eda2375 100644
--- a/python/fedml/cross_silo/client/fedml_client_master_manager.py
+++ b/python/fedml/cross_silo/client/fedml_client_master_manager.py
@@ -89,7 +89,7 @@ def handle_message_receive_model_from_server(self, msg_params):
         self.trainer_dist_adapter.update_dataset(int(client_index))
         logging.info("current roundx {}, num rounds {}".format(self.round_idx, self.num_rounds))
         self.trainer_dist_adapter.update_model(model_params)
-        if self.round_idx == self.num_rounds:
+        if self.round_idx == self.num_rounds - 1:
             mlops.log_training_finished_status()
             return
         self.round_idx += 1
diff --git a/python/quick_start/beehive/config/bootstrap.bat b/python/quick_start/beehive/config/bootstrap.bat
new file mode 100755
index 0000000000..fb0dd54d6d
--- /dev/null
+++ b/python/quick_start/beehive/config/bootstrap.bat
@@ -0,0 +1,12 @@
+:: ### don't modify this part ###
+:: ##############################
+
+
+:: ### please customize your script in this region ####
+set DATA_PATH=%userprofile%\fedml_data
+if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH%
+
+
+:: ### don't modify this part ###
+echo [FedML]Bootstrap Finished
+:: ##############################
\ No newline at end of file
diff --git a/python/quick_start/beehive/config/bootstrap.sh b/python/quick_start/beehive/config/bootstrap.sh
index f0f2910615..3d969974b0 100644
--- a/python/quick_start/beehive/config/bootstrap.sh
+++ b/python/quick_start/beehive/config/bootstrap.sh
@@ -1,9 +1,6 @@
 # pip install fedml==0.7.15
-pip install --upgrade fedml
-
-# login to wandb
-# wandb login ee0b5f53d949c84cee7decbe7a629e63fb2f8408
+#pip install --upgrade fedml
 
 ### don't modify this part ###
 echo "[FedML]Bootstrap Finished"
diff --git a/python/quick_start/octopus/config/bootstrap.bat b/python/quick_start/octopus/config/bootstrap.bat
new file mode 100755
index 0000000000..fb0dd54d6d
--- /dev/null
+++ b/python/quick_start/octopus/config/bootstrap.bat
@@ -0,0 +1,12 @@
+:: ### don't modify this part ###
+:: ##############################
+
+
+:: ### please customize your script in this region ####
+set DATA_PATH=%userprofile%\fedml_data
+if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH%
+
+
+:: ### don't modify this part ###
+echo [FedML]Bootstrap Finished
+:: ##############################
\ No newline at end of file
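
The fedml_client_master_manager.py change above fixes an off-by-one in round counting: round_idx starts at 0 and is incremented after each aggregated model is handled, so the last round is num_rounds - 1; comparing against num_rounds meant the finished status would only be logged on a message that never arrives. A minimal sketch of the corrected loop:

    num_rounds = 3
    round_idx = 0
    for _ in range(num_rounds):
        # ... train locally, upload the update, receive the aggregated model ...
        if round_idx == num_rounds - 1:
            print("training finished after round", round_idx)  # reached on the final round
            break
        round_idx += 1
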
From c3fd2b9602c071351be0a898010816c87bd2e3c5 Mon Sep 17 00:00:00 2001
From: alexliang
Date: Sun, 9 Oct 2022 17:30:08 +0800
Subject: [PATCH 5/5] update version.

---
 python/fedml/__init__.py | 2 +-
 python/setup.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/fedml/__init__.py b/python/fedml/__init__.py
index af4c9b991b..43372476b0 100644
--- a/python/fedml/__init__.py
+++ b/python/fedml/__init__.py
@@ -23,7 +23,7 @@
 _global_training_type = None
 _global_comm_backend = None
 
-__version__ = "0.7.333"
+__version__ = "0.7.334"
 
 
 def init(args=None):
diff --git a/python/setup.py b/python/setup.py
index 3998554172..52e8fa89dc 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -73,7 +73,7 @@ def finalize_options(self):
 
 setup(
     name="fedml",
-    version="0.7.333",
+    version="0.7.334",
     author="FedML Team",
     author_email="ch@fedml.ai",
     description="A research and production integrated edge-cloud library for "