Bug 1920142 - part 2: vendor clubcard and clubcard-crlite. r=keeler,supply-chain-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D223011
This commit is contained in:
John Schanck 2024-09-25 23:23:19 +00:00
parent 691af0bcc2
commit a9a2cac131
18 changed files with 2627 additions and 0 deletions

23
Cargo.lock generated
View File

@ -731,6 +731,8 @@ version = "0.0.1"
dependencies = [
"base64 0.21.3",
"byteorder",
"clubcard",
"clubcard-crlite",
"crossbeam-utils",
"cstr",
"firefox-on-glean",
@ -882,6 +884,27 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "clubcard"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b529ab1dcb6d5e3a03107e59da1249f8f4efe1e7e123eea064a2e42c6f1e3e"
dependencies = [
"serde",
]
[[package]]
name = "clubcard-crlite"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "506cfeae76e092240aa4b0c08f996748b2df1c5517e65053eeb9226fb76b93f0"
dependencies = [
"bincode",
"clubcard",
"serde",
"sha2",
]
[[package]]
name = "cmake"
version = "0.1.999"

View File

@ -7,6 +7,8 @@ license = "MPL-2.0"
[dependencies]
base64 = "0.21.0"
byteorder = "1.2.7"
clubcard = "0.3"
clubcard-crlite = "0.2"
crossbeam-utils = "0.8"
cstr = "0.2"
firefox-on-glean = { path = "../../../../toolkit/components/glean/api" }

View File

@ -1125,6 +1125,18 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"
delta = "0.2.2 -> 0.2.4"
[[audits.clubcard]]
who = "John M. Schanck <jschanck@mozilla.com>"
criteria = "safe-to-deploy"
version = "0.3.0"
notes = "This crate is maintained by the CryptoEng team at Mozilla and it contains no unsafe code."
[[audits.clubcard-crlite]]
who = "John M. Schanck <jschanck@mozilla.com>"
criteria = "safe-to-deploy"
version = "0.2.0"
notes = "This crate is maintained by the CryptoEng team at Mozilla and it contains no unsafe code."
[[audits.comedy]]
who = "Nick Alexander <nalexander@mozilla.com>"
criteria = "safe-to-deploy"

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"9d0f74b96400111318eaa108c72feb664a8b0bb28b387812706e2cb9fbe0914e","LICENSE":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/builder.rs":"616f7aa1ea6e88bdc0c939c24b0e432e40fa553535ff88098fb8967d32b339a5","src/lib.rs":"d939278e06b1dca0bf827d491213d7ebadc7be6cc89bc53c608af42872e3378d","src/query.rs":"4b105a7608ea79b6e998cb891ffede4e84e01f28ce47d0b28294eef0bcb4c283"},"package":"506cfeae76e092240aa4b0c08f996748b2df1c5517e65053eeb9226fb76b93f0"}

View File

@ -0,0 +1,68 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
name = "clubcard-crlite"
version = "0.2.0"
authors = ["John M. Schanck <jschanck@mozilla.com>"]
build = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "An instantiation of Clubcard for use in CRLite"
readme = false
license = "MPL-2.0"
repository = "https://github.com/mozilla/clubcard-crlite/"
[lib]
name = "clubcard_crlite"
path = "src/lib.rs"
[dependencies.base64]
version = "0.22"
optional = true
[dependencies.bincode]
version = "1.3"
[dependencies.clubcard]
version = "0.3"
[dependencies.rand]
version = "0.8"
optional = true
[dependencies.serde]
version = "1.0"
features = ["derive"]
[dependencies.serde_json]
version = "1"
optional = true
[dependencies.sha2]
version = "0.10"
[dev-dependencies.rand]
version = "0.8"
[dev-dependencies.sha2]
version = "0.10"
[features]
builder = [
"dep:rand",
"dep:base64",
"dep:serde_json",
"clubcard/builder",
]

373
third_party/rust/clubcard-crlite/LICENSE vendored Normal file
View File

@ -0,0 +1,373 @@
Mozilla Public License Version 2.0
==================================
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

View File

@ -0,0 +1,296 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::query::{CRLiteCoverage, CRLiteQuery};
use clubcard::{AsQuery, Equation, Filterable};
use serde::Deserialize;
use std::collections::HashMap;
use base64::Engine;
use std::io::Read;
impl CRLiteCoverage {
    // The ct-logs.json file tells us which CT logs the ct-fetch process
    // monitored. For each log, it lists
    //  (1) the contiguous range of indices of Merkle tree leaves that
    //      ct-fetch downloaded,
    //  (2) the earliest and latest timestamps on those Merkle tree
    //      leaves, and
    //  (3) the maximum merge delay (MMD).
    //
    // Intuitively, "coverage" should reflect the [MinEntry, MaxEntry] range.
    // However, certificates only include timestamps, not indices, and
    // timestamps do not increase monotonically with leaf index.
    //
    // The timestamp in an embedded SCT is a promise from a log that it will
    // assign an index in the next MMD window. So if
    //      timestamp(Cert A) + MMD <= timestamp(Cert B)
    // then
    //      index(Cert A) < index(Cert B).
    //
    // It follows that a certificate has an index in [MinEntry, MaxEntry] if
    //      MinTimestamp + MMD <= timestamp(certificate) <= MaxTimestamp - MMD
    //
    // In the event that MinEntry = 0, we can refine this to
    //      0 <= timestamp(certificate) <= MaxTimestamp - MMD
    //

    /// Builds a coverage map from a `ct-logs.json` document read from `reader`.
    ///
    /// This is deliberately best-effort:
    /// * if the document cannot be parsed, an empty coverage map is returned;
    /// * entries whose `LogID` is not valid standard base64 decoding to
    ///   exactly 32 bytes are skipped;
    /// * entries whose computed interval is empty (`min_covered >= max_covered`)
    ///   are skipped.
    ///
    /// Both interval bounds are computed with saturating arithmetic, so
    /// extreme `MinTimestamp`/`MMD` values can neither panic nor wrap around
    /// into a spuriously wide interval.
    pub fn from_mozilla_ct_logs_json<T>(reader: T) -> Self
    where
        T: Read,
    {
        // Field names mirror the JSON keys, hence the non-snake-case names.
        #[allow(non_snake_case)]
        #[derive(Deserialize)]
        struct MozillaCtLogsJson {
            LogID: String,
            MaxTimestamp: u64,
            MinTimestamp: u64,
            MMD: u64,
            MinEntry: u64,
        }

        let json_entries: Vec<MozillaCtLogsJson> = match serde_json::from_reader(reader) {
            Ok(json_entries) => json_entries,
            _ => return CRLiteCoverage(Default::default()),
        };

        let mut coverage = HashMap::new();
        for entry in json_entries {
            let mut log_id = [0u8; 32];
            match base64::prelude::BASE64_STANDARD.decode(&entry.LogID) {
                Ok(bytes) if bytes.len() == 32 => log_id.copy_from_slice(&bytes),
                _ => continue,
            };

            let min_covered = if entry.MinEntry == 0 {
                0
            } else {
                // Saturate instead of overflowing: a wrapped sum would become
                // a tiny lower bound and wrongly widen the covered interval.
                // A saturated bound can never be `< max_covered`, so such an
                // entry is dropped by the check below.
                entry.MinTimestamp.saturating_add(entry.MMD)
            };
            let max_covered = entry.MaxTimestamp.saturating_sub(entry.MMD);

            if min_covered < max_covered {
                coverage.insert(log_id, (min_covered, max_covered));
            }
        }
        CRLiteCoverage(coverage)
    }
}
/// A single (issuer, serial) entry handed to the builder, tagged with
/// whether that serial is revoked.
pub struct CRLiteBuilderItem {
    /// issuer spki hash
    issuer: [u8; 32],
    /// serial number. TODO: smallvec?
    serial: Vec<u8>,
    /// revocation status
    revoked: bool,
}

impl CRLiteBuilderItem {
    /// Creates an item whose revocation flag is set.
    pub fn revoked(issuer: [u8; 32], serial: Vec<u8>) -> Self {
        Self::with_status(issuer, serial, true)
    }

    /// Creates an item whose revocation flag is clear.
    pub fn not_revoked(issuer: [u8; 32], serial: Vec<u8>) -> Self {
        Self::with_status(issuer, serial, false)
    }

    /// Shared constructor behind `revoked` and `not_revoked`.
    fn with_status(issuer: [u8; 32], serial: Vec<u8>, revoked: bool) -> Self {
        Self {
            issuer,
            serial,
            revoked,
        }
    }
}
impl<'a> From<&'a CRLiteBuilderItem> for CRLiteQuery<'a> {
fn from(item: &'a CRLiteBuilderItem) -> Self {
Self {
issuer: &item.issuer,
serial: &item.serial,
log_timestamp: None,
}
}
}
impl AsQuery<4> for CRLiteBuilderItem {
fn as_query(&self, m: usize) -> Equation<4> {
CRLiteQuery::from(self).as_query(m)
}
fn block(&self) -> &[u8] {
&self.issuer
}
fn discriminant(&self) -> &[u8] {
&self.serial
}
}
// `included` reports the item's `revoked` flag, so the revoked items are the
// ones marked as included.
impl Filterable<4> for CRLiteBuilderItem {
/// Returns `true` when this item was constructed as revoked.
fn included(&self) -> bool {
self.revoked
}
}
#[cfg(test)]
mod tests {
use crate::builder::*;
use clubcard::builder::*;
use clubcard::Membership;
use std::collections::HashMap;
// End-to-end check: builds a clubcard for five issuers with differently sized
// revoked subsets of a shared universe, then verifies every membership answer.
#[test]
fn test_crlite_clubcard() {
// Issuer `i` (issuer hash `[i; 32]`) revokes serials `0..subset_sizes[i]`
// out of serials `0..universe_size`.
let subset_sizes = [1 << 17, 1 << 16, 1 << 15, 1 << 14, 1 << 13];
let universe_size = 1 << 18;
let mut clubcard_builder = ClubcardBuilder::new();
// First pass: the approximate builders only see the revoked serials.
let mut approx_builders = vec![];
for (i, n) in subset_sizes.iter().enumerate() {
let mut r = clubcard_builder.new_approx_builder(&[i as u8; 32]);
for j in 0usize..*n {
let eq = CRLiteBuilderItem::revoked([i as u8; 32], j.to_le_bytes().to_vec());
r.insert(eq);
}
r.set_universe_size(universe_size);
approx_builders.push(r)
}
let approx_ribbons = approx_builders
.drain(..)
.map(ApproximateRibbon::from)
.collect();
println!("Approx ribbons:");
for r in &approx_ribbons {
println!("\t{}", r);
}
clubcard_builder.collect_approx_ribbons(approx_ribbons);
// Second pass: the exact builders see every serial in the universe, each
// tagged as revoked or not revoked.
let mut exact_builders = vec![];
for (i, n) in subset_sizes.iter().enumerate() {
let mut r = clubcard_builder.new_exact_builder(&[i as u8; 32]);
for j in 0usize..universe_size {
let item = if j < *n {
CRLiteBuilderItem::revoked([i as u8; 32], j.to_le_bytes().to_vec())
} else {
CRLiteBuilderItem::not_revoked([i as u8; 32], j.to_le_bytes().to_vec())
};
r.insert(item);
}
exact_builders.push(r)
}
let exact_ribbons = exact_builders.drain(..).map(ExactRibbon::from).collect();
println!("Exact ribbons:");
for r in &exact_ribbons {
println!("\t{}", r);
}
clubcard_builder.collect_exact_ribbons(exact_ribbons);
// Coverage contains a single log, `[0; 32]`, covering every timestamp.
let mut log_coverage = HashMap::new();
log_coverage.insert([0u8; 32], (0u64, u64::MAX));
let clubcard =
clubcard_builder.build::<CRLiteQuery>(CRLiteCoverage(log_coverage), Default::default());
println!("{}", clubcard);
let sum_subset_sizes: usize = subset_sizes.iter().sum();
let sum_universe_sizes: usize = subset_sizes.len() * universe_size;
// Printed for information only; nothing below asserts on this value.
let min_size = (sum_subset_sizes as f64)
* ((sum_universe_sizes as f64) / (sum_subset_sizes as f64)).log2()
+ 1.44 * ((sum_subset_sizes) as f64);
println!("Size lower bound {}", min_size);
println!("Checking construction");
println!(
"\texpecting {} included, {} excluded",
sum_subset_sizes,
subset_sizes.len() * universe_size - sum_subset_sizes
);
// Query every (issuer, serial) pair with unchecked_contains(), using queries
// that carry no log timestamp, and count the answers.
let mut included = 0;
let mut excluded = 0;
for i in 0..subset_sizes.len() {
let issuer = [i as u8; 32];
for j in 0..universe_size {
let serial = j.to_le_bytes();
let item = CRLiteQuery {
issuer: &issuer,
serial: &serial,
log_timestamp: None,
};
if clubcard.unchecked_contains(&item) {
included += 1;
} else {
excluded += 1;
}
}
}
println!("\tfound {} included, {} excluded", included, excluded);
assert!(sum_subset_sizes == included);
assert!(sum_universe_sizes - sum_subset_sizes == excluded);
// Test that querying a serial from a never-before-seen issuer results in a non-member return.
let issuer = [subset_sizes.len() as u8; 32];
let serial = 0usize.to_le_bytes();
let item = CRLiteQuery {
issuer: &issuer,
serial: &serial,
log_timestamp: None,
};
assert!(!clubcard.unchecked_contains(&item));
// Sanity check on the fixture: issuer 0 has at least one revoked serial
// (serial 0) and at least one non-revoked serial (serial universe_size - 1).
assert!(subset_sizes.len() > 0 && subset_sizes[0] > 0 && subset_sizes[0] < universe_size);
let issuer = [0u8; 32];
let revoked_serial = 0usize.to_le_bytes();
let nonrevoked_serial = (universe_size - 1).to_le_bytes();
// Test that calling contains() without a timestamp results in a NotInUniverse return.
let item = CRLiteQuery {
issuer: &issuer,
serial: &revoked_serial,
log_timestamp: None,
};
assert!(matches!(
clubcard.contains(&item),
Membership::NotInUniverse
));
// Test that calling contains() on a revoked serial with a timestamp in a covered
// interval results in a Member return.
let log_id = [0u8; 32];
let timestamp = (&log_id, 100);
let item = CRLiteQuery {
issuer: &issuer,
serial: &revoked_serial,
log_timestamp: Some(timestamp),
};
assert!(matches!(clubcard.contains(&item), Membership::Member));
// Test that calling contains() on a non-revoked serial with a timestamp in a covered
// interval results in a Nonmember return.
let timestamp = (&log_id, 100);
let item = CRLiteQuery {
issuer: &issuer,
serial: &nonrevoked_serial,
log_timestamp: Some(timestamp),
};
assert!(matches!(clubcard.contains(&item), Membership::Nonmember));
// Test that calling contains() with a timestamp from a log that has no coverage
// entry results in a NotInUniverse return, even for a revoked serial.
let log_id = [1u8; 32];
let timestamp = (&log_id, 100);
let item = CRLiteQuery {
issuer: &issuer,
serial: &revoked_serial,
log_timestamp: Some(timestamp),
};
assert!(matches!(
clubcard.contains(&item),
Membership::NotInUniverse
));
}
}

View File

@ -0,0 +1,9 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Filter construction is only compiled when the `builder` feature is enabled.
#[cfg(feature = "builder")]
pub mod builder;

// Query-side types live in a private module and are re-exported here.
mod query;

// `ClubcardError` is re-exported because `CRLiteClubcard::to_bytes` and
// `CRLiteClubcard::from_bytes` return it; without the re-export, downstream
// crates cannot name the error type or match on its variants.
pub use query::{CRLiteClubcard, CRLiteCoverage, CRLiteQuery, CRLiteStatus, ClubcardError};

View File

@ -0,0 +1,199 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use clubcard::{ApproximateSizeOf, AsQuery, Clubcard, Equation, Membership, Queryable};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::cmp::max;
use std::collections::HashMap;
use std::fmt;
// Const-generic parameter supplied to clubcard's `AsQuery`, `Equation`,
// `Queryable` and `Clubcard` throughout this file.
// NOTE(review): `as_query` below builds a `[0u64; 4]` literal, so it presumably
// has to stay in sync with this value -- confirm before changing it.
const W: usize = 4;
// 32-byte issuer SPKI hash identifying the issuer of a certificate.
type IssuerSpkiHash = [u8; 32];
// 32-byte CT log identifier; the key type of the coverage map.
type LogId = [u8; 32];
// Log timestamp as an unsigned 64-bit integer.
type Timestamp = u64;
// `(low, high)` bounds of a covered timestamp range; `in_universe` treats both
// bounds as inclusive.
type TimestampInterval = (Timestamp, Timestamp);
/// Universe metadata for CRLite clubcards: maps a log ID to the timestamp
/// interval covered for that log. A query is in the universe only when its
/// log timestamp falls inside the interval recorded for its log.
#[derive(Serialize, Deserialize)]
pub struct CRLiteCoverage(pub(crate) HashMap<LogId, TimestampInterval>);
/// A borrowed revocation query for one certificate, identified by issuer and
/// serial number, optionally qualified by one log timestamp.
#[derive(Clone, Debug)]
pub struct CRLiteQuery<'a> {
// Issuer SPKI hash; also returned as the query's block.
pub(crate) issuer: &'a IssuerSpkiHash,
// Serial number bytes; also returned as the query's discriminant.
pub(crate) serial: &'a [u8],
// Optional (log ID, timestamp) pair. `in_universe` returns false when it is `None`.
pub(crate) log_timestamp: Option<(&'a LogId, Timestamp)>,
}
impl<'a> CRLiteQuery<'a> {
pub fn new(
issuer: &'a IssuerSpkiHash,
serial: &'a [u8],
log_timestamp: Option<(&'a LogId, u64)>,
) -> CRLiteQuery<'a> {
CRLiteQuery {
issuer,
serial,
log_timestamp,
}
}
}
impl<'a> AsQuery<W> for CRLiteQuery<'a> {
    /// The block is the issuer SPKI hash.
    fn block(&self) -> &[u8] {
        &self.issuer[..]
    }

    /// Derives the query's equation from SHA-256(issuer || serial).
    ///
    /// The 32-byte digest is read as four little-endian `u64` words. The low
    /// bit of the first word is forced to 1, and the last word modulo
    /// `max(1, m)` becomes the first argument to `Equation::homogeneous`.
    fn as_query(&self, m: usize) -> Equation<W> {
        let mut sha = Sha256::new();
        sha.update(self.issuer);
        sha.update(self.serial);
        let mut hash = [0u8; 32];
        sha.finalize_into((&mut hash).into());

        // TODO: use array_chunks::<8>() when stable
        let mut words = [0u64; 4];
        for (slot, chunk) in words.iter_mut().zip(hash.chunks_exact(8)) {
            // `chunks_exact(8)` only yields 8-byte slices, so this cannot fail.
            *slot = u64::from_le_bytes(chunk.try_into().unwrap());
        }
        words[0] |= 1;

        // `max(1, m)` guards against a modulo by zero when `m == 0`.
        let start = (words[3] as usize) % max(1, m);
        Equation::homogeneous(start, words)
    }

    /// The discriminant is the serial number.
    fn discriminant(&self) -> &[u8] {
        self.serial
    }
}
impl<'a> Queryable<W> for CRLiteQuery<'a> {
    type UniverseMetadata = CRLiteCoverage;

    // The set of CRLiteKeys is partitioned by issuer, and each
    // CRLiteKey knows its issuer. So there's no need for additional
    // partition metadata.
    type PartitionMetadata = ();

    /// Returns `true` only when the query carries a log timestamp, the log
    /// has a coverage entry, and the timestamp lies within that entry's
    /// inclusive `[low, high]` interval.
    fn in_universe(&self, universe: &Self::UniverseMetadata) -> bool {
        match self.log_timestamp {
            None => false,
            Some((log_id, timestamp)) => matches!(
                universe.0.get(log_id),
                Some((low, high)) if *low <= timestamp && timestamp <= *high
            ),
        }
    }
}
/// Errors returned by `CRLiteClubcard::to_bytes` and `CRLiteClubcard::from_bytes`.
#[derive(Debug)]
pub enum ClubcardError {
/// Serializing the clubcard failed.
Serialize,
/// The input was too short to hold a version number, or the payload could
/// not be deserialized.
Deserialize,
/// The leading version number is not the one this crate writes.
UnsupportedVersion,
}
/// Outcome of a CRLite lookup; each variant corresponds to one clubcard
/// `Membership` value (see the `From<Membership>` impl).
#[derive(Debug, PartialEq, Eq)]
pub enum CRLiteStatus {
/// Converted from `Membership::Nonmember`.
Good,
/// Converted from `Membership::NotInUniverse`; also what
/// `CRLiteClubcard::contains` returns when no supplied timestamp is covered.
NotCovered,
/// Converted from `Membership::NoData`.
NotEnrolled,
/// Converted from `Membership::Member`.
Revoked,
}
/// Translates clubcard's generic membership answer into CRLite terms.
impl From<Membership> for CRLiteStatus {
    fn from(membership: Membership) -> CRLiteStatus {
        match membership {
            Membership::Member => Self::Revoked,
            Membership::Nonmember => Self::Good,
            Membership::NoData => Self::NotEnrolled,
            Membership::NotInUniverse => Self::NotCovered,
        }
    }
}
/// Newtype around a `Clubcard` that uses `CRLiteCoverage` as universe metadata
/// and `()` as partition metadata.
pub struct CRLiteClubcard(Clubcard<W, CRLiteCoverage, ()>);
impl From<Clubcard<W, CRLiteCoverage, ()>> for CRLiteClubcard {
fn from(inner: Clubcard<W, CRLiteCoverage, ()>) -> CRLiteClubcard {
CRLiteClubcard(inner)
}
}
impl AsRef<Clubcard<W, CRLiteCoverage, ()>> for CRLiteClubcard {
fn as_ref(&self) -> &Clubcard<W, CRLiteCoverage, ()> {
&self.0
}
}
/// Formats exactly like the wrapped clubcard.
impl std::fmt::Display for CRLiteClubcard {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
impl CRLiteClubcard {
// Cascade-based CRLite filters use version numbers 0x0000, 0x0001, and 0x0002.
const SERIALIZATION_VERSION: u16 = 0x0003;
/// Serialize this clubcard.
pub fn to_bytes(&self) -> Result<Vec<u8>, ClubcardError> {
let mut out = u16::to_le_bytes(Self::SERIALIZATION_VERSION).to_vec();
bincode::serialize_into(&mut out, &self.0).map_err(|_| ClubcardError::Serialize)?;
Ok(out)
}
/// Deserialize a clubcard.
pub fn from_bytes(bytes: &[u8]) -> Result<Self, ClubcardError> {
if bytes.len() < std::mem::size_of::<u16>() {
return Err(ClubcardError::Deserialize);
}
let (version_bytes, rest) = bytes.split_at(std::mem::size_of::<u16>());
let Ok(version_bytes) = version_bytes.try_into() else {
return Err(ClubcardError::Deserialize);
};
let version = u16::from_le_bytes(version_bytes);
if version != Self::SERIALIZATION_VERSION {
return Err(ClubcardError::UnsupportedVersion);
}
bincode::deserialize(rest)
.map(CRLiteClubcard)
.map_err(|_| ClubcardError::Deserialize)
}
pub fn contains<'a>(
&self,
issuer_spki_hash: &'a IssuerSpkiHash,
serial: &'a [u8],
timestamps: impl Iterator<Item = (&'a LogId, Timestamp)>,
) -> CRLiteStatus {
for (log_id, timestamp) in timestamps {
let crlite_key = CRLiteQuery::new(issuer_spki_hash, serial, Some((log_id, timestamp)));
let status = self.0.contains(&crlite_key).into();
if status == CRLiteStatus::NotCovered {
continue;
}
return status;
}
CRLiteStatus::NotCovered
}
}
impl ApproximateSizeOf for CRLiteCoverage {
    /// Estimate the in-memory size in bytes: the size of the map value itself plus one
    /// `LogId` and one `TimestampInterval` for each entry counted by `self.0.len()`.
    fn approximate_size_of(&self) -> usize {
        let per_entry = size_of::<LogId>() + size_of::<TimestampInterval>();
        let container = size_of::<HashMap<LogId, TimestampInterval>>();
        container + self.0.len() * per_entry
    }
}
impl ApproximateSizeOf for CRLiteClubcard {
    /// Report the approximate size of the wrapped clubcard.
    fn approximate_size_of(&self) -> usize {
        let Self(clubcard) = self;
        clubcard.approximate_size_of()
    }
}

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"5211a25c813f720cc98b0a461c53316e741741d37d3c4f4a69c410b94cbd1a0b","LICENSE":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","README.md":"cb41e528a187cc241c4d7a27a77fc49416f74ee16fd1a576cdb173e0155ce384","src/builder.rs":"36043cdf45e77b93c9ac0f8573cdc9d584fb3159ea78e77b425001ab210df279","src/clubcard.rs":"f09a84aaaef5b52461e8c94a31bb3c37a701cd44980117f7d49a0d44b59b995e","src/equation.rs":"00c8b782a3398a296281acaa942e29d2fa89cafda525ba1ff2cc7142fe6780bf","src/lib.rs":"79701c7179054a801800576868d7eb3a812ca87c4a345b092d16790f92364764","src/query.rs":"23c397a4e20f8009716e7fc5081f3f1473294a49d16713c44087c229eabc4412"},"package":"30b529ab1dcb6d5e3a03107e59da1249f8f4efe1e7e123eea064a2e42c6f1e3e"}

40
third_party/rust/clubcard/Cargo.toml vendored Normal file
View File

@ -0,0 +1,40 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
name = "clubcard"
version = "0.3.0"
authors = ["John M. Schanck <jschanck@mozilla.com>"]
build = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Clubcard is an exact membership query filter for static sets"
readme = "README.md"
license = "MPL-2.0"
repository = "https://github.com/jschanck/clubcard/"
[lib]
name = "clubcard"
path = "src/lib.rs"
[dependencies.rand]
version = "0.8.5"
optional = true
[dependencies.serde]
version = "1.0"
features = ["derive"]
[features]
builder = ["dep:rand"]

373
third_party/rust/clubcard/LICENSE vendored Normal file
View File

@ -0,0 +1,373 @@
Mozilla Public License Version 2.0
==================================
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

78
third_party/rust/clubcard/README.md vendored Normal file
View File

@ -0,0 +1,78 @@
# Clubcard
Clubcard is an exact membership query filter for static sets.
It is based on the *Ribbon filters* of Dillinger and Walzer[^1] and Dillinger, Hübschle-Schneider, Sanders, and Walzer[^2]. And it makes use of a partitioning strategy described by Mike Hamburg in his Real World Crypto 2022 talk[^3].
> [!WARNING]
> This is work in progress. Neither the API nor the serialization format for clubcard are stable.
## Application to CRLite and performance
CRLite[^4] publishes the revocation status of certificates in the WebPKI in a compact exact membership query filter. The system, as described in[^4] and as implemented by Mozilla, encodes revocation statuses in a *Bloom filter cascade*.
The Bloom filter cascade published by Mozilla's CRLite infrastructure on 2024-08-29 is 19.3 MB. That filter encodes the revocation status of 816 million certificates from 795 distinct issuers. Out of the 816 million certificates, 11.7 million are revoked.
Clubcard was developed as a replacement for the Bloom filter cascade in CRLite. A clubcard for the 2024-08-29 revocation status dataset is **8.5 MB**&mdash;a 56% reduction in size.
Some of the improvement is due to Clubcard partitioning the set of revocations by issuer. The information theoretic lower bound for encoding a generic 11.7 million element subset of an 816 million element set is 11 MB, whereas the lower bound for encoding the 795 sets obtained by partitioning the revocations by issuer is 7.6 MB. CRLite does not employ the partitioning-by-issuer trick, but we estimate that Bloom filter cascades for the 2024-08-29 data set sharded by issuer would be ~13 MB.
The remainder of the improvement is due to the use of Ribbon filters.
While we have not finished implementing all of the tricks employed by state-of-the-art Ribbon filter implementations, our 8.5 MB clubcard filter is within 12% of the 7.6 MB lower bound for the data set. We believe the overhead can be reduced to ~8% without any new ideas.
> [!NOTE]
> The "information theoretic lower bound" should be taken with a grain of salt, as it depends on how the set of certificates is partitioned. There are many other ways to partition the set of certificates, e.g. by issuer AND the month of the notAfter date. We are currently exploring whether other partitioning strategies could further reduce filter size.
## Mathematical intuition
We say that a set U is *filterable* if, for every positive integer m, there is a hash function h<sub>m</sub> : U → ({0,...,m-1}, {0,1}<sup>256</sup>). We view tuples (s, a) in the range of h<sub>m</sub> as elements of (**F**<sub>2</sub>)<sup>d</sup>, for any d, by equating (s, a) with \[0<sup>s</sup> | a | 0<sup>d - s - 256</sup>\] (or some prefix thereof if d < s + 256).
Let R be a subset of a filterable set U. A *clubcard for R* is a pair of matrices X and Y defined over **F**<sub>2</sub> and a pair of hash functions h and g. The matrices and hash functions are chosen such that
R = { u ∈ U : h(u) · X = 0 ∧ g(u) · Y = 0 }.
Given a clubcard for R and an element u of U, one can determine membership of u in R by computing h(u) · X and g(u) · Y.
To produce a clubcard for R ⊆ U, we:
1) Enumerate the elements of R (arbitrarily, any order will do).
2) Let m = (1+epsilon)|R|.
3) Let H be the |R| × m matrix with rows given by h<sub>m</sub>(r) for r ∈ R.
4) Let X be a solution to H · X = 0<sup>|R| x k</sup> where k = floor(lg(|U \ R| / |R|)).
5) Let S = {u ∈ U : h(u) · X = 0}.
6) Let m' = (1+epsilon)|S|.
7) Let G be the |S| × m' matrix with rows given by h<sub>m'</sub>(s) for s ∈ S
8) Let C be the |S| × 1 matrix with the row for element s equal to 0 iff s ∈ R.
9) Let Y be a solution to G · Y = C.
The clubcard is (X, Y, m, m').
We use the fast algorithm for solving the systems in steps 4 and 9 from [^1][^2]. The system in step 4 is always solvable, but the epsilon parameter must be chosen carefully to ensure that |S| is not too large. The system in step 9 may not be solvable. So, in addition to (X, Y, m, m'), clubcards may be published with a list of elements of U\R that are not encoded correctly. The epsilon parameter can be chosen to ensure that this list is small with high probability. Alternatively one may repeat the construction until the system is solvable, e.g. by tweaking the definition of U to include a seed which is used to randomize the hash functions.
## When partitioning is a good idea
The binomial coefficient n choose r is approximately equal to 2<sup>n h(r/n)</sup> where h is the binary entropy function (this follows from Stirling's approximation for n!).
The number of bits required to encode an arbitrary r element subset R of an n element set U is therefore Ω(lg(n choose r)) = Ω(n h(r/n)). The binary entropy function is concave, so
&nbsp;&nbsp;&nbsp;&nbsp;n<sub>1</sub> h(r<sub>1</sub>/n<sub>1</sub>) + n<sub>2</sub> h(r<sub>2</sub>/n<sub>2</sub>) ≤ (n<sub>1</sub> + n<sub>2</sub>) h((r<sub>1</sub>+r<sub>2</sub>)/(n<sub>1</sub>+n<sub>2</sub>)).
It follows that if {U1, U2} is a partition of U, then one may be able to encode the pair (R1, R2) in fewer bits than it would take to encode R.
> [!NOTE]
> For partitioning to be beneficial, the values |Ri|/|Ui| must vary considerably between blocks of the partition.
## Minor optimizations
TODO:
- How this library handles partitions.
- Benefits of sorting partitions by decreasing rank(Xi) and storing only the coefficients x<sub>i,j</sub> of X where j < rank(Xi)
- Cache locality of queries / use of interleaved column-major order from[^2].
- Compact encoding of R = {} and R = U cases.
- .....
[^1]: Peter C. Dillinger, Stefan Walzer. "Ribbon filter: practically smaller than Bloom and Xor". https://arxiv.org/pdf/2103.02515
[^2]: Peter C. Dillinger, Lorenz Hübschle-Schneider, Peter Sanders, Stefan Walzer. "Fast Succinct Retrieval and Approximate
Membership using Ribbon". https://arxiv.org/pdf/2109.01892
[^3]: Mike Hamburg. "Improved CRL compression with structured linear functions". https://youtu.be/Htms5rNy7B8?list=PLeeS-3Ml-rpovBDh6do693We_CP3KTnHU&t=2357
[^4]: James Larisch, David Choffnes, Dave Levin, Bruce M. Maggs, Alan Mislove, Christo Wilson. "CRLite: A Scalable System for Pushing All TLS Revocations to All Browsers". https://jameslarisch.com/pdf/crlite.pdf

692
third_party/rust/clubcard/src/builder.rs vendored Normal file
View File

@ -0,0 +1,692 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::{
clubcard::ClubcardIndex, Clubcard, ClubcardIndexEntry, Equation, Filterable, Queryable,
};
use rand::{thread_rng, Rng};
use std::collections::BTreeMap;
use std::fmt;
/// Marker type selecting the exact flavour of `Ribbon` (see the `ExactRibbon` alias).
///
/// It carries no data. It exists so the type system can check that, for example, only Exact
/// ribbons are passed to functions such as Clubcard::collect_exact_ribbons.
pub struct Exact;
/// A Ribbon Filter that encodes a one bit value for every element of the associated universe.
///
/// This is `Ribbon` with the `Exact` marker as its third type parameter.
pub type ExactRibbon<const W: usize, T> = Ribbon<W, T, Exact>;
/// Marker type selecting the approximate flavour of `Ribbon` (see the `ApproximateRibbon` alias).
///
/// It carries no data. It exists so the type system can check that, for example, only
/// Approximate ribbons are passed to functions such as Clubcard::collect_approximate_ribbons.
pub struct Approximate;
/// A Ribbon Filter that identifies a subset of a universe with a false positive rate of
/// roughly |subset| / |universe|.
///
/// This is `Ribbon` with the `Approximate` marker as its third type parameter.
pub type ApproximateRibbon<const W: usize, T> = Ribbon<W, T, Approximate>;
/// A RibbonBuilder collects a set of items for insertion into a Ribbon. If the optional filter is
/// provided, then only items that are contained in the filter will be inserted.
///
/// A builder is turned into a ribbon through the `From<RibbonBuilder>` conversions implemented
/// for `ApproximateRibbon` and `ExactRibbon`.
pub struct RibbonBuilder<'a, const W: usize, T: Filterable<W>> {
/// Block id; it is handed on to the ribbon that is built from this builder.
id: Vec<u8>,
/// Items queued for insertion.
items: Vec<T>,
/// Optional filter for pruning insertions: when set, `insert` drops items that the filter
/// does not contain.
filter: Option<&'a PartitionedRibbonFilter<W, T, Approximate>>,
/// Size of the universe that contains self.items. Initialised to 0 by `new` and changed with
/// `set_universe_size`.
universe_size: usize,
/// Whether queries against this ribbon indicate membership in R (inverted = false) or
/// membership in U \ R (inverted = true).
inverted: bool,
}
impl<'a, const W: usize, T: Filterable<W>> RibbonBuilder<'a, W, T> {
    /// Create an empty builder for the block named `id`.
    ///
    /// When `filter` is given, later calls to `insert` only keep items the filter contains.
    /// The universe size starts at 0 and the builder is not inverted.
    fn new(
        id: &[u8],
        filter: Option<&'a PartitionedRibbonFilter<W, T, Approximate>>,
    ) -> RibbonBuilder<'a, W, T> {
        Self {
            id: id.to_vec(),
            items: Vec::new(),
            filter,
            universe_size: 0,
            inverted: false,
        }
    }

    /// Queue `item` for insertion into the ribbon (if it is contained in the provided filter).
    pub fn insert(&mut self, item: T) {
        // Without a filter every item is accepted.
        let accepted = match self.filter {
            Some(filter) => filter.contains(&item),
            None => true,
        };
        if accepted {
            self.items.push(item);
        }
    }

    /// Set the size of the universe. This only needs to be called if you
    /// are constructing an ApproximateRibbon.
    pub fn set_universe_size(&mut self, universe_size: usize) {
        self.universe_size = universe_size;
    }
}
impl<'a, const W: usize, T: Filterable<W>> From<RibbonBuilder<'a, W, T>>
    for ApproximateRibbon<W, T>
{
    /// Denote the inserted set by R and the universe by U.
    /// The ribbon returned by ApproximateRibbon::from encodes a function f : U -> {0, 1} where
    /// f(x) = 0 if and only if x is in R union S where S is a (random) subset of U \ R of size
    /// ~|R|. In other words, the ribbon solves the approximate membership query problem with a
    /// false positive rate roughly 2^-r = |R| / (|U| - |R|).
    /// The size of this ribbon is proportional to r|R|.
    ///
    /// # Panics
    /// Panics if more items were queued than the configured universe size, or if any
    /// insertion ends up in the exception list.
    fn from(mut builder: RibbonBuilder<'a, W, T>) -> ApproximateRibbon<W, T> {
        assert!(builder.items.len() <= builder.universe_size);
        let universe_size = builder.universe_size;

        // R == U: nothing needs to be stored, so build an empty ribbon with the opposite
        // polarity instead.
        if builder.items.len() == universe_size {
            return ApproximateRibbon::new(&builder.id, 0, universe_size, !builder.inverted);
        }

        let mut out = ApproximateRibbon::new(
            &builder.id,
            builder.items.len(),
            universe_size,
            builder.inverted,
        );
        builder.items.drain(..).for_each(|item| {
            out.insert(item);
        });
        // Insertions should not fail for a homogeneous system.
        assert!(out.exceptions.is_empty());
        out
    }
}
impl<'a, const W: usize, T: Filterable<W>> From<RibbonBuilder<'a, W, T>> for ExactRibbon<W, T> {
    /// Denote the inserted set by R and the universe by U.
    /// The ribbon returned by ExactRibbon::from encodes the function "f(x) = 0 iff x in R". The
    /// size of this ribbon is proportional to |U|. In the typical use case, the set U is the
    /// result of filtering a larger universe with a false positive rate of 2^-r. This allows for
    /// exact encoding of R-membership using a pair of filters of total size ~(r+2)|R|.
    ///
    /// # Panics
    /// Panics if a universe size was set and it differs from the number of queued items.
    fn from(mut builder: RibbonBuilder<'a, W, T>) -> ExactRibbon<W, T> {
        assert!(builder.universe_size == 0 || builder.universe_size == builder.items.len());

        match builder.filter {
            // The approximate filter is empty, so it gives a definitive result on every
            // item and there's nothing to encode in the exact filter.
            Some(filter) if filter.block_is_empty(&builder.id) => {
                return ExactRibbon::new(&builder.id, 0, filter.block_is_inverted(&builder.id));
            }
            _ => {}
        }

        let mut out = ExactRibbon::new(&builder.id, builder.items.len(), builder.inverted);

        // Included items go in first (each group keeps its queue order), so that any
        // exceptions that occur during insertion are for excluded items.
        let (included, excluded): (Vec<T>, Vec<T>) = builder
            .items
            .drain(..)
            .partition(|item| item.included());
        for item in included.into_iter().chain(excluded) {
            out.insert(item);
        }
        out
    }
}
/// A compact representation of a linear system AX = B
pub struct Ribbon<const W: usize, T: Filterable<W>, ApproxOrExact> {
/// A block identifier. Used to build an index for partitioned filters.
id: Vec<u8>,
/// The overhead: the fraction of extra rows allocated beyond the size the ribbon was
/// constructed for. Both constructors in this file set it to 0.02.
epsilon: f64,
/// Equal to floor((1+epsilon) * size), where `size` is the subset size (approximate ribbons)
/// or the total item count (exact ribbons) given to the constructor.
m: usize,
/// For approximate ribbons the rank is floor(log2((universe_size - subset_size) / subset_size)),
/// or 0 when the subset is empty or covers at least half of the universe. Exact ribbons
/// always use rank 1.
rank: usize,
/// A linear system in which each equation has s in {0, ..., m-1}. The vector starts with m
/// zero rows; `insert_equation` grows it if it meets an equation whose s is past the end.
rows: Vec<Equation<W>>,
/// A (typically short) list of items that failed insertion
exceptions: Vec<T>,
/// Whether queries against this ribbon indicate membership in R (inverted = false) or
/// membership in U \ R (inverted = true).
inverted: bool,
/// Marker for whether this is an Approximate or an Exact filter.
phantom: std::marker::PhantomData<ApproxOrExact>,
}
impl<const W: usize, T: Filterable<W>, ApproxOrExact> fmt::Display for Ribbon<W, T, ApproxOrExact> {
    /// Write a one-line diagnostic summary of this ribbon: its id, `m`, the number of rows,
    /// the rank, the number of exceptions, `epsilon`, and the overhead.
    ///
    /// The overhead is the fraction of rows that are still zero. A ribbon without any rows
    /// reports an overhead of 0 instead of the NaN that 0/0 would produce.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let total_rows = self.rows.len();
        let zero_rows = self.rows.iter().filter(|eq| eq.is_zero()).count();
        // Empty ribbons (e.g. built for a subset of size 0) have no rows at all; guard the
        // division so the summary never prints "NaN".
        let overhead = if total_rows == 0 {
            0.0
        } else {
            zero_rows as f64 / total_rows as f64
        };
        write!(
            f,
            "ribbon({:?}): m: {}, rows: {}, rank: {}, exceptions: {}, epsilon: {}, overhead {}",
            self.id,
            self.m,
            total_rows,
            self.rank,
            self.exceptions.len(),
            self.epsilon,
            overhead
        )
    }
}
impl<const W: usize, T: Filterable<W>> ApproximateRibbon<W, T> {
    /// Construct an empty ribbon to encode a set R of size `subset_size` in a universe U of size
    /// `universe_size`.
    ///
    /// The system gets floor((1 + epsilon) * subset_size) zero rows. The rank is
    /// floor(log2((universe_size - subset_size) / subset_size)), or 0 when R is empty or holds
    /// at least half of U.
    ///
    /// # Panics
    /// Panics if `subset_size` exceeds `universe_size`.
    fn new(id: &[u8], subset_size: usize, universe_size: usize, inverted: bool) -> Self {
        assert!(subset_size <= universe_size);
        // TODO: Tune epsilon as a function of the inputs. Numerical experiments?
        let epsilon = 0.02;
        let m = ((1.0 + epsilon) * (subset_size as f64)).floor() as usize;

        let no_rank_needed = subset_size == 0 || 2 * subset_size >= universe_size;
        let rank = if no_rank_needed {
            0
        } else {
            let complement_size = (universe_size - subset_size) as f64;
            (complement_size / (subset_size as f64)).log2().floor() as usize
        };

        Self {
            id: id.to_vec(),
            epsilon,
            m,
            rank,
            rows: vec![Equation::zero(); m],
            exceptions: Vec::new(),
            inverted,
            phantom: std::marker::PhantomData,
        }
    }
}
impl<const W: usize, T: Filterable<W>> ExactRibbon<W, T> {
    /// Construct an empty ribbon sized for `size` items.
    ///
    /// The system gets floor((1 + epsilon) * size) zero rows and the rank is fixed at 1.
    /// `id` is copied into the ribbon as its block identifier and `inverted` is stored as given.
    fn new(id: &impl AsRef<[u8]>, size: usize, inverted: bool) -> Self {
        // TODO: Tune epsilon as a function of the inputs. Numerical experiments?
        let epsilon = 0.02;
        let m = ((1.0 + epsilon) * (size as f64)).floor() as usize;
        let id_bytes: &[u8] = id.as_ref();

        Self {
            id: id_bytes.to_vec(),
            epsilon,
            m,
            rank: 1,
            rows: vec![Equation::zero(); m],
            exceptions: Vec::new(),
            inverted,
            phantom: std::marker::PhantomData,
        }
    }
}
impl<const W: usize, T: Filterable<W>, ApproxOrExact> Ribbon<W, T, ApproxOrExact> {
/// Hash the item to an Equation and insert it into the system.
///
/// Returns `true` if the equation was stored as a new row or turned out to be redundant.
/// Otherwise the item is appended to `self.exceptions` and `false` is returned.
fn insert(&mut self, item: T) -> bool {
let mut eq = item.as_query(self.m);
// The constant term encodes membership: 0 for included items, 1 otherwise.
eq.b = if item.included() { 0 } else { 1 };
// A non-zero query must be aligned, i.e. have its lowest coefficient bit set.
assert!(eq.is_zero() || eq.a[0] & 1 == 1);
let rv = self.insert_equation(eq);
if !rv {
self.exceptions.push(item)
}
rv
}
/// Insert an equation into the system using Algorithm 1 from <https://arxiv.org/pdf/2103.02515>
///
/// The equation is repeatedly added to the row stored at its start position until it
/// either lands in an empty row (returns `true`) or reduces to a constant, in which case
/// the result is `true` only if that constant is 0.
fn insert_equation(&mut self, mut eq: Equation<W>) -> bool {
loop {
if eq.is_zero() {
return eq.b == 0; /* redundant (b=0) or inconsistent (b!=0) */
}
// Reduction may push the start position past the current last row; grow to fit.
if eq.s >= self.rows.len() {
// TODO: could be smarter here
self.rows.resize_with(eq.s + 1, Equation::zero);
}
let cur = &mut self.rows[eq.s];
if cur.is_zero() {
*cur = eq;
return true; /* inserted */
}
// Row is occupied: eliminate the leading coefficient and retry at the new position.
eq.add(cur);
}
}
/// Solve the system using back-substitution. If this is a block in a larger system, the `tail`
/// argument should be set to the solution vector for the block to the right of this one.
///
/// The result is bit-packed into 64-bit limbs: bit `i` is the value chosen for row `i`, and
/// the bits of `tail` follow starting at bit `self.rows.len()`.
fn solve(&self, tail: &[u64]) -> Vec<u64> {
let mut z = vec![0u64; ((self.rows.len() + 63) / 64) + tail.len()];
// insert tail into z starting at bit self.rows.len()
let k = self.rows.len() / 64;
let p = self.rows.len() % 64;
if p == 0 {
z[k..(tail.len() + k)].copy_from_slice(tail);
} else {
// Unaligned case: each tail limb straddles two limbs of z.
for i in 0..tail.len() {
z[k + i] |= tail[i] << p;
z[k + i + 1] = tail[i] >> (64 - p)
}
}
// Solve by back substitution
for i in (0..self.rows.len()).rev() {
let limb = i / 64;
let pos = i % 64;
let z_i = if self.rows[i].is_zero() {
// Row i has a zero in column i, so we're free to choose.
// TODO: We want multiple calls to solve() to give a different
// solutions (when the system is suitably under-determined),
// but it might be nice if this was deterministic.
thread_rng().gen::<u8>()
} else {
// Let z' be the vector we get by setting bit i of z to z'_i.
// Since z_i is zero, and row i has a one in column i, we have
// row_i(z') = z'_i ^ row_i(z).
// We want row_i(z') = b, so we must choose
// z'_i = row_i(z) ^ b.
self.rows[i].eval(&z) ^ self.rows[i].b
};
// Only the low bit of z_i is used (the random branch yields a full byte).
z[limb] |= ((z_i & 1) as u64) << pos;
}
z
}
}
/// Per-block metadata needed to query a solved `PartitionedRibbonFilter`.
#[derive(Debug)]
struct PartitionedRibbonFilterIndexEntry {
/// Bit position at which this block's rows start within each solution column.
offset: usize,
/// The `m` parameter passed to `as_query` when hashing items of this block.
m: usize,
/// Number of solution columns that are checked when querying this block.
rank: usize,
/// Discriminants of the items that could not be inserted into the block's ribbon.
exceptions: Vec<Vec<u8>>,
/// Whether query results for this block are to be inverted.
inverted: bool,
}
/// Lookup table from block identifiers to the metadata of the corresponding block.
type PartitionedRibbonFilterIndex =
BTreeMap</* block id */ Vec<u8>, PartitionedRibbonFilterIndexEntry>;
/// A solution to a ribbon system, along with metadata necessary for querying it.
struct PartitionedRibbonFilter<const W: usize, T: Filterable<W>, ApproxOrExact> {
/// Per-block metadata (offset, m, rank, exceptions, inverted), keyed by block identifier.
index: PartitionedRibbonFilterIndex,
/// The solution columns, each bit-packed into 64-bit limbs.
solution: Vec<Vec<u64>>,
/// Marker for the item type this filter was built from.
phantom: std::marker::PhantomData<T>,
/// Marker for whether this is an Approximate or an Exact filter.
phantom2: std::marker::PhantomData<ApproxOrExact>,
}
// Displays the filter by debug-printing its block index; the solution columns are omitted.
impl<const W: usize, T: Filterable<W>, ApproxOrExact> fmt::Display
    for PartitionedRibbonFilter<W, T, ApproxOrExact>
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let index = &self.index;
        write!(f, "PartitionedRibbonFilter({:?})", index)
    }
}
// NOTE: `Approximate` is declared as a generic parameter of this impl (it shadows any type of
// the same name), so these methods are available for every marker type.
impl<const W: usize, T: Filterable<W>, Approximate> PartitionedRibbonFilter<W, T, Approximate> {
    /// Returns `true` if `block` is known and has `m == 0`. Unknown blocks yield `false`.
    fn block_is_empty(&self, block: &[u8]) -> bool {
        self.index.get(block).map_or(false, |entry| entry.m == 0)
    }

    /// Returns the `inverted` flag of `block`. Unknown blocks yield `false`.
    fn block_is_inverted(&self, block: &[u8]) -> bool {
        self.index.get(block).map_or(false, |entry| entry.inverted)
    }

    /// Check if this filter contains the given item in the item's block.
    ///
    /// Items whose block is not in the index are never contained. Otherwise the raw result
    /// is XOR-ed with the block's `inverted` flag.
    fn contains(&self, item: &T) -> bool {
        let Some(entry) = self.index.get(item.block()) else {
            return false;
        };
        let hit = match entry.m {
            // Empty blocks do not contain anything,
            // despite having inner product 0 with everything.
            0 => false,
            m => {
                let mut query = item.as_query(m);
                query.s += entry.offset;
                // The query must evaluate to 0 on each of the block's solution columns ...
                let solved = (0..entry.rank).all(|col| query.eval(&self.solution[col]) == 0);
                // ... and the item must not be listed as an exception.
                solved
                    && !entry
                        .exceptions
                        .iter()
                        .any(|known| known == item.discriminant())
            }
        };
        hit ^ entry.inverted
    }
}
impl<const W: usize, T: Filterable<W>, ApproxOrExact> From<Vec<Ribbon<W, T, ApproxOrExact>>>
for PartitionedRibbonFilter<W, T, ApproxOrExact>
{
/// Solves the block system formed by `blocks` and records, for every block, the metadata
/// needed to query the solution.
fn from(
mut blocks: Vec<Ribbon<W, T, ApproxOrExact>>,
) -> PartitionedRibbonFilter<W, T, ApproxOrExact> {
// Sort ribbons by descending rank (descending simplifies indexing).
blocks.sort_unstable_by(|a, b| b.rank.cmp(&a.rank));
// Solve the (block) system.
// The blocks are sorted by descending rank. We need at least one solution (i.e. column
// vector) per block, but we need no more than i solutions for a block of rank i.
// Concretely, suppose the ranks are [4, 2, 1, 0]. Then our solution can look like
// block 0: | | | |
// block 1: | | 0 0
// block 2: | 0 0 0
// block 3: | 0 0 0
// Since we serialize the block identifiers, offsets, and ranks in the final filter, we
// don't need to encode the zeros.
let mut solution = vec![];
// One column per unit of the largest rank (the first block after sorting).
for i in 0..blocks.first().map_or(0, |first| first.rank) {
// Back substitution across blocks.
// Blocks are visited right-to-left; each solve() receives the solution of the blocks
// after it as `tail`. Blocks with rank <= i do not take part in column i.
let mut tail = vec![];
for j in (0..blocks.len()).rev() {
if blocks[j].rank > i {
tail = blocks[j].solve(&tail);
}
}
solution.push(tail);
}
// construct the index---a map from a block identifier to that
// block's offset in the solution vector.
let mut index = PartitionedRibbonFilterIndex::new();
let mut offset = 0;
for block in &blocks {
// Only the discriminants of the exceptions are kept in the index.
let exceptions = block
.exceptions
.iter()
.map(|x| x.discriminant().to_vec())
.collect();
index.insert(
block.id.clone(),
PartitionedRibbonFilterIndexEntry {
offset,
m: block.m,
rank: block.rank,
exceptions,
inverted: block.inverted,
},
);
// Advance by the actual number of rows, which insert_equation may have grown.
offset += block.rows.len();
}
PartitionedRibbonFilter {
index,
solution,
phantom: std::marker::PhantomData,
phantom2: std::marker::PhantomData,
}
}
}
/// A pair of ribbon filters that, together, solve the exact membership query problem.
///
/// Both fields start out as `None` and are filled in by `collect_approx_ribbons` and
/// `collect_exact_ribbons` respectively.
pub struct ClubcardBuilder<const W: usize, T: Filterable<W>> {
/// An approximate membership query filter to whittle down the universe
/// to a manageable size.
approx_filter: Option<PartitionedRibbonFilter<W, T, Approximate>>,
/// An exact membership query filter to confirm membership in R for items that
/// pass through the approximate filter.
exact_filter: Option<PartitionedRibbonFilter<W, T, Exact>>,
}
// Implemented by hand: `#[derive(Default)]` would additionally require `T: Default`.
impl<const W: usize, T: Filterable<W>> Default for ClubcardBuilder<W, T> {
    /// Returns a builder in which neither filter has been collected yet.
    fn default() -> Self {
        Self {
            approx_filter: None,
            exact_filter: None,
        }
    }
}
impl<const W: usize, T: Filterable<W>> ClubcardBuilder<W, T> {
pub fn new() -> Self {
ClubcardBuilder::default()
}
pub fn new_approx_builder(&self, block: &[u8]) -> RibbonBuilder<'static, W, T> {
assert!(self.approx_filter.is_none());
RibbonBuilder::new(block, None)
}
pub fn new_exact_builder<'a>(&'a self, block: &[u8]) -> RibbonBuilder<'a, W, T> {
RibbonBuilder::new(block, self.approx_filter.as_ref())
}
pub fn collect_approx_ribbons(&mut self, ribbons: Vec<ApproximateRibbon<W, T>>) {
self.approx_filter = Some(PartitionedRibbonFilter::from(ribbons));
}
pub fn collect_exact_ribbons(&mut self, ribbons: Vec<Ribbon<W, T, Exact>>) {
self.exact_filter = Some(PartitionedRibbonFilter::from(ribbons));
}
pub fn build<U: Queryable<W>>(
self,
universe: U::UniverseMetadata,
partition: U::PartitionMetadata,
) -> Clubcard<W, U::UniverseMetadata, U::PartitionMetadata> {
let mut index: ClubcardIndex = BTreeMap::new();
assert!(self.approx_filter.is_some());
let approx_filter = self.approx_filter.unwrap();
for (block, entry) in approx_filter.index {
let meta = ClubcardIndexEntry {
approx_filter_offset: entry.offset,
approx_filter_m: entry.m,
approx_filter_rank: entry.rank,
exact_filter_offset: 0,
exact_filter_m: 0,
inverted: entry.inverted,
exceptions: entry.exceptions,
};
index.insert(block, meta);
}
assert!(self.exact_filter.is_some());
let mut exact_filter = self.exact_filter.unwrap();
for (block, entry) in exact_filter.index {
assert!(entry.rank == 1);
let meta = index.get_mut(&block).unwrap();
meta.exact_filter_offset = entry.offset;
meta.exact_filter_m = entry.m;
assert!(meta.inverted == entry.inverted);
meta.exceptions.extend(entry.exceptions);
}
assert!(exact_filter.solution.len() == 1);
let exact_filter = exact_filter.solution.pop().unwrap();
Clubcard {
universe,
partition,
index,
approx_filter: approx_filter.solution,
exact_filter,
}
}
}
// Unit tests for ribbon construction, solving and querying.
#[cfg(test)]
mod tests {
use crate::builder::*;
use crate::*;
use rand::distributions::{Distribution, Uniform};
use rand::Rng;
// Construct the equation a(x) = x_i
fn std_eq<const W: usize>(i: usize) -> Equation<W> {
let mut a = [0u64; W];
a[0] = 1;
Equation::homogeneous(i, a)
}
// Construct an random aligned equation using the given distribution for s.
fn rand<const W: usize>(s_dist: &impl Distribution<usize>) -> Equation<W> {
let mut rng = rand::thread_rng();
let s = s_dist.sample(&mut rng);
let mut a = [0u64; W];
for a_i in a.iter_mut() {
*a_i = rng.gen();
}
// Force the lowest coefficient bit so that the equation is aligned.
a[0] |= 1;
Equation::inhomogeneous(s, a, rng.gen::<u8>() & 1)
}
// Test-only impls that let a raw Equation stand in for an item: it hashes to itself
// and lives in the block with the empty identifier.
impl<const W: usize> AsQuery<W> for Equation<W> {
fn as_query(&self, _m: usize) -> Equation<W> {
self.clone()
}
fn block(&self) -> &[u8] {
&[]
}
fn discriminant(&self) -> &[u8] {
// NOTE(review): transmutes `&[u64]` to `&[u8]` with no SAFETY justification. Presumably
// the slice length (W elements) is carried over unchanged, so this would expose W bytes
// rather than 8*W -- confirm before reusing this outside of tests.
unsafe { std::mem::transmute(&self.a[..]) }
}
}
impl<const W: usize> Filterable<W> for Equation<W> {
// An equation counts as included when its constant term is 0.
fn included(&self) -> bool {
self.b == 0
}
}
impl<const W: usize> Queryable<W> for Equation<W> {
type UniverseMetadata = ();
type PartitionMetadata = ();
fn in_universe(&self, _meta: &Self::UniverseMetadata) -> bool {
true
}
}
// Inserting x_i = 0 for every i must give a solution on which each equation evaluates to 0.
#[test]
fn test_solve_identity() {
let n = 1024;
let mut builder = RibbonBuilder::new(&[], None);
for i in 0usize..n {
let eq: Equation<1> = std_eq(i);
builder.insert(eq);
}
let ribbon = ExactRibbon::from(builder);
let filter = PartitionedRibbonFilter::from(vec![ribbon]);
for i in 0usize..n {
let eq: Equation<1> = std_eq(i);
assert!(eq.eval(&filter.solution[0]) == 0);
}
}
// A filter built from a builder with no insertions must not contain anything.
#[test]
fn test_solve_empty() {
let builder = RibbonBuilder::<4, Equation<4>>::new(&[0], None);
let ribbon = ApproximateRibbon::from(builder);
let filter = PartitionedRibbonFilter::from(vec![ribbon]);
assert!(!filter.contains(&std_eq(0)));
}
// The solution of a ribbon filled with random equations must satisfy each of them.
#[test]
fn test_solve_random() {
let n = 1024;
const W: usize = 2;
let mut r = Ribbon::<W, Equation<W>, Exact>::new(&[0], n, false);
let mut s_dist = Uniform::new(0, r.m);
let mut eqs = Vec::with_capacity(n);
for _ in 0..n {
let eq = rand(&mut s_dist);
eqs.push(eq.clone());
r.insert(eq);
}
let x = r.solve(&[]);
for eq in &eqs {
assert!(eq.eval(&x) == eq.b);
}
}
#[test]
fn test_total_approx_filter() {
// test that approximate filters that encode R=U are encoded
// as a zero-length solution vector with m=0 and inverted=true
// in the metadata.
let n = 1024;
let mut approx_builder = RibbonBuilder::new(&[], None);
approx_builder.set_universe_size(n);
for i in 0usize..n {
let eq: Equation<1> = std_eq(i);
approx_builder.insert(eq);
}
let approx_ribbon = ApproximateRibbon::from(approx_builder);
let approx_filter = PartitionedRibbonFilter::from(vec![approx_ribbon]);
let approx_index_entry = approx_filter
.index
.get(&vec![])
.expect("should have metadata");
assert!(approx_index_entry.m == 0);
assert!(approx_index_entry.rank == 0);
assert!(approx_index_entry.exceptions.is_empty());
assert!(approx_index_entry.inverted);
for i in 0usize..n {
let eq = std_eq(i);
assert!(approx_filter.contains(&eq));
}
assert!(approx_filter.solution.len() == 0);
// The exact filter built on top of the total approximate filter must be empty and
// inverted as well.
let mut exact_builder = RibbonBuilder::new(&[], Some(&approx_filter));
for i in 0usize..n {
let mut eq = std_eq(i);
eq.b = 0;
exact_builder.insert(eq);
}
let exact_ribbon = ExactRibbon::from(exact_builder);
let exact_filter = PartitionedRibbonFilter::from(vec![exact_ribbon]);
let exact_index_entry = exact_filter
.index
.get(&vec![])
.expect("should have metadata");
assert!(exact_index_entry.m == 0);
assert!(exact_index_entry.rank == 1);
assert!(exact_index_entry.exceptions.is_empty());
assert!(exact_index_entry.inverted);
for i in 0usize..n {
let eq = std_eq(i);
assert!(exact_filter.contains(&eq));
}
assert!(exact_filter.solution.len() == 1);
assert!(exact_filter.solution[0].len() == 0);
}
// With 768 of 1024 items inserted the approximate ribbon has rank 0, so the filter
// has no solution columns and reports every item of the universe as contained.
#[test]
fn test_rank_0_approx_filter() {
let n = 1024;
let mut builder = RibbonBuilder::new(&[], None);
builder.set_universe_size(n);
for i in 0usize..768 {
let eq: Equation<1> = std_eq(i);
builder.insert(eq);
}
let ribbon = ApproximateRibbon::from(builder);
let filter = PartitionedRibbonFilter::from(vec![ribbon]);
let entry = filter.index.get(&vec![]).expect("should have metadata");
assert!(entry.rank == 0);
assert!(!entry.inverted);
assert!(filter.solution.len() == 0);
for i in 0usize..n {
let eq = std_eq(i);
assert!(filter.contains(&eq));
}
}
}

View File

@ -0,0 +1,189 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::query::Queryable;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::fmt;
/// The outcome of a checked membership query (`Clubcard::contains`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Membership {
    /// The filter reports the item as a member of the encoded set.
    Member,
    /// The filter reports the item as not being a member of the encoded set.
    Nonmember,
    /// The item's `in_universe` check failed, so no query was performed.
    NotInUniverse,
    /// The item's block is not present in the clubcard's index.
    NoData,
}

impl From<bool> for Membership {
    /// Maps `true` to `Member` and `false` to `Nonmember`.
    fn from(b: bool) -> Membership {
        if b {
            Membership::Member
        } else {
            Membership::Nonmember
        }
    }
}
/// Metadata needed to compute membership in a clubcard.
///
/// NOTE(review): this struct derives `Serialize`/`Deserialize`; presumably the field order is
/// part of the serialized format, so do not reorder fields without confirming.
#[derive(Default, Serialize, Deserialize)]
pub struct ClubcardIndexEntry {
/// Description of the hash function h.
/// (Passed as `m` to `as_query` when building the approximate query.)
pub approx_filter_m: usize,
/// Description of the hash function g.
/// (Passed as `m` to `as_query` when building the exact query.)
pub exact_filter_m: usize,
/// The number of columns in X.
pub approx_filter_rank: usize,
/// An offset t such that [0^t || h(u)] * X = h(u) * Xi, where i is the block identifier.
pub approx_filter_offset: usize,
/// An offset t such that [0^t || g(u)] * Y = g(u) * Yi, where i is the block identifier.
pub exact_filter_offset: usize,
/// Whether to invert the output of queries to this block.
pub inverted: bool,
/// A list of elements of Ui \ Ri that are not correctly encoded by this block.
/// Entries are compared against `discriminant()` of the queried item.
pub exceptions: Vec<Vec<u8>>,
}
/// Lookup table from block identifiers to block metadata.
/// Queries look up the entry for `item.block()`.
pub type ClubcardIndex = BTreeMap</* block id */ Vec<u8>, ClubcardIndexEntry>;
/// A queryable Clubcard
///
/// `W` is the width, in 64-bit limbs, of the equations that items are hashed to.
#[derive(Serialize, Deserialize)]
pub struct Clubcard<const W: usize, UniverseMetadata, PartitionMetadata> {
/// Metadata for determining whether a Queryable is in the encoded universe.
pub(crate) universe: UniverseMetadata,
/// Metadata for determining the block to which a Queryable belongs.
pub(crate) partition: PartitionMetadata,
/// Lookup table for per-block metadata.
pub(crate) index: ClubcardIndex,
/// The matrix X
/// (one bit-packed vector per column).
pub(crate) approx_filter: Vec<Vec<u64>>,
/// The matrix Y
/// (a single bit-packed column).
pub(crate) exact_filter: Vec<u64>,
}
// Prints the size of the two filter matrices in bytes and the total number of exceptions.
impl<const W: usize, UniverseMetadata, PartitionMetadata> fmt::Display
    for Clubcard<W, UniverseMetadata, PartitionMetadata>
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Both matrices are stored as 64-bit limbs, i.e. 8 bytes per element.
        let approx_limbs: usize = self.approx_filter.iter().map(Vec::len).sum();
        let approx_size = 8 * approx_limbs;
        let exact_size = 8 * self.exact_filter.len();
        let exceptions: usize = self
            .index
            .values()
            .map(|meta| meta.exceptions.len())
            .sum();
        writeln!(
            f,
            "Clubcard of size {} ({} + {})",
            approx_size + exact_size,
            approx_size,
            exact_size
        )?;
        writeln!(f, "- exceptions: {}", exceptions)
    }
}
impl<const W: usize, UniverseMetadata, PartitionMetadata>
    Clubcard<W, UniverseMetadata, PartitionMetadata>
{
    /// Perform a membership query without checking whether the item is in the universe.
    /// The result is undefined if the item is not in the universe. The result is also
    /// undefined if U's implementation of AsQuery differs from T's.
    ///
    /// Items whose block is missing from the index yield `false`. Otherwise the raw result
    /// is XOR-ed with the block's `inverted` flag.
    pub fn unchecked_contains<T>(&self, item: &T) -> bool
    where
        T: Queryable<W, PartitionMetadata = PartitionMetadata>,
    {
        let Some(meta) = self.index.get(item.block()) else {
            return false;
        };
        // All queries evaluate to 0 on an empty filter, but logically such a filter does
        // not include anything, hence the leading `m != 0` test.
        let hit = meta.approx_filter_m != 0
            && {
                // h(item) * X must be 0 ...
                let approx_query = item.as_approx_query(meta);
                (0..meta.approx_filter_rank)
                    .all(|col| approx_query.eval(&self.approx_filter[col]) == 0)
            }
            // ... g(item) * Y must be 0 ...
            && item.as_exact_query(meta).eval(&self.exact_filter) == 0
            // ... and the item must not be a recorded exception.
            && !meta
                .exceptions
                .iter()
                .any(|known| known == item.discriminant());
        hit ^ meta.inverted
    }

    /// Check that the item is in the appropriate universe, and then perform a membership query.
    ///
    /// Returns `NotInUniverse` if the universe check fails, `NoData` if the item's block is
    /// not in the index, and `Member` / `Nonmember` otherwise.
    pub fn contains<T>(&self, item: &T) -> Membership
    where
        T: Queryable<W, UniverseMetadata = UniverseMetadata, PartitionMetadata = PartitionMetadata>,
    {
        if !item.in_universe(&self.universe) {
            Membership::NotInUniverse
        } else if !self.index.contains_key(item.block()) {
            Membership::NoData
        } else {
            Membership::from(self.unchecked_contains(item))
        }
    }

    /// The universe metadata stored in this clubcard.
    pub fn universe(&self) -> &UniverseMetadata {
        &self.universe
    }

    /// The partition metadata stored in this clubcard.
    pub fn partition(&self) -> &PartitionMetadata {
        &self.partition
    }
}
/// Helper trait for (approximate) heap memory usage analysis in Firefox
pub trait ApproximateSizeOf {
/// Returns an estimate of the size of `self` in bytes.
///
/// The default implementation returns `size_of::<Self>()`, which does not account for
/// anything the value owns behind a pointer.
fn approximate_size_of(&self) -> usize
where
Self: Sized,
{
size_of::<Self>()
}
}
// Unit metadata uses the default implementation, i.e. `size_of::<()>()`.
impl ApproximateSizeOf for () {}
impl ApproximateSizeOf for ClubcardIndex {
    /// Size of the map itself plus one `ClubcardIndexEntry` per stored block. Block
    /// identifiers and the contents of the exception lists are not counted.
    fn approximate_size_of(&self) -> usize {
        let per_entry = size_of::<ClubcardIndexEntry>();
        size_of::<Self>() + per_entry * self.len()
    }
}
impl<const W: usize, UniverseMetadata, PartitionMetadata> ApproximateSizeOf
    for Clubcard<W, UniverseMetadata, PartitionMetadata>
where
    UniverseMetadata: ApproximateSizeOf,
    PartitionMetadata: ApproximateSizeOf,
{
    /// Sums the approximate sizes of the metadata and the index with the size in bytes of
    /// the two filter matrices.
    fn approximate_size_of(&self) -> usize {
        // The matrices are stored as u64 limbs. Their element counts must be scaled to
        // bytes so that they are commensurable with the `size_of`-based terms.
        let limb_size = std::mem::size_of::<u64>();
        let approx_limbs: usize = self.approx_filter.iter().map(|column| column.len()).sum();
        let exact_limbs = self.exact_filter.len();
        self.universe.approximate_size_of()
            + self.partition.approximate_size_of()
            + self.index.approximate_size_of()
            + limb_size * approx_limbs
            + limb_size * exact_limbs
    }
}

View File

@ -0,0 +1,158 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use std::cmp::min;
/// An Equation\<W\> is a representation of a GF(2) linear functional
///   a(x) = b + sum_i a_i x_i
/// where a_i is equal to zero except for i in a block of 64*W coefficients
/// starting at i=s. We say an Equation is /aligned/ if a_s = 1.
/// (Note: a_i above denotes the i-th bit, not the i'th 64-bit limb.)
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Equation<const W: usize> {
    pub s: usize,    // the row number
    pub a: [u64; W], // the non-trivial columns
    pub b: u8,       // the constant term
}

impl<const W: usize> Equation<W> {
    /// Construct the equation a(x) = sum_{i=0}^{64*W-1} a_i x_{s+i}.
    /// The result is aligned.
    pub fn homogeneous(s: usize, a: [u64; W]) -> Equation<W> {
        Equation::inhomogeneous(s, a, 0)
    }

    /// Construct the equation a(x) = b + sum_{i=0}^{64*W-1} a_i x_{s+i}.
    /// The result is aligned.
    pub fn inhomogeneous(s: usize, a: [u64; W], b: u8) -> Equation<W> {
        // Adding the zero equation leaves the coefficients unchanged but aligns them;
        // the resulting shift is then applied on top of the requested start position.
        let mut eq = Equation { s: 0, a, b };
        eq.add(&Equation::zero());
        eq.s += s;
        eq
    }

    /// Construct the equation a(x) = 0.
    pub fn zero() -> Self {
        Equation {
            s: 0,
            a: [0u64; W],
            b: 0,
        }
    }

    /// Is this a(x) = 1 or a(x) = 0?
    /// (That is: are all coefficients zero? The constant term is not inspected.)
    pub fn is_zero(&self) -> bool {
        // TODO: is_const? or maybe this gets the point across.
        self.a == [0u64; W]
    }

    /// Adds `other` into `self`, i.e. sets self.a ^= other.a and self.b ^= other.b and then aligns
    /// the result.
    ///
    /// Aligning shifts the coefficients down until bit 0 is set and advances `self.s` by
    /// the same number of bit positions, so the equation keeps its meaning. If the sum has
    /// no non-zero coefficient, `self.s` is left untouched.
    ///
    /// # Panics
    /// Panics if `self.s != other.s`.
    pub fn add(&mut self, other: &Equation<W>) {
        assert!(self.s == other.s);
        // Add the equations in GF(2)
        for (lhs, rhs) in self.a.iter_mut().zip(other.a.iter()) {
            *lhs ^= *rhs;
        }
        self.b ^= other.b;
        // Exit early if this equation is now zero.
        if self.is_zero() {
            return;
        }
        // Shift until there is a non-zero bit in the lowest limb. Dropping a whole zero
        // limb moves every coefficient down by 64 positions, so the start must follow.
        while self.a[0] == 0 {
            self.a.rotate_left(1);
            self.s += 64;
        }
        // Shift first non-zero bit to position 0.
        let k = self.a[0].trailing_zeros();
        if k == 0 {
            // Already aligned; also avoids the out-of-range shift by 64 below.
            return;
        }
        for i in 0..W - 1 {
            self.a[i] >>= k;
            self.a[i] |= self.a[i + 1] << (64 - k);
        }
        self.a[W - 1] >>= k;
        // Update the starting position
        self.s += k as usize;
    }

    /// Computes a(z) = sum a_i z_i.
    ///
    /// `z` is a bit vector packed into 64-bit limbs. Bits beyond the end of `z` are
    /// treated as zero. The constant term `b` is not part of the result.
    pub fn eval(&self, z: &[u64]) -> u8 {
        // Compute a(z), noting that this only depends
        // on 64*W bits of z starting from position s.
        let limb = self.s / 64;
        let shift = self.s % 64;
        let mut r: u64 = 0;
        for i in limb..min(z.len(), limb + W) {
            // Assemble the 64 bits of z that line up with coefficient limb i - limb.
            let mut tmp = z[i] >> shift;
            if i + 1 < z.len() && shift != 0 {
                tmp |= z[i + 1] << (64 - shift);
            }
            r ^= tmp & self.a[i - limb];
        }
        // The parity of the accumulated bits is the GF(2) inner product.
        (r.count_ones() & 1) as u8
    }
}
// Unit tests for Equation::add and Equation::eval.
#[cfg(test)]
mod tests {
use crate::Equation;
#[test]
fn test_equation_add() {
// Single-limb case: 0b11 ^ 0b01 = 0b10, which is aligned by shifting one bit.
let mut e1 = Equation {
s: 127,
a: [0b11],
b: 1,
};
let e2 = Equation {
s: 127,
a: [0b01],
b: 1,
};
e1.add(&e2);
// test that shifting works
assert!(e1.s == 128);
assert!(e1.a[0] == 0b1);
assert!(e1.b == 0);
// Multi-limb case: the low bit of the second limb must move into the top of the first.
let mut e1 = Equation {
s: 127,
a: [0b11, 0b1110, 0b1, 0],
b: 1,
};
let e2 = Equation {
s: 127,
a: [0b01, 0b0100, 0b0, 0],
b: 1,
};
e1.add(&e2);
// test that shifting works
assert!(e1.s == 128);
assert!(e1.a[0] == 0b1);
// test that bits move between limbs
assert!(e1.a[1] == (1 << 63) | 0b101);
assert!(e1.a[2] == 0);
assert!(e1.a[3] == 0);
assert!(e1.b == 0);
}
#[test]
fn test_equation_eval() {
// An equation with 64 consecutive ones starting at bit s picks up exactly the bits
// s..s+64 of the input.
for s in 0..64 {
let eq = Equation {
s,
a: [0xffffffffffffffff, 0, 0, 0],
b: 0,
};
// An empty input evaluates to 0.
assert!(0 == eq.eval(&[]));
for i in 0..64 {
assert!(((i >= eq.s) as u8) == eq.eval(&[1 << i, 0]));
assert!(((i < eq.s) as u8) == eq.eval(&[0, 1 << i]));
assert!(0 == eq.eval(&[0, 0, 1 << i]));
}
}
}
}

53
third_party/rust/clubcard/src/lib.rs vendored Normal file
View File

@ -0,0 +1,53 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! **UNSTABLE / EXPERIMENTAL**
//!
//! Clubcard is a compact data-structure that solves the exact membership query problem.
//!
//! Given some universe of objects U, a subset R of U, and two hash functions defined on U (as
//! described below), Clubcard outputs a compact encoding of the function `f : U -> {0, 1}` defined
//! by `f(x) = 0 if and only if x ∈ R`.
//!
//! Clubcard is based on the Ribbon filters from
//! - <https://arxiv.org/pdf/2103.02515>, and
//! - <https://arxiv.org/pdf/2109.01892>
//!
//! And some ideas from Mike Hamburg's RWC 2022 talk
//! - <https://rwc.iacr.org/2022/program.php#abstract-talk-39>
//! - <https://youtu.be/Htms5rNy7B8?list=PLeeS-3Ml-rpovBDh6do693We_CP3KTnHU&t=2357>
//!
//! The construction will be described in detail in a forthcoming paper.
//!
//! At a high level, a clubcard is a pair of matrices (X, Y) defined over GF(2). The hash
//! functions h and g map elements of U to vectors in the domain of X and Y respectively.
//!
//! The matrix X is a solution to `H * X = 0` where the rows of H are obtained by hashing the
//! elements of R with h. The number of columns in X is floor(lg(|U\R| / |R|)).
//!
//! The matrix Y is a solution to `G * Y = C` where the rows of G are obtained by hashing, with g,
//! the elements u ∈ U for which h(u) * X = 0. The matrix Y has one column. The rows of C encode
//! membership in R.
//!
//! Given (X, Y) and an element u ∈ U, we have that u ∈ R iff h(u) * X = 0 and g(u) * Y = 0.
//!
//! Clubcard was developed to replace the use of Bloom cascades in CRLite. In a preliminary
//! experiment using a real-world collection of 12.2M revoked certs and 789.2M non-revoked certs,
//! the currently-deployed Bloom cascade implementation of CRLite produces a 19.8MB filter in 293
//! seconds (on a Ryzen 3975WX with 64GB of RAM). This Clubcard implementation produces an 8.5MB
//! filter in 200 seconds.
//!
//#![warn(missing_docs)]
#[cfg(feature = "builder")]
pub mod builder;
mod clubcard;
pub use clubcard::{ApproximateSizeOf, Clubcard, ClubcardIndexEntry, Membership};
mod equation;
pub use equation::Equation;
mod query;
pub use query::{AsQuery, Filterable, Queryable};

60
third_party/rust/clubcard/src/query.rs vendored Normal file
View File

@ -0,0 +1,60 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::clubcard::ClubcardIndexEntry;
use crate::equation::Equation;
pub trait AsQuery<const W: usize> {
/// Hash this item to a homogeneous equation (s, a) such that
/// (1) s is uniform in {0, 1, ..., m-1},
/// (2) a satisfies the alignment requirement (a\[0\] & 1 == 1) but is otherwise uniformly random,
fn as_query(&self, m: usize) -> Equation<W>;
/// The block that this item belongs in.
fn block(&self) -> &[u8];
/// A unique identifier for this item. If this item cannot be inserted into the linear system,
/// then we will store its `included()` status in a secondary retrieval mechanism keyed by
/// `discriminant()`.
fn discriminant(&self) -> &[u8];
#[doc(hidden)]
fn as_approx_query(&self, meta: &ClubcardIndexEntry) -> Equation<W> {
let mut approx_eq = self.as_query(meta.approx_filter_m);
approx_eq.s += meta.approx_filter_offset;
approx_eq
}
#[doc(hidden)]
fn as_exact_query(&self, meta: &ClubcardIndexEntry) -> Equation<W> {
let mut exact_eq = self.as_query(meta.exact_filter_m);
exact_eq.s += meta.exact_filter_offset;
exact_eq
}
}
/// A Filterable is an item that can be inserted into a RibbonBuilder.
pub trait Filterable<const W: usize>: AsQuery<W> {
/// Whether this item should be included in an exact filter.
///
/// Defaults to `false`. Implementations override this to mark included items.
fn included(&self) -> bool {
false
}
}
/// A Queryable is an item that can be passed to Clubcard::contains.
pub trait Queryable<const W: usize>: AsQuery<W> {
/// The universe metadata is stored in the Clubcard and passed to Queryable::in_universe before
/// a query is performed. The query is performed only if Queryable::in_universe returns true.
type UniverseMetadata;
/// The partition metadata is stored in the Clubcard and is used to construct a Queryable with
/// the correct block identifier. For example, a clubcard that encodes a subset of the integers
/// {0, ..., n} might have a set of named non-overlapping intervals as its partition metadata:
/// { "a": {0, ..., 32}, "b": {33, ..., 100}, ... }. To perform a membership test for 37,
/// the user would use the partition metadata to construct a Queryable with block = "b" and
/// discriminant = 37.
type PartitionMetadata;
/// Returns whether this item belongs to the universe described by `meta`.
fn in_universe(&self, meta: &Self::UniverseMetadata) -> bool;
}