diff --git a/Cargo.lock b/Cargo.lock index 164c974..c1ec355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,7 +74,7 @@ checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] @@ -86,7 +86,7 @@ checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] @@ -98,7 +98,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -109,7 +109,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -130,6 +130,15 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "boa_ast" version = "0.20.0" @@ -227,7 +236,7 @@ checksum = "9fd3f870829131332587f607a7ff909f1af5fc523fd1b192db55fbbdf52e8d3c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] @@ -292,7 +301,7 @@ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -346,6 +355,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -358,6 +376,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctor" version = "0.6.2" @@ -431,6 +459,16 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -439,7 +477,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -449,14 +487,17 @@ dependencies = [ "anyhow", "boa_engine", "droplet-rs", + "format-bytes", + "hashing-reader", "hex", - "md5", + "humansize", "napi", "napi-build", "napi-derive", "rhai", "serde", "serde_json", + "sha2", "tokio", "tokio-util", "uuid", @@ -537,6 +578,26 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "format-bytes" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48942366ef93975da38e175ac9e10068c6fc08ca9e85930d4f098f4d5b14c2fd" +dependencies = [ + "format-bytes-macros", +] + +[[package]] +name = "format-bytes-macros" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "203aadebefcc73d12038296c228eabf830f99cba991b0032adf20e9fa6ce7e4f" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "futures" version = "0.3.31" @@ -593,7 +654,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -626,6 +687,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -677,12 +748,32 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashing-reader" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490b80ea8c6d700506827951d7196fede236081b9c976a42b30598fa77377d15" +dependencies = [ + "digest", + "pin-project", + "tokio", +] + [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + [[package]] name = "icu_collections" version = "1.5.0" @@ -798,7 +889,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -876,6 +967,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + [[package]] name = "litemap" version = "0.7.5" @@ -891,12 +988,6 @@ dependencies = [ "scopeguard", ] -[[package]] -name = "md5" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" - [[package]] name = "memchr" version = "2.7.6" @@ -965,7 +1056,7 @@ dependencies = [ "napi-derive-backend", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -978,7 +1069,7 @@ dependencies = [ "proc-macro2", "quote", "semver", - "syn", + "syn 2.0.111", ] [[package]] @@ -1060,7 +1151,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1158,7 +1249,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1170,6 +1261,26 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1359,7 +1470,7 @@ checksum = "d4322a2a4e8cf30771dd9f27f7f37ca9ac8fe812dddd811096a98483080dabe6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1457,7 +1568,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1473,6 +1584,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1535,6 +1657,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.111" @@ -1554,7 +1687,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1595,7 +1728,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1606,7 +1739,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1730,6 +1863,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicode-ident" version = "1.0.22" @@ -1781,7 +1920,7 @@ checksum = "39d11901c36b3650df7acb0f9ebe624f35b5ac4e1922ecd3c57f444648429594" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1837,7 +1976,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.111", "wasm-bindgen-shared", ] @@ -2040,7 +2179,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] @@ -2061,7 +2200,7 @@ checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -2081,7 +2220,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] @@ -2110,5 +2249,5 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] diff --git a/Cargo.toml b/Cargo.toml index 2587e81..6635eeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,6 @@ napi = { version = "3.0.0-beta.11", default-features = false, features = [ ] } napi-derive = "3.0.0-beta.11" hex = "0.4.3" -md5 = "0.7.0" tokio = { version = "1.45.1", features = ["fs", "io-util"] } tokio-util = { version = "0.7.15", features = ["codec"] } rhai = "1.22.2" @@ -26,6 +25,10 @@ boa_engine = "0.20.0" serde_json = "1.0.143" anyhow = "*" droplet-rs = { git = "https://github.com/Drop-OSS/droplet-rs.git" } +hashing-reader = "0.1.0" +sha2 = "0.10.9" +format-bytes = "0.3.0" +humansize = "2.1.3" [dependencies.serde] version = "1.0.210" diff --git a/__test__/utils.spec.mjs b/__test__/utils.spec.mjs index 6edc2fc..6660e20 100644 --- a/__test__/utils.spec.mjs +++ b/__test__/utils.spec.mjs @@ -149,6 +149,9 @@ test("zip manifest test", async (t) => { (_, __) => {} ) ); + const files = await listFiles("./assets/" + zipFile); + + if(Object.keys(manifest).length == 0) return t.fail("manifest was empty") for (const [filename, data] of Object.entries(manifest)) { let start = 0; diff --git a/src/lib.rs b/src/lib.rs index f149e46..15ee362 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ #![deny(clippy::panic)] #![feature(trait_alias)] #![feature(iterator_try_collect)] +#![feature(int_roundings)] pub mod manifest; diff --git a/src/manifest.rs b/src/manifest.rs index a623b8f..28e87d0 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -1,23 +1,36 @@ use std::{collections::HashMap, sync::Arc, thread}; +use droplet_rs::versions::types::VersionFile; +use hashing_reader::HashingReader; +use hex::ToHex; +use humansize::{format_size, BINARY}; use napi::{ threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode}, Result, }; +use serde::Serialize; use serde_json::json; +use sha2::{Digest, Sha256}; use tokio::io::AsyncReadExt as _; use uuid::Uuid; use crate::version::create_backend_for_path; -const CHUNK_SIZE: usize = 1024 * 1024 * 64; +const CHUNK_SIZE: u64 = 1024 * 1024 * 64; +const WIGGLE: u64 = 1024 * 1024 * 1; -#[derive(serde::Serialize)] -struct ChunkData { +#[derive(Serialize)] +struct FileEntry { + filename: String, + start: usize, + length: usize, permissions: u32, - ids: Vec, - checksums: Vec, - lengths: Vec, +} + +#[derive(Serialize)] +struct ChunkData { + files: Vec, + checksum: String, } #[napi] @@ -29,89 +42,185 @@ pub fn call_alt_thread_func(tsfn: Arc>) -> Result<(), Str Ok(()) } +#[derive(Serialize)] +struct Manifest { + version: String, + chunks: HashMap, + size: u64, +} + #[napi] -pub async fn generate_manifest<'a>( +pub async fn generate_manifest( dir: String, - progress_sfn: ThreadsafeFunction, + progress_sfn: ThreadsafeFunction, log_sfn: ThreadsafeFunction, ) -> anyhow::Result { let mut backend = create_backend_for_path(dir).ok_or(napi::Error::from_reason( "Could not create backend for path.", ))?; - let required_single_file = backend.require_whole_files(); + let required_single_file = true; //backend.require_whole_files(); - let files = backend.list_files().await?; + let mut files = backend.list_files().await?; // Filepath to chunk data - let mut chunks: HashMap = HashMap::new(); + let mut chunks: Vec> = Vec::new(); + let mut current_chunk: Vec<(VersionFile, u64, u64)> = Vec::new(); - let total: i32 = files.len() as i32; - let mut i: i32 = 0; - - let mut buf = [0u8; 1024 * 16]; + log_sfn.call( + Ok(format!("organizing files into chunks...",)), + ThreadsafeFunctionCallMode::NonBlocking, + ); for version_file in files { - let mut reader = backend.reader(&version_file, 0, 0).await?; + // let mut reader = backend.reader(&version_file, 0, 0).await?; - let mut chunk_data = ChunkData { - permissions: version_file.permission, - ids: Vec::new(), - checksums: Vec::new(), - lengths: Vec::new(), - }; + // If we need the whole file, and this file would take up a whole chunk, add it to it's own chunk and move on + if required_single_file && version_file.size >= CHUNK_SIZE { + let size = version_file.size; + chunks.push(vec![(version_file, 0, size)]); - let mut chunk_index = 0; - loop { - let mut length = 0; - let mut buffer: Vec = Vec::new(); - let mut file_empty = false; - - loop { - let read = reader.read(&mut buf).await?; - - length += read; - - // If we're out of data, add this chunk and then move onto the next file - if read == 0 { - file_empty = true; - break; - } - - buffer.extend_from_slice(&buf[0..read]); - - if length >= CHUNK_SIZE && !required_single_file { - break; - } - } - - let chunk_id = Uuid::new_v4(); - let checksum = md5::compute(buffer).0; - let checksum_string = hex::encode(checksum); - - chunk_data.ids.push(chunk_id.to_string()); - chunk_data.checksums.push(checksum_string); - chunk_data.lengths.push(length); - - let log_str = format!( - "Processed chunk {} for {}", - chunk_index, &version_file.relative_filename - ); - - log_sfn.call(Ok(log_str), ThreadsafeFunctionCallMode::Blocking); - - chunk_index += 1; - - if file_empty { - break; - } + continue; } - chunks.insert(version_file.relative_filename, chunk_data); + let mut current_size = current_chunk.iter().map(|v| v.2 - v.1).sum::(); - i += 1; - let progress = i * 100 / total; + // If we need the whole file, add this current file and move on, potentially adding and creating new chunk if need be + if required_single_file { + let size = version_file.size.try_into().unwrap(); + current_chunk.push((version_file, 0, size)); + + current_size += size; + + if current_size >= CHUNK_SIZE { + // Pop current and add, then reset + let new_chunk = std::mem::replace(&mut current_chunk, Vec::new()); + chunks.push(new_chunk); + } + + continue; + } + + // Otherwise we calculate how much of the file we need, then use that much + let remaining_budget = (CHUNK_SIZE + WIGGLE) - current_size; + if version_file.size >= remaining_budget { + let remaining_budget = CHUNK_SIZE - current_size; + current_chunk.push((version_file.clone(), 0, remaining_budget)); + + let new_chunk = std::mem::replace(&mut current_chunk, Vec::new()); + chunks.push(new_chunk); + + let remaining_size = version_file.size - remaining_budget; + let mut running_offset = remaining_budget; + // Do everything but the last one + while running_offset < remaining_size { + let chunk_size = CHUNK_SIZE.min(remaining_size); + let chunk = vec![(version_file.clone(), running_offset, chunk_size)]; + if chunk_size == CHUNK_SIZE { + chunks.push(chunk); + } else { + current_chunk = chunk; + } + running_offset += chunk_size; + } + + continue; + } else { + let size = version_file.size; + current_chunk.push((version_file, 0, size)); + current_size += size; + } + + if current_size >= CHUNK_SIZE { + // Pop current and add, then reset + let new_chunk = std::mem::replace(&mut current_chunk, Vec::new()); + chunks.push(new_chunk); + } + } + if current_chunk.len() > 0 { + chunks.push(current_chunk); + } + + log_sfn.call( + Ok(format!( + "organized into {} chunks, generating checksums...", + chunks.len() + )), + ThreadsafeFunctionCallMode::Blocking, + ); + + let mut manifest: HashMap = HashMap::new(); + let mut total_manifest_length = 0; + + let mut read_buf = vec![0; 1024 * 1024 * 64]; + + let chunk_len = chunks.len(); + for (index, chunk) in chunks.into_iter().enumerate() { + let uuid = uuid::Uuid::new_v4().to_string(); + let mut hasher = Sha256::new(); + + let mut chunk_data = ChunkData { + files: Vec::new(), + checksum: String::new(), + }; + + let mut chunk_length = 0; + + for (file, start, length) in chunk { + log_sfn.call( + Ok(format!( + "reading {} from {} to {}, {}", + file.relative_filename, + start, + start + length, + format_size(length, BINARY) + )), + ThreadsafeFunctionCallMode::Blocking, + ); + let mut reader = backend.reader(&file, start, start + length).await?; + + loop { + let amount = reader.read(&mut read_buf).await?; + if amount == 0 { + break; + } + hasher.update(&read_buf[0..amount]); + } + + chunk_length += length; + + chunk_data.files.push(FileEntry { + filename: file.relative_filename, + start: start.try_into().unwrap(), + length: length.try_into().unwrap(), + permissions: file.permission, + }); + } + + log_sfn.call( + Ok(format!( + "created chunk of size {} ({}/{})", + format_size(chunk_length, BINARY), + index, + chunk_len + )), + ThreadsafeFunctionCallMode::Blocking, + ); + total_manifest_length += chunk_length; + + let hash: String = hasher.finalize().encode_hex(); + chunk_data.checksum = hash; + manifest.insert(uuid, chunk_data); + + let progress: f32 = (index as f32 / chunk_len as f32) * 100.0f32; progress_sfn.call(Ok(progress), ThreadsafeFunctionCallMode::Blocking); } - Ok(json!(chunks).to_string()) + Ok( + json!(Manifest { + version: "2".to_string(), + chunks: manifest, + size: total_manifest_length + }) + .to_string(), + ) } diff --git a/test.js b/test.js new file mode 100644 index 0000000..9c1ec9f --- /dev/null +++ b/test.js @@ -0,0 +1,7 @@ +const droplet = require('.'); +const fs = require('fs'); + +(async () => { + const manifest = await droplet.generateManifest("/home/decduck/Games/STAR WARS Jedi Survivor", console.log, console.log); + fs.writeFileSync('./manifest.json', manifest); +})();