mirror of
https://github.com/open-goal/jak-project.git
synced 2024-11-27 00:10:31 +00:00
extractor: Validate ISO contents and report specific errors (#1322)
* deps: add `xxhash` library * extractor: hash ISO files as they are extracted * extractor: report on game data validation errors * lint: formatting * extractor: automatically pick the right decompiler config
This commit is contained in:
parent
e4c2f81e3a
commit
c18502d5f5
@ -131,7 +131,7 @@
|
||||
"project" : "CMakeLists.txt",
|
||||
"projectTarget" : "extractor.exe (bin\\extractor.exe)",
|
||||
"name" : "Run - Extractor - Extract",
|
||||
"args" : [ "\"E:\\ISOs\\Jak\\Jak 1.iso\"", "--extract", "-proj-path", "C:\\Users\\xtvas\\Repositories\\opengoal\\launcher\\bundle-test\\data"]
|
||||
"args" : [ "\"E:\\ISOs\\Jak\\Jak 1.iso\"", "--extract", "--proj-path", "C:\\Users\\xtvas\\Repositories\\opengoal\\launcher\\bundle-test\\data"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -76,12 +76,15 @@ void add_from_dir(FILE* fp, u32 sector, u32 size, IsoFile::Entry* parent) {
|
||||
}
|
||||
}
|
||||
|
||||
void unpack_entry(FILE* fp, const IsoFile::Entry& entry, const std::filesystem::path& dest) {
|
||||
void unpack_entry(FILE* fp,
|
||||
IsoFile& iso,
|
||||
const IsoFile::Entry& entry,
|
||||
const std::filesystem::path& dest) {
|
||||
std::filesystem::path path_to_entry = dest / entry.name;
|
||||
if (entry.is_dir) {
|
||||
std::filesystem::create_directory(path_to_entry);
|
||||
for (const auto& child : entry.children) {
|
||||
unpack_entry(fp, child, path_to_entry);
|
||||
unpack_entry(fp, iso, child, path_to_entry);
|
||||
}
|
||||
} else {
|
||||
std::vector<u8> buffer(entry.size);
|
||||
@ -92,6 +95,11 @@ void unpack_entry(FILE* fp, const IsoFile::Entry& entry, const std::filesystem::
|
||||
ASSERT_MSG(false, "Failed to fread iso when unpacking");
|
||||
}
|
||||
file_util::write_binary_file(path_to_entry.string(), buffer.data(), buffer.size());
|
||||
iso.files_extracted++;
|
||||
if (iso.shouldHash) {
|
||||
xxh::hash_t<64> hash = xxh::xxhash<64>(buffer);
|
||||
iso.hashes.push_back(hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
@ -105,10 +113,13 @@ IsoFile find_files_in_iso(FILE* fp) {
|
||||
return result;
|
||||
}
|
||||
|
||||
void unpack_iso_files(FILE* fp, const IsoFile& layout, const std::filesystem::path& dest) {
|
||||
unpack_entry(fp, layout.root, dest);
|
||||
void unpack_iso_files(FILE* fp, IsoFile& layout, const std::filesystem::path& dest) {
|
||||
unpack_entry(fp, layout, layout.root, dest);
|
||||
}
|
||||
|
||||
void unpack_iso_files(FILE* fp, const std::filesystem::path& dest) {
|
||||
unpack_iso_files(fp, find_files_in_iso(fp), dest);
|
||||
IsoFile unpack_iso_files(FILE* fp, const std::filesystem::path& dest, const bool hashFiles) {
|
||||
auto file = find_files_in_iso(fp);
|
||||
file.shouldHash = hashFiles;
|
||||
unpack_iso_files(fp, file, dest);
|
||||
return file;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <filesystem>
|
||||
#include "third-party/xxhash.hpp"
|
||||
|
||||
struct IsoFile {
|
||||
struct Entry {
|
||||
@ -22,9 +23,15 @@ struct IsoFile {
|
||||
|
||||
Entry root;
|
||||
|
||||
int files_extracted = 0;
|
||||
bool shouldHash = false;
|
||||
// There is no reason to map to the files, as we don't retain mappings of each file's expected
|
||||
// hash
|
||||
std::vector<xxh::hash64_t> hashes = {};
|
||||
|
||||
IsoFile();
|
||||
};
|
||||
|
||||
IsoFile find_files_in_iso(FILE* fp);
|
||||
void unpack_iso_files(FILE* fp, const IsoFile& layout, const std::filesystem::path& dest);
|
||||
void unpack_iso_files(FILE* fp, const std::filesystem::path& dest);
|
||||
void unpack_iso_files(FILE* fp, IsoFile& layout, const std::filesystem::path& dest);
|
||||
IsoFile unpack_iso_files(FILE* fp, const std::filesystem::path& dest, const bool hashFiles = false);
|
||||
|
@ -7,37 +7,222 @@
|
||||
#include "decompiler/config.h"
|
||||
#include "goalc/compiler/Compiler.h"
|
||||
#include "common/util/read_iso_file.h"
|
||||
#include <regex>
|
||||
|
||||
// Result codes produced by the extractor's validation step. main() converts a failing
// code to the process exit status with static_cast<int>, so the numeric values are
// part of the tool's external contract.
enum class ExtractorErrorCode {
  // Validation found nothing wrong.
  SUCCESS = 0,

  // No file shaped like a disc serial (and therefore no ELF hash) was found.
  VALIDATION_CANT_LOCATE_ELF = 4000,
  // The serial was found on disc but has no entry in the validation database.
  VALIDATION_SERIAL_MISSING_FROM_DB = 4001,
  // The serial is known, but this particular ELF hash is not listed under it.
  VALIDATION_ELF_MISSING_FROM_DB = 4002,

  // NOTE(review): by name, an overall contents-hash mismatch - confirm against validate().
  VALIDATION_BAD_ISO_CONTENTS = 4010,
  // The number of extracted files differs from the database entry.
  VALIDATION_INCORRECT_EXTRACTION_COUNT = 4011,

  // The extracted folder has no DGO directory; main() always treats this as fatal.
  VALIDATION_BAD_EXTRACTION = 4020
};
|
||||
|
||||
struct ISOMetadata {
|
||||
std::string canonical_name;
|
||||
std::string region;
|
||||
int num_files;
|
||||
xxh::hash64_t contents_hash;
|
||||
std::string decomp_config;
|
||||
};
|
||||
|
||||
// TODO - when we support jak2 and beyond, add which game it's for as well
|
||||
// this will let the installer reject (or gracefully handle) jak2 isos on the jak1 page, etc.
|
||||
|
||||
// { SERIAL : { ELF_HASH : ISOMetadata } }
|
||||
// Known releases, keyed first by disc serial and then by the hash of that disc's ELF file.
static std::map<std::string, std::map<xxh::hash64_t, ISOMetadata>> isoDatabase{
    {"SCUS-97124",
     {
         {7280758013604870207U,  // ELF hash
          {
              "Jak and Daxter: The Precursor Legacy - Black Label",  // canonical_name
              "NTSC-U",                                               // region
              337,                                                    // num_files
              11363853835861842434U,                                  // contents_hash
              "jak1_ntsc_black_label"                                 // decomp_config
          }},
     }},
};
|
||||
|
||||
void setup_global_decompiler_stuff(std::optional<std::filesystem::path> project_path_override) {
|
||||
decompiler::init_opcode_info();
|
||||
file_util::setup_project_path(project_path_override);
|
||||
}
|
||||
|
||||
void extract_files(std::filesystem::path data_dir_path, std::filesystem::path extracted_iso_path) {
|
||||
IsoFile extract_files(std::filesystem::path data_dir_path,
|
||||
std::filesystem::path extracted_iso_path) {
|
||||
fmt::print("Note: input isn't a folder, assuming it's an ISO file...\n");
|
||||
|
||||
std::filesystem::create_directories(extracted_iso_path);
|
||||
|
||||
auto fp = fopen(data_dir_path.string().c_str(), "rb");
|
||||
ASSERT_MSG(fp, "failed to open input ISO file\n");
|
||||
unpack_iso_files(fp, extracted_iso_path);
|
||||
IsoFile iso = unpack_iso_files(fp, extracted_iso_path, true);
|
||||
fclose(fp);
|
||||
return iso;
|
||||
}
|
||||
|
||||
int validate(std::filesystem::path path_to_iso_files) {
|
||||
if (!std::filesystem::exists(path_to_iso_files / "DGO")) {
|
||||
fmt::print("Error: input folder doesn't have a DGO folder. Is this the right input?\n");
|
||||
return 1;
|
||||
/*!
 * Search the top level of an extracted ISO for the file whose name is shaped like a disc
 * serial ("ABCD_123.45") and hash its contents.
 *
 * Only the first matching directory entry is considered.
 *
 * @param extracted_iso_path folder the ISO was extracted into
 * @return {serial, elf_hash}. The serial is reformatted as "ABCD-12345". Both are empty when
 *         no file name matches; the hash alone is empty when the matching file cannot be
 *         opened or fully read.
 */
std::pair<std::optional<std::string>, std::optional<xxh::hash64_t>> findElfFile(
    const std::filesystem::path& extracted_iso_path) {
  std::optional<std::string> serial = std::nullopt;
  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
  // Build the pattern once rather than once per directory entry
  const std::regex serial_file_pattern(".{4}_.{3}\\..{2}");

  for (const auto& entry : fs::directory_iterator(extracted_iso_path)) {
    const auto as_str = entry.path().filename().string();
    if (!std::regex_match(as_str, serial_file_pattern)) {
      continue;
    }

    // "ABCD_123.45" -> "ABCD-12345"
    serial = std::make_optional(
        fmt::format("{}-{}", as_str.substr(0, 4), as_str.substr(5, 3) + as_str.substr(9, 2)));

    // We already found the path, so hash it while we're here
    const auto elf_path = entry.path().string();
    auto fp = fopen(elf_path.c_str(), "rb");
    if (!fp) {
      // Leave the hash empty so the caller reports that the ELF could not be located
      fmt::print(stderr, "ERROR: Unable to open '{}' for hashing\n", elf_path);
      break;
    }

    std::vector<u8> buffer;
    bool read_ok = false;
    if (fseek(fp, 0, SEEK_END) == 0) {
      const long size = ftell(fp);  // -1 on failure
      if (size >= 0) {
        buffer.resize(static_cast<size_t>(size));
        rewind(fp);
        // data() is valid even for an empty buffer, unlike &buffer[0]
        read_ok = fread(buffer.data(), sizeof(std::vector<u8>::value_type), buffer.size(), fp) ==
                  buffer.size();
      }
    }
    fclose(fp);

    if (read_ok) {
      elf_hash = std::make_optional(xxh::xxhash<64>(buffer));
    } else {
      // Never hash a partially read file - that would produce a misleading "unknown ELF"
      fmt::print(stderr, "ERROR: Unable to read '{}' for hashing\n", elf_path);
    }
    break;
  }
  return {serial, elf_hash};
}
|
||||
|
||||
/*!
 * Check an extraction against the validation database.
 *
 * Every problem found is printed to stderr, but only the FIRST one determines the
 * returned code.
 *
 * @param iso_file           result of the extraction (file count and per-file hashes)
 * @param extracted_iso_path folder the ISO was extracted into
 * @return SUCCESS when the release is known and both its file count and contents hash
 *         match; otherwise the code of the first failure encountered.
 */
ExtractorErrorCode validate(const IsoFile& iso_file,
                            const std::filesystem::path& extracted_iso_path) {
  if (!std::filesystem::exists(extracted_iso_path / "DGO")) {
    fmt::print(stderr, "ERROR: input folder doesn't have a DGO folder. Is this the right input?\n");
    return ExtractorErrorCode::VALIDATION_BAD_EXTRACTION;
  }

  std::optional<ExtractorErrorCode> error_code;
  // Remember only the first failure; later ones are still printed by the caller of this helper
  const auto record_error = [&error_code](ExtractorErrorCode code) {
    if (!error_code.has_value()) {
      error_code = std::make_optional(code);
    }
  };

  std::optional<std::string> serial = std::nullopt;
  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
  std::tie(serial, elf_hash) = findElfFile(extracted_iso_path);

  if (!serial || !elf_hash) {
    fmt::print(stderr, "ERROR: Unable to locate a Serial/ELF file!\n");
    // No point in continuing here
    return ExtractorErrorCode::VALIDATION_CANT_LOCATE_ELF;
  }

  // - XOR all hashes together and hash the result. This makes the ordering of the hashes (aka
  // files) irrelevant
  xxh::hash64_t combined_hash = 0;
  for (const auto& hash : iso_file.hashes) {
    combined_hash ^= hash;
  }
  xxh::hash64_t contents_hash = xxh::xxhash<64>({combined_hash});

  // Find the game in our tracking database
  auto dbEntry = isoDatabase.find(serial.value());
  if (dbEntry == isoDatabase.end()) {
    fmt::print(stderr, "ERROR: Serial '{}' not found in the validation database\n", serial.value());
    record_error(ExtractorErrorCode::VALIDATION_SERIAL_MISSING_FROM_DB);
  } else {
    auto& metaMap = dbEntry->second;
    auto meta_entry = metaMap.find(elf_hash.value());
    if (meta_entry == metaMap.end()) {
      fmt::print(stderr,
                 "ERROR: ELF Hash '{}' not found in the validation database, is this a new or "
                 "modified version of the same game?\n",
                 elf_hash.value());
      record_error(ExtractorErrorCode::VALIDATION_ELF_MISSING_FROM_DB);
    } else {
      const auto& meta = meta_entry->second;
      // Print out some information
      fmt::print("Detected Game Metadata:\n");
      fmt::print("\tDetected - {}\n", meta.canonical_name);
      fmt::print("\tRegion - {}\n", meta.region);
      fmt::print("\tSerial - {}\n", dbEntry->first);
      fmt::print("\tUses Decompiler Config - {}\n", meta.decomp_config);

      // - Number of Files
      if (meta.num_files != iso_file.files_extracted) {
        fmt::print(stderr,
                   "ERROR: Extracted an unexpected number of files. Expected '{}', Actual '{}'\n",
                   meta.num_files, iso_file.files_extracted);
        record_error(ExtractorErrorCode::VALIDATION_INCORRECT_EXTRACTION_COUNT);
      }
      // Check the ISO Hash
      if (meta.contents_hash != contents_hash) {
        fmt::print(stderr,
                   "ERROR: Overall ISO content's hash does not match. Expected '{}', Actual '{}'\n",
                   meta.contents_hash, contents_hash);
        // Without this a corrupted or modified ISO would still validate as SUCCESS
        record_error(ExtractorErrorCode::VALIDATION_BAD_ISO_CONTENTS);
      }
    }
  }

  if (!error_code.has_value()) {
    return ExtractorErrorCode::SUCCESS;
  }

  // Finally, report the result. For unknown releases, generate a ready-to-paste database
  // entry purely as a convenience.
  if (error_code.value() == ExtractorErrorCode::VALIDATION_SERIAL_MISSING_FROM_DB) {
    fmt::print(
        "If this is a new release or version that should be supported, consider adding the "
        "following serial entry to the database:\n");
    fmt::print(
        "\t'{{\"{}\", {{{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
        "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\"}}}}}}}}'\n",
        serial.value(), elf_hash.value(), iso_file.files_extracted, contents_hash);
  } else if (error_code.value() == ExtractorErrorCode::VALIDATION_ELF_MISSING_FROM_DB) {
    fmt::print(
        "If this is a new release or version that should be supported, consider adding the "
        "following ELF entry to the database under the '{}' serial:\n",
        serial.value());
    // The ELF hash is an unsigned 64-bit literal, so it needs the same 'U' suffix as above
    fmt::print(
        "\t'{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
        "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\"}}}}'\n",
        elf_hash.value(), iso_file.files_extracted, contents_hash);
  } else {
    fmt::print(stderr,
               "Validation has failed to match with expected values, see the above errors for "
               "specific. This may be an error in the validation database!\n");
  }
  return error_code.value();
}
|
||||
|
||||
std::optional<ISOMetadata> determineRelease(const std::filesystem::path& jak1_input_files) {
|
||||
std::optional<std::string> serial = std::nullopt;
|
||||
std::optional<xxh::hash64_t> elf_hash = std::nullopt;
|
||||
std::tie(serial, elf_hash) = findElfFile(jak1_input_files);
|
||||
|
||||
if (!serial || !elf_hash) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Find the game in our tracking database
|
||||
auto dbEntry = isoDatabase.find(serial.value());
|
||||
if (dbEntry == isoDatabase.end()) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
auto& metaMap = dbEntry->second;
|
||||
auto meta_entry = metaMap.find(elf_hash.value());
|
||||
if (meta_entry == metaMap.end()) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
return std::make_optional(meta_entry->second);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void decompile(std::filesystem::path jak1_input_files) {
|
||||
using namespace decompiler;
|
||||
Config config = read_config_file(
|
||||
(file_util::get_jak_project_dir() / "decompiler" / "config" / "jak1_ntsc_black_label.jsonc")
|
||||
.string(),
|
||||
{});
|
||||
|
||||
// Determine which config to use from the database
|
||||
auto meta = determineRelease(jak1_input_files);
|
||||
std::string decomp_config = "jak1_ntsc_black_label";
|
||||
if (meta.has_value()) {
|
||||
decomp_config = meta.value().decomp_config;
|
||||
fmt::print("INFO: Automatically detected decompiler config, using - {}\n", decomp_config);
|
||||
}
|
||||
|
||||
Config config = read_config_file((file_util::get_jak_project_dir() / "decompiler" / "config" /
|
||||
fmt::format("{}.jsonc", decomp_config))
|
||||
.string(),
|
||||
{});
|
||||
|
||||
std::vector<std::string> dgos, objs;
|
||||
|
||||
@ -128,7 +313,7 @@ int main(int argc, char** argv) {
|
||||
std::filesystem::path project_path_override;
|
||||
bool flag_runall = false;
|
||||
bool flag_extract = false;
|
||||
bool flag_validate = false;
|
||||
bool flag_fail_on_validation = false;
|
||||
bool flag_decompile = false;
|
||||
bool flag_compile = false;
|
||||
bool flag_play = false;
|
||||
@ -145,7 +330,7 @@ int main(int argc, char** argv) {
|
||||
->check(CLI::ExistingPath);
|
||||
app.add_flag("-a,--all", flag_runall, "Run all steps, from extraction to playing the game");
|
||||
app.add_flag("-e,--extract", flag_extract, "Extract the ISO");
|
||||
app.add_flag("-v,--validate", flag_validate, "Validate the ISO / game files");
|
||||
app.add_flag("-v,--validate", flag_fail_on_validation, "Fail on Validation Errors");
|
||||
app.add_flag("-d,--decompile", flag_decompile, "Decompile the game data");
|
||||
app.add_flag("-c,--compile", flag_compile, "Compile the game");
|
||||
app.add_flag("-p,--play", flag_play, "Play the game");
|
||||
@ -156,7 +341,7 @@ int main(int argc, char** argv) {
|
||||
fmt::print("Working Directory - {}\n", std::filesystem::current_path().string());
|
||||
|
||||
// If no flag is set, we default to running everything
|
||||
if (!flag_extract && !flag_validate && !flag_decompile && !flag_compile && !flag_play) {
|
||||
if (!flag_extract && !flag_decompile && !flag_compile && !flag_play) {
|
||||
fmt::print("Running all steps, no flags provided!\n");
|
||||
flag_runall = true;
|
||||
}
|
||||
@ -177,15 +362,15 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
if (flag_runall || flag_extract) {
|
||||
if (!std::filesystem::is_directory(path_to_iso_files)) {
|
||||
extract_files(data_dir_path, path_to_iso_files);
|
||||
}
|
||||
}
|
||||
|
||||
if (flag_runall || flag_validate) {
|
||||
auto ok = validate(path_to_iso_files);
|
||||
if (ok != 0) {
|
||||
return ok;
|
||||
if (!std::filesystem::is_directory(data_dir_path)) {
|
||||
auto iso_file = extract_files(data_dir_path, path_to_iso_files);
|
||||
auto validation_res = validate(iso_file, path_to_iso_files);
|
||||
if (validation_res == ExtractorErrorCode::VALIDATION_BAD_EXTRACTION) {
|
||||
// We fail here regardless of the flag
|
||||
return static_cast<int>(validation_res);
|
||||
} else if (flag_fail_on_validation && validation_res != ExtractorErrorCode::SUCCESS) {
|
||||
return static_cast<int>(validation_res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
1938
third-party/xxhash.hpp
generated
vendored
Normal file
1938
third-party/xxhash.hpp
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -8,3 +8,7 @@ third-party/discord-rpc:
|
||||
sha: 963aa9f3e5ce81a4682c6ca3d136cddda614db33
|
||||
third-party/fpng:
|
||||
sha: bfe5f9c69e93b99b31268c10db8e645c9125a07f
|
||||
third-party/CLI11.hpp:
|
||||
git: https://github.com/CLIUtils/CLI11/tree/v2.2.0
|
||||
third-party/xxhash.hpp:
|
||||
git: https://github.com/RedSpah/xxhash_cpp/tree/0.7.3
|
||||
|
Loading…
Reference in New Issue
Block a user