From 3f78168ce526cbdbbebf47361d7e6e4c3a5e4a31 Mon Sep 17 00:00:00 2001 From: Ronald Caesar Date: Fri, 16 Jan 2026 20:20:19 -0400 Subject: [PATCH] tools: create a rustdoc like documentation generator While I do not like the Rust Language as a whole, their documentation generator is the best I've ever seen in any language. I want to implement something like it for Ballistic. Like I said in the README, I have absolutely zero motivation to create a documentation generator so `cdoc.c` is made completely with AI. The code is messy but the generated HTML files look beautiful. Signed-off-by: Ronald Caesar --- .gitignore | 1 + CMakeLists.txt | 27 ++ include/bal_engine.h | 175 ++++----- include/bal_memory.h | 170 ++++----- tools/README.md | 40 +- tools/cdoc.c | 883 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1100 insertions(+), 196 deletions(-) create mode 100644 tools/cdoc.c diff --git a/.gitignore b/.gitignore index 522debd..088e648 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.d build/ *.bin +docs/cdoc # Object files *.o diff --git a/CMakeLists.txt b/CMakeLists.txt index aea15c8..13d2eeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,6 +86,33 @@ target_link_libraries(${COVERAGE_CLI_NAME} PRIVATE ${PROJECT_NAME}) set (BALLISTIC_CLI_NAME "ballistic_cli") add_executable(${BALLISTIC_CLI_NAME} tools/ballistic_cli.c) target_link_libraries(${BALLISTIC_CLI_NAME} PRIVATE ${PROJECT_NAME}) + +# Our documentation generator completely relies on Clang running on UNIX +# compatible machines for parsing C code. I do not have a Windows machine +# to test this so only Linux and macOS are supported.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set (CDOC_NAME "cdoc") + find_library(CMARK_LIBRARY NAMES cmark libcmark REQUIRED) + find_path(CMARK_INCLUDE_DIR NAMES cmark.h REQUIRED) + find_library(CLANG_LIBRARY NAMES clang libclang REQUIRED) + find_path(CLANG_INCLUDE_DIR NAMES clang-c/Index.h REQUIRED) + + add_executable(${CDOC_NAME} tools/cdoc.c) + target_compile_options(${CDOC_NAME} PRIVATE -Wno-missing-prototypes + -Wno-sign-conversion -Wno-int-conversion -Wno-unused-parameter + -Wno-implicit-function-declaration -Wno-shorten-64-to-32) + target_link_libraries(${CDOC_NAME} PRIVATE clang cmark) + set (PROJECT_HEADERS include/bal_engine.h include/bal_decoder.h + include/bal_memory.h include/bal_types.h include/bal_errors.h) + + add_custom_target(doc + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/cdoc docs/cdoc ${PROJECT_HEADERS} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + COMMENT "Generating Documentation..." + DEPENDS cdoc + ) +endif() + # ----------------------------------------------------------------------------- # Compile Tests # ----------------------------------------------------------------------------- diff --git a/include/bal_engine.h b/include/bal_engine.h index f8eb98f..b6ee49d 100644 --- a/include/bal_engine.h +++ b/include/bal_engine.h @@ -1,9 +1,3 @@ -/** @file bal_engine.h - * - * @brief Manages resources while translating ARM blocks to Itermediate - * Representation. - */ - #ifndef BALLISTIC_ENGINE_H #define BALLISTIC_ENGINE_H @@ -13,150 +7,127 @@ #include "bal_errors.h" #include -/*! - * @brief Pattern written to memory during initialization. - * @details Used to detect reads from uninitialized allocated memory. - */ -#define POISON_UNINITIALIZED_MEMORY 0x5a +/// A byte pattern written to memory during initialization, poisoning allocated +/// regions. This is mainly used for detecting reads from uninitialized memory. +#define POISON_UNINITIALIZED_MEMORY 0xFF -/*! 
- * @brief Represents the mapping of a Guest Register to an SSA variable. - * @details This is state used only used during the SSA construction pass. - */ +/// Represents the mapping of a Guest Register to an SSA variable. +/// This is only used during Static Single Assignment construction +/// to track variable definitions across basic blocks. typedef struct { - /*! - * @brief The most recent SSA definition for this register. - */ + /// The index of the most recent SSA definition for this register. uint32_t current_ssa_index; - /*! - * @brief The SSA definition that existed at the start of the block. - */ + /// The index of the SSA definition that existed at the start of the + /// current block. uint32_t original_variable_index; } bal_source_variable_t; +/// Holds the Intermediate Representation buffers, SSA state, and other +/// important metadata. The structure is divided into hot and cold data aligned +/// to 64 bytes. Both hot and cold data live on their own cache lines. BAL_ALIGNED(64) typedef struct { /* Hot Data */ - /*! - * @brief Map of ARM registers to curret SSA definitions. - */ + /// Map of ARM registers to their current SSA definitions. bal_source_variable_t *source_variables; - /*! - * @brief The linear buffer of generated IR instructions in a compilation - * unit. - */ + /// The linear buffer of generated IR instructions for the current + /// compilation unit. bal_instruction_t *instructions; - /*! - * @brief Metadata tracking the bit-width (32/64) of each SSA definition. - */ + /// Metadata tracking the bit-width (32 or 64 bit) for each variable. bal_bit_width_t *ssa_bit_widths; - /*! - * @brief Linear buffer of constants generated in a compilation unit. - */ + /// Linear buffer of constants generated in the current compilation unit. bal_constant_t *constants; - /*! - * @brief Size of source variable array. - */ + /// The size of the `source_variables` array in bytes. size_t source_variables_size; - /*! - * @brief Size of instruction array.
- */ + /// The size of the `instructions` array in bytes. size_t instructions_size; - /*! - * @brief Size of the constants array. - */ + /// The size of the `constants` array in bytes. size_t constants_size; - /*! - * @brief The current number of instructions emitted. - * - * @detials - * This keeps track of where we are in the instruction and ssa bit width - * arrays to make sure we dont cause an instruction overflow. - */ + /// The current number of instructions emitted. + /// + /// This tracks the current position in `instructions` and `ssa_bit_widths` + /// arrays. bal_instruction_count_t instruction_count; + /// Padding to maintain 64 byte alignment. char _padding[2]; - /*! - * @brief The Engine's state. - * - * @details - * If an operation fails, this is set to a specific error code. - * Subsequent operations will silently fail until the engine is reset. - */ + /// The current error state of the Engine. + /// + /// If an operation fails, this field is set to a specific error code. + /// See [`bal_error_t`]. Once set to an error state, subsequent operations + /// on this engine will silently fail until [`bal_engine_reset`] is called. bal_error_t status; /* Cold Data */ - // The pointer returned during heap initialization. - // We need this to free the engine's allocated arrays. - // + /// The base pointer returned during the underlying heap allocation. This + /// is required to correctly free the engine's internal arrays. void *arena_base; + + /// The total size of the allocated arena. size_t arena_size; } bal_engine_t; -/*! - * @brief Initialize a Ballistic engine. - * - * @details - * This is a high-cost memory allocation operation that should be done - * sparingly. - * - * @param[in] allocator Pointer to the memory allocator interface. - * @param[out] engine Pointer to the engine struct to initialize. - * - * @return BAL_SUCCESS on success. - * @return BAL_ERROR_INVALID_ARG if arguments are NULL.
- * @return BAL_ERROR_ALLOCATION_FAILED if the allocator returns NULL. - */ +/// Initializes a Ballistic engine. +/// +/// Populates `engine` with empty buffers allocated with `allocator`. This is +/// a high cost memory operation that reserves a lot of memory and should +/// be called sparingly. +/// +/// Returns [`BAL_SUCCESS`] if the engine is ready for use. +/// +/// # Errors +/// +/// Returns [`BAL_ERROR_INVALID_ARGUMENT`] if the pointers are `NULL`. +/// +/// Returns [`BAL_ERROR_ALLOCATION_FAILED`] if the allocator cannot fulfill the +/// request. BAL_COLD bal_error_t bal_engine_init(bal_allocator_t *allocator, bal_engine_t *engine); -/*! - * @brief Executes the main JIT translation loop. - * - * @param[in,out] engine The engine context. Must be initialized. - * @param[in] arm_entry_point Pointer to the start of the ARM machine code - * to translate. - * - * @return BAL_SUCCESS on successfull translation of arm_entry_point. - * @return BAL_ERROR_ENGINE_STATE_INVALID if any function parameters are NULL. - */ + +/// Translates machine code starting at `arm_entry_point` into the engine's +/// internal IR. `interface` provides memory access handling (like instruction +/// fetching). +/// +/// Returns [`BAL_SUCCESS`] on success. +/// +/// # Errors +/// +/// Returns [`BAL_ERROR_ENGINE_STATE_INVALID`] if `engine` is not initialized +/// or `engine->status != BAL_SUCCESS`. BAL_HOT bal_error_t bal_engine_translate(bal_engine_t *BAL_RESTRICT engine, bal_memory_interface_t *BAL_RESTRICT interface, const uint32_t *BAL_RESTRICT arm_entry_point); -/*! - * @brief Resets the engine for the next compilation unit. - * - * @details - * This is a low cost memory operation. - * - * @param[in,out] engine The engine to reset. - * - * @return BAL_SUCCESS on success. - * @return BAL_ERROR_INVALID_ARG if arguments are NULL. - */ +/// Resets `engine` for the next compilation unit. This is a low cost memory +/// operation designed to be called between translation units.
+/// +/// Returns [`BAL_SUCCESS`] on success. +/// +/// # Errors +/// +/// Returns [`BAL_ERROR_INVALID_ARGUMENT`] if `engine` is `NULL`. BAL_HOT bal_error_t bal_engine_reset(bal_engine_t *engine); -/*! - * Frees all engine heap allocated resources. - * - * @param[in,out] engine The engine to destroy. - * - * @warning The engine struct itself is NOT freed (it may be stack allocated). - */ +/// Frees all `engine` heap-allocated resources using `allocator`. +/// +/// # Warning +/// +/// This function does not free the [`bal_engine_t`] struct itself, as the +/// caller may have allocated it on the stack. BAL_COLD void bal_engine_destroy(bal_allocator_t *allocator, bal_engine_t *engine); diff --git a/include/bal_memory.h b/include/bal_memory.h index acecf1d..f366bba 100644 --- a/include/bal_memory.h +++ b/include/bal_memory.h @@ -1,12 +1,3 @@ -/** @file bal_memory.h - * - * @brief Memory management interface for Ballistic. - * - * @par - * The host is responsible for providing an allocator capable of handling - * aligned memory requests. - */ - #ifndef BALLISTIC_MEMORY_H #define BALLISTIC_MEMORY_H @@ -16,120 +7,125 @@ #include #include -/*! - * @brief Function signature for memory allocation. - * - * @param[in] allocator The opaque pointer registered in @ref bal_allocator_t. - * @param[in] alignment The required alignment in bytes. Must be power of 2. - * @param[in] size The number of bytes to allocate. - * - * @return A pointer to the allocated memory, or NULL on failure. - */ -typedef void *(*bal_allocate_function_t)(void *allocator, +/// A function signature for allocating aligned memory. +/// +/// Implementations must allocate a block of memory of at least `size` bytes +/// with an address that is a multiple of `alignment`. The `alignment` +/// parameter is guaranteed to be a power of two. The `allocator` parameter +/// provides access to the opaque state registered in [`bal_allocator_t`].
+/// +/// Returns a pointer to the allocated memory, or `NULL` if the request could +/// not be fulfilled. +typedef void *(*bal_allocate_function_t)(void *allocator, size_t alignment, size_t size); -/*! - * @brief Function signature for memory deallocation. - * - * @param[in] allocator The opaque pointer registered in @ref al_allocator_t. - * @param[in] pointer The pointer to the memory to free. - * @param[in] size The size of the allocation being freed. - */ +/// A function signature for releasing memory. +/// +/// Implementations must deallocate the memory at `pointer`, which was +/// previously allocated by the corresponding allocate function. The `size` +/// parameter indicates the size of the allocation being freed. Access to the +/// heap state is provided via `allocator`. typedef void (*bal_free_function_t)(void *allocator, void *pointer, size_t size); -/*! - * @brief Translate a Guest Virtual Address to a Host Virtual Address. - * - * @details - * Ballistic calls this when it needs to fetch instructions. The implementer - * must return a pointer to the host memory corresponding to the requested - * guest address. - * - * @param[in] context The oapque pointer provided in @ref - * bal_memory_interface_t. - * @param[in] guest_address The guest address to translate. - * @param[out] max_readable_size The implementer MUST write the number of - * contiguous bytes valid to read from the - * returned pointer. This prevents Ballistic from - * reading off the end of a mapped page or buffer. - * - * @return A pointer to the host memory containing the data at @p guest_address. - * @return NULL if the address is unmapped or invalid. - */ +/// Translates a Guest Virtual Address (GVA) to a Host Virtual Address (HVA). +/// +/// Ballistic invokes this callback when it needs to fetch instructions or +/// access data. The implementation must translate the provided `guest_address` +/// using the opaque `context` and return a pointer to the corresponding host +/// memory. 
+/// +/// Returns a pointer to the host memory containing the data at `guest_address`, or `NULL` +/// if the address is unmapped or invalid. +/// +/// # Safety +/// +/// The implementation must write the number of contiguous, readable bytes +/// available at the returned pointer into `max_readable_size`. This prevents +/// Ballistic from reading beyond the end of a mapped page or buffer. typedef const uint8_t *(*bal_translate_function_t)( void *context, bal_guest_address_t guest_address, size_t *max_readable_size); +/// The host application is responsible for providing an allocator capable of +/// handling aligned memory requests. typedef struct { - /*! - * @brief Use this to store heap state or tracking information. - */ + /// An opaque pointer defining the state or tracking information for the + /// heap. void *allocator; - /*! - * @brief Callback for allocating aligned memory. - * @warning Must return 64-byte aligned memory if requested. - */ + /// The callback invoked to allocate aligned memory. + /// + /// # Safety + /// + /// The implementation must return memory aligned to at least 64 bytes if + /// requested. bal_allocate_function_t allocate; - /*! - * @brief Callback for freeing memory. - */ + /// The callback to release memory. bal_free_function_t free; } bal_allocator_t; +/// Defines the interface for translating guest addresses to host memory. typedef struct { + /// An opaque pointer to the context required for address translation + /// (e.g., a page walker or a buffer descriptor). void *context; + + /// The callback invoked to perform address translation. bal_translate_function_t translate; } bal_memory_interface_t; -/*! - * @brief Populates an allocator struct with the default implementation. - * - * @param[out] allocator The struct to populate. Must not be NULL. - * - * @warn Only supports Windows and POSIX systems. - */ +/// Populates `out_allocator` with the default system implementation.
+/// +/// # Safety +/// +/// `out_allocator` must not be `NULL`. +/// +/// # Platform Support +/// +/// This function only supports Windows and POSIX-compliant systems. BAL_COLD void bal_get_default_allocator(bal_allocator_t *out_allocator); -/*! - * @brief Initializes a translation interface that uses a flat, contiguous - * memory buffer. - * - * @params[in] allocator The allocator used to allocate the internal interface - * structure. - * @params[out] interface The interface struct to populate. - * @params[in] buffer Pointer to the pre-allocated host memory containing - * the guest code. - * @params[in] size The size of the buffer in bytes. - * - * @return BAL_SUCCESS on success. - * @return BAL_ERROR_INVALID_ARGUMENT if arguments are NULL. - * @return BAL_ERROR_MEMORY_ALIGNMENT if buffer is not align to 4 bytes. - * - * @warning The caller retains ownership of buffer. It must remain valid for - * the lifetime of the interface. - */ +/// Initializes a flat, contiguous translation interface. +/// +/// This convenience function sets up a [`bal_memory_interface_t`] where guest +/// addresses map directly to offsets within the provided host `buffer`. +/// +/// The internal interface is allocated with `allocator`. `interface` is +/// populated with the resulting context and translation callbacks. `buffer` +/// must be a pre-allocated block of host memory of at least `size` bytes. +/// +/// Returns [`BAL_SUCCESS`] on success. +/// +/// # Errors +/// +/// Returns [`BAL_ERROR_INVALID_ARGUMENT`] if any pointer is `NULL`. +/// +/// Returns [`BAL_ERROR_MEMORY_ALIGNMENT`] if `buffer` is not aligned to a 4-byte +/// boundary. +/// +/// # Safety +/// +/// The caller retains ownership of `buffer`. It must remain valid and +/// unmodified for the lifetime of the created interface. BAL_COLD bal_error_t bal_memory_init_flat(bal_allocator_t *BAL_RESTRICT allocator, bal_memory_interface_t *BAL_RESTRICT interface, void *BAL_RESTRICT buffer, size_t size); -/*!
- * @brief Frees the interface's internal state. - * - * @details - * This does not free the buffer passed during initialization, as Ballistic - * does not own it. - */ +/// Frees the internal state allocated within `interface` using the provided +/// `allocator`. +/// +/// This does **not** free the buffer passed to [`bal_memory_init_flat`] as +/// Ballistic does not take ownership of the host memory region. BAL_COLD void bal_memory_destroy_flat(bal_allocator_t *allocator, bal_memory_interface_t *interface); #endif /* BALLISTIC_MEMORY_H */ diff --git a/tools/README.md b/tools/README.md index a76800d..bc713ff 100644 --- a/tools/README.md +++ b/tools/README.md @@ -6,6 +6,39 @@ This folder holds scritps needed to build Ballistic and standalone programs used These programs will appear in directory you compile Ballistic with. +### Ballistic CLI + +This is used by developers to test Ballistic's translation loop. + +### CDoc + +This creates rustdoc-like documentation for C code. CDoc completely relies on +Clang and LLVM so only Unix systems are supported. Building CDoc on Windows is +not supported at this time. + +```bash +# Parses all header files in `include/` and outputs HTML files to `docs/cdoc`. +./cdoc docs/cdoc include/* +Using Clang headers: /usr/lib/clang/21/include +Parsing ../include/bal_attributes.h... +Parsing ../include/bal_decoder.h... +Parsing ../include/bal_engine.h... +Parsing ../include/bal_errors.h... +Parsing ../include/bal_memory.h... +Parsing ../include/bal_platform.h... +Parsing ../include/bal_types.h... +Generating HTML in '../docs/cdoc'... +Done! Open ../docs/cdoc/index.html +``` + +**Disclaimer**: I have absolutely zero motivation to create a documentation +generator so `cdoc.c` is made completely with AI. The code is messy but the +generated HTML files look beautiful. + +### Coverage CLI + +This program takes an ARM64 binary file and outputs the 20 most common instructions. + ### Decoder CLI This program is used for decoding ARM64 instructions.
The following example shows how to use it: @@ -13,13 +46,6 @@ This program is used for decoding ARM64 instructions. The following example show ./decoder_cli 0b5f1da1 # ADD extended registers Mnemonic: ADD - Mask: 0x7F200000 - Expected: 0x0B000000 ``` -### Ballistic CLI - -This is used by developers to test Ballistic's translation loop. - -### Coverage CLI - -This program takes an ARM64 binary file and outputs the 20 most common instructions. ## Scripts diff --git a/tools/cdoc.c b/tools/cdoc.c new file mode 100644 index 0000000..535096b --- /dev/null +++ b/tools/cdoc.c @@ -0,0 +1,883 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// --- 1. DATA STRUCTURES --- + +typedef enum { + KIND_FUNCTION, + KIND_STRUCT, + KIND_ENUM, + KIND_TYPEDEF +} ItemKind; + +typedef struct { + char* name; + char* type; // For Structs: type name. For Enums: value. + char* doc; +} Field; + +typedef struct { + char* name; + char* doc_comment; + ItemKind kind; + char* return_type; + char* underlying_type; + Field* args; + Field* fields; + int arg_count; + int field_count; + char* source_file; + char* anchor_id; +} DocItem; + +typedef struct { + DocItem* items; + size_t count; + size_t capacity; + char* filename; + char* file_doc; +} FileContext; + +typedef struct { + FileContext* files; + size_t count; + size_t capacity; + DocItem** registry; + size_t reg_count; + size_t reg_cap; +} ProjectContext; + +// --- 2. HELPERS --- + +char* my_strdup(const char* s) { + if (!s) return NULL; + size_t len = strlen(s); + char* d = malloc(len + 1); + if (d) memcpy(d, s, len + 1); + return d; +} + +char* to_cstr(CXString cx_str) { + const char* temp = clang_getCString(cx_str); + char* result = temp ? 
my_strdup(temp) : my_strdup(""); + clang_disposeString(cx_str); + return result; +} + +const char* get_filename(const char* path) { /* Returns the component after the last '/' or '\\' in path, or path itself when it has no separator. */ + const char* last_slash = strrchr(path, '/'); + const char* last_backslash = strrchr(path, '\\'); + const char* filename = path; + if (last_slash && last_slash >= filename) filename = last_slash + 1; /* >= so a separator at index 0 ("/x") is stripped too */ + if (last_backslash && last_backslash >= filename) filename = last_backslash + 1; /* only wins when it lies at or after the current start */ + return filename; +} + +void init_project(ProjectContext* proj) { + proj->count = 0; + proj->capacity = 10; + proj->files = malloc(sizeof(FileContext) * proj->capacity); + proj->reg_count = 0; + proj->reg_cap = 100; + proj->registry = malloc(sizeof(DocItem*) * proj->reg_cap); +} + +FileContext* add_file(ProjectContext* proj, const char* filepath) { + if (proj->count >= proj->capacity) { + proj->capacity *= 2; + proj->files = realloc(proj->files, sizeof(FileContext) * proj->capacity); + } + FileContext* ctx = &proj->files[proj->count++]; + ctx->filename = my_strdup(get_filename(filepath)); + ctx->file_doc = NULL; + ctx->count = 0; + ctx->capacity = 32; + ctx->items = malloc(sizeof(DocItem) * ctx->capacity); + return ctx; +} + +void register_item(ProjectContext* proj, DocItem* item) { + if (proj->reg_count >= proj->reg_cap) { + proj->reg_cap *= 2; + proj->registry = realloc(proj->registry, sizeof(DocItem*) * proj->reg_cap); + } + proj->registry[proj->reg_count++] = item; +} + +DocItem* add_item(FileContext* ctx) { + if (ctx->count >= ctx->capacity) { + ctx->capacity *= 2; + ctx->items = realloc(ctx->items, sizeof(DocItem) * ctx->capacity); /* NOTE(review): realloc may move items; DocItem* values already handed to register_item() would then dangle - confirm and fix at the call sites */ + } + DocItem* item = &ctx->items[ctx->count++]; + memset(item, 0, sizeof(DocItem)); + item->source_file = ctx->filename; + return item; +} + +int item_exists(FileContext* ctx, const char* name) { + if (!name) return 0; + for (size_t i = 0; i < ctx->count; i++) { + if (ctx->items[i].name && strcmp(ctx->items[i].name, name) == 0) { + return 1; + } + } + return 0; +} + +// --- 3. LINK RESOLUTION --- +// --- 3.
LINK RESOLUTION --- + +// NEW: Helper to find any symbol (Item, Enum Variant, or Struct.Field) +// Returns 1 if found, setting out_file and out_anchor. 0 otherwise. +int find_link_target(ProjectContext* proj, const char* name, const char** out_file, const char** out_anchor) { + if (!name) return 0; + + for (size_t i = 0; i < proj->reg_count; i++) { + DocItem* item = proj->registry[i]; + + // 1. Check Top-Level Item Name (Struct, Enum, Function, Typedef) + if (item->name && strcmp(item->name, name) == 0) { + *out_file = item->source_file; + *out_anchor = item->anchor_id; + return 1; + } + + // 2. Check Enum Variants (Global Scope in C) + // Allows linking to [`SOME_ENUM_VALUE`] + if (item->kind == KIND_ENUM) { + for (int j = 0; j < item->field_count; j++) { + if (item->fields[j].name && strcmp(item->fields[j].name, name) == 0) { + *out_file = item->source_file; + *out_anchor = item->fields[j].name; // Anchor is the variant name + return 1; + } + } + } + + // 3. Check Struct Fields (Scoped: StructName.FieldName) + // Allows linking to [`MyStruct.my_field`] + if (item->kind == KIND_STRUCT && item->name) { + size_t len = strlen(item->name); + if (strncmp(name, item->name, len) == 0 && name[len] == '.') { + const char* field_part = name + len + 1; + for (int j = 0; j < item->field_count; j++) { + if (item->fields[j].name && strcmp(item->fields[j].name, field_part) == 0) { + *out_file = item->source_file; + *out_anchor = item->fields[j].name; + return 1; + } + } + } + } + } + return 0; +} + +// Kept for internal logic if needed, but mostly replaced by find_link_target +DocItem* find_item(ProjectContext* proj, const char* name) { + if (!name) return NULL; + for (size_t i = 0; i < proj->reg_count; i++) { + if (proj->registry[i]->name && strcmp(proj->registry[i]->name, name) == 0) { + return proj->registry[i]; + } + } + return NULL; +} + +int is_ident_char(char c) { + return isalnum((unsigned char)c) || c == '_'; +} + +char* linkify_type(ProjectContext* proj, const char* 
raw_type, const char* current_file) { + if (!raw_type) return NULL; + + size_t cap = strlen(raw_type) * 3 + 256; + char* out = malloc(cap); + size_t out_len = 0; + out[0] = '\0'; + + const char* p = raw_type; + char word[256]; + int w_idx = 0; + + while (*p) { + if (is_ident_char(*p)) { + if (w_idx < 255) word[w_idx++] = *p; + } else { + if (w_idx > 0) { + word[w_idx] = '\0'; + + const char *target_file, *target_anchor; + if (find_link_target(proj, word, &target_file, &target_anchor)) { + // Check if same file for relative link + if (current_file && strcmp(target_file, current_file) == 0) { + out_len += snprintf(out + out_len, cap - out_len, + "%s", + target_anchor, word); + } else { + out_len += snprintf(out + out_len, cap - out_len, + "%s", + target_file, target_anchor, word); + } + } else { + out_len += snprintf(out + out_len, cap - out_len, "%s", word); + } + w_idx = 0; + } + if (out_len < cap - 1) { + out[out_len++] = *p; + out[out_len] = '\0'; + } + } + p++; + } + + if (w_idx > 0) { + word[w_idx] = '\0'; + const char *target_file, *target_anchor; + if (find_link_target(proj, word, &target_file, &target_anchor)) { + if (current_file && strcmp(target_file, current_file) == 0) { + out_len += snprintf(out + out_len, cap - out_len, + "%s", + target_anchor, word); + } else { + out_len += snprintf(out + out_len, cap - out_len, + "%s", + target_file, target_anchor, word); + } + } else { + out_len += snprintf(out + out_len, cap - out_len, "%s", word); + } + } + + return out; +} + +char* resolve_links(ProjectContext* proj, const char* text, const char* current_file) { + if (!text) return NULL; + size_t cap = strlen(text) * 2 + 512; + char* output = malloc(cap); + size_t out_len = 0; + const char* p = text; + + while (*p) { + if (p[0] == '[' && p[1] == '`') { + const char* start = p + 2; + const char* end = strstr(start, "`]"); + if (end) { + int name_len = end - start; + char name[128]; + if (name_len < 127) { + strncpy(name, start, name_len); + name[name_len] = '\0'; 
+ + const char *target_file, *target_anchor; + if (find_link_target(proj, name, &target_file, &target_anchor)) { /* grow first: the sprintf below is unbounded and the cap - 100 check only runs on the single-char path; 13 = fixed characters of the longer format plus the NUL */ size_t need = strlen(name) + strlen(target_file) + strlen(target_anchor) + 13; if (out_len + need + 100 >= cap) { cap = (out_len + need + 100) * 2; output = realloc(output, cap); } + if (current_file && strcmp(target_file, current_file) == 0) { + out_len += sprintf(output + out_len, "[`%s`](#%s)", name, target_anchor); + } else { + out_len += sprintf(output + out_len, "[`%s`](%s.html#%s)", name, target_file, target_anchor); + } + p = end + 2; + continue; + } + } + } + } + output[out_len++] = *p++; + if (out_len >= cap - 100) { + cap *= 2; + output = realloc(output, cap); + } + } + output[out_len] = '\0'; + return output; +} + +char* clean_comment(const char* raw) { /* Strips comment markers from each line of raw; returns a new heap string with one '\n' per kept line. */ + if (!raw) return NULL; + size_t len = strlen(raw); + char* output = malloc(len + 2); /* +2: every kept line gains a '\n' even if raw lacks a final one, plus the NUL */ + char* out_ptr = output; + char* temp = my_strdup(raw); + char* line = strtok(temp, "\n"); + + while (line != NULL) { + char* p = line; + while (*p && isspace((unsigned char)*p)) p++; + if (strncmp(p, "/**", 3) == 0) p += 3; + else if (strncmp(p, "/*!", 3) == 0) p += 3; + else if (strncmp(p, "///", 3) == 0) p += 3; + else if (strncmp(p, "*/", 2) == 0) { line = strtok(NULL, "\n"); continue; } + else if (*p == '*') p++; + if (*p == ' ') p++; + while (*p) *out_ptr++ = *p++; + *out_ptr++ = '\n'; + line = strtok(NULL, "\n"); + } + *out_ptr = '\0'; + free(temp); + return output; +} + +// --- 4.
PARSING --- + +void parse_file_level_docs(FileContext* ctx, const char* real_path) { /* Collects the text of every "//!" line in real_path into ctx->file_doc; leaves it untouched when the file cannot be opened or has no such lines. */ + FILE* f = fopen(real_path, "r"); + if (!f) return; + char* buffer = malloc(50000); + if (!buffer) { fclose(f); return; } + buffer[0] = '\0'; size_t used = 0; /* bytes stored in buffer so far, excluding the NUL */ + char line[2048]; + while (fgets(line, sizeof(line), f)) { + char* p = line; + while (*p && isspace((unsigned char)*p)) p++; + if (strncmp(p, "//!", 3) == 0) { + p += 3; if (*p == ' ') p++; + size_t n = strlen(p); if (used + n < 50000) { memcpy(buffer + used, p, n + 1); used += n; } /* bounded append: text that would overflow the 50000-byte buffer is dropped instead of written past its end */ + } + } + fclose(f); + if (strlen(buffer) > 0) ctx->file_doc = buffer; + else free(buffer); +} + +int is_skippable(CXCursor cursor) { + if (clang_Cursor_isAnonymous(cursor)) return 1; + CXString cx = clang_getCursorSpelling(cursor); + const char* s = clang_getCString(cx); + int skip = (!s || strlen(s) == 0 || strstr(s, "(unnamed") != NULL); + clang_disposeString(cx); + return skip; +} + +enum CXChildVisitResult struct_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) { + DocItem* item = (DocItem*)client_data; + if (clang_getCursorKind(cursor) == CXCursor_FieldDecl) { + int idx = item->field_count++; + item->fields = realloc(item->fields, sizeof(Field) * item->field_count); + item->fields[idx].name = to_cstr(clang_getCursorSpelling(cursor)); + item->fields[idx].type = to_cstr(clang_getTypeSpelling(clang_getCursorType(cursor))); + item->fields[idx].doc = to_cstr(clang_Cursor_getRawCommentText(cursor)); + } + return CXChildVisit_Continue; +} + +enum CXChildVisitResult enum_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) { + DocItem* item = (DocItem*)client_data; + if (clang_getCursorKind(cursor) == CXCursor_EnumConstantDecl) { + int idx = item->field_count++; + item->fields = realloc(item->fields, sizeof(Field) * item->field_count); + item->fields[idx].name = to_cstr(clang_getCursorSpelling(cursor)); + long long val = clang_getEnumConstantDeclValue(cursor); + char val_str[64]; + snprintf(val_str, 64, "%lld", val); + item->fields[idx].type = my_strdup(val_str); + item->fields[idx].doc = 
to_cstr(clang_Cursor_getRawCommentText(cursor)); + } + return CXChildVisit_Continue; +}
+
+// Child visitor used for function-pointer typedefs: appends one Field to
+// item->args for every ParmDecl child it is handed.
+//
+// client_data is the DocItem being filled in. Each new entry gets its name
+// and type spelling via to_cstr() and a NULL doc. `parent` is unused.
+// Returns CXChildVisit_Continue, or CXChildVisit_Break if the args array
+// cannot be grown.
+enum CXChildVisitResult typedef_param_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) {
+    DocItem* item = (DocItem*)client_data;
+    if (clang_getCursorKind(cursor) != CXCursor_ParmDecl) return CXChildVisit_Continue;
+
+    // Grow through a temporary so a failed realloc neither leaks the old
+    // array nor leaves arg_count ahead of the storage actually available.
+    Field* grown = realloc(item->args, sizeof(Field) * (item->arg_count + 1));
+    if (!grown) return CXChildVisit_Break;
+    item->args = grown;
+
+    Field* slot = &item->args[item->arg_count++];
+    slot->name = to_cstr(clang_getCursorSpelling(cursor));
+    slot->type = to_cstr(clang_getTypeSpelling(clang_getCursorType(cursor)));
+    slot->doc = NULL;
+    return CXChildVisit_Continue;
+}
+
+// Builds the "<prefix>.<name>" string stored in DocItem.anchor_id and returns
+// a my_strdup() copy of it. A NULL name is spelled "unknown". The text is
+// formatted into a 256-byte buffer, so longer results are truncated.
+static char* visitor_make_anchor(const char* prefix, const char* name) {
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%s.%s", prefix, name ? name : "unknown");
+    return my_strdup(buf);
+}
+
+// Top-level visitor: records one DocItem per supported declaration found in
+// the main file of the translation unit.
+//
+// client_data is a two-element void* array: [0] is the FileContext that
+// receives the items, [1] is the ProjectContext handed to register_item().
+// Cursors whose location is not in the main file are skipped. Handled kinds:
+//   - FunctionDecl: return type plus one Field per argument.
+//   - StructDecl / EnumDecl: skipped when is_skippable(cursor) is true or the
+//     cursor is not a definition; members are collected by struct_visitor /
+//     enum_visitor.
+//   - TypedefDecl: recorded as a struct or enum when the canonical underlying
+//     type is a record or an enum, otherwise as a plain alias. For an alias
+//     that is a pointer to a function prototype the return type and the
+//     parameters are recorded as well.
+// A name for which item_exists(ctx, name) is true is never added again.
+// Returns CXChildVisit_Continue when the cursor is skipped and
+// CXChildVisit_Recurse otherwise.
+enum CXChildVisitResult main_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) {
+    void** args = (void**)client_data;
+    FileContext* ctx = (FileContext*)args[0];
+    ProjectContext* proj = (ProjectContext*)args[1];
+
+    if (clang_Location_isFromMainFile(clang_getCursorLocation(cursor)) == 0) return CXChildVisit_Continue;
+
+    enum CXCursorKind kind = clang_getCursorKind(cursor);
+    DocItem* item = NULL;
+
+    if (kind == CXCursor_FunctionDecl) {
+        char* name = to_cstr(clang_getCursorSpelling(cursor));
+        if (item_exists(ctx, name)) { free(name); return CXChildVisit_Continue; }
+
+        item = add_item(ctx);
+        item->kind = KIND_FUNCTION;
+        item->name = name;
+        item->doc_comment = to_cstr(clang_Cursor_getRawCommentText(cursor));
+        item->return_type = to_cstr(clang_getTypeSpelling(clang_getCursorResultType(cursor)));
+        item->anchor_id = visitor_make_anchor("fn", item->name);
+
+        // Only allocate for a positive count, and only publish the count once
+        // the storage exists, so later loops over args never touch NULL.
+        int num = clang_Cursor_getNumArguments(cursor);
+        item->arg_count = 0;
+        item->args = (num > 0) ? malloc(sizeof(Field) * num) : NULL;
+        if (item->args) {
+            item->arg_count = num;
+            for (int i = 0; i < num; i++) {
+                CXCursor arg = clang_Cursor_getArgument(cursor, i);
+                item->args[i].name = to_cstr(clang_getCursorSpelling(arg));
+                item->args[i].type = to_cstr(clang_getTypeSpelling(clang_getCursorType(arg)));
+                item->args[i].doc = NULL;
+            }
+        }
+    }
+    else if (kind == CXCursor_StructDecl || kind == CXCursor_EnumDecl) {
+        if (is_skippable(cursor) || !clang_isCursorDefinition(cursor)) return CXChildVisit_Continue;
+
+        char* name = to_cstr(clang_getCursorSpelling(cursor));
+        if (item_exists(ctx, name)) { free(name); return CXChildVisit_Continue; }
+
+        int is_struct = (kind == CXCursor_StructDecl);
+
+        item = add_item(ctx);
+        item->kind = is_struct ? KIND_STRUCT : KIND_ENUM;
+        item->name = name;
+        item->doc_comment = to_cstr(clang_Cursor_getRawCommentText(cursor));
+        item->anchor_id = visitor_make_anchor(is_struct ? "struct" : "enum", item->name);
+
+        clang_visitChildren(cursor, is_struct ? struct_visitor : enum_visitor, item);
+    }
+    else if (kind == CXCursor_TypedefDecl) {
+        char* name = to_cstr(clang_getCursorSpelling(cursor));
+        if (item_exists(ctx, name)) { free(name); return CXChildVisit_Continue; }
+
+        item = add_item(ctx);
+        item->name = name;
+
+        CXType underlying = clang_getTypedefDeclUnderlyingType(cursor);
+        CXType canonical = clang_getCanonicalType(underlying);
+
+        if (canonical.kind == CXType_Record || canonical.kind == CXType_Enum) {
+            int is_record = (canonical.kind == CXType_Record);
+            CXCursor decl = clang_getTypeDeclaration(canonical);
+
+            item->kind = is_record ? KIND_STRUCT : KIND_ENUM;
+            item->anchor_id = visitor_make_anchor(is_record ? "struct" : "enum", item->name);
+
+            // Prefer the comment attached to the typedef itself; when it is
+            // missing or empty, fall back to the one on the type declaration.
+            char* doc = to_cstr(clang_Cursor_getRawCommentText(cursor));
+            if (!doc || !*doc) {
+                free(doc);
+                doc = to_cstr(clang_Cursor_getRawCommentText(decl));
+            }
+            item->doc_comment = doc;
+
+            clang_visitChildren(decl, is_record ? struct_visitor : enum_visitor, item);
+        }
+        else {
+            item->kind = KIND_TYPEDEF;
+            item->anchor_id = visitor_make_anchor("type", item->name);
+            item->doc_comment = to_cstr(clang_Cursor_getRawCommentText(cursor));
+            item->underlying_type = to_cstr(clang_getTypeSpelling(underlying));
+
+            // Pointer to a function prototype: keep the return type and walk
+            // the typedef's children to pick up its ParmDecl parameters.
+            if (underlying.kind == CXType_Pointer) {
+                CXType pointee = clang_getPointeeType(underlying);
+                if (pointee.kind == CXType_FunctionProto) {
+                    item->return_type = to_cstr(clang_getTypeSpelling(clang_getResultType(pointee)));
+                    clang_visitChildren(cursor, typedef_param_visitor, item);
+                }
+            }
+        }
+    }
+
+    if (item) register_item(proj, item);
+
+    return CXChildVisit_Recurse;
+}
+
+// --- 5. HTML GENERATION ---
+
+// Renders one raw doc comment as HTML into `f`. The text goes through
+// clean_comment(), then resolve_links(), then cmark_markdown_to_html().
+// Does nothing when `raw` is NULL.
+// CMARK_OPT_UNSAFE is passed to cmark, so raw HTML written inside a comment
+// is emitted as-is rather than being replaced by a placeholder.
+void render_md(FILE* f, ProjectContext* proj, const char* raw, const char* current_file) {
+    if (!raw) return;
+    char* clean = clean_comment(raw);
+    char* linked = resolve_links(proj, clean, current_file);
+    char* html = cmark_markdown_to_html(linked, strlen(linked), CMARK_OPT_UNSAFE);
+    if (html) fputs(html, f);
+    free(html);
+    free(linked);
+    free(clean);
+}
+
+// Writes the opening part of a page to `f`, with `title` printed in between.
+// Called first by both generate_file_html() and generate_index().
+// NOTE(review): the empty string literals below look like markup that was
+// lost before this text reached review - confirm against the real file.
+void write_common_head(FILE* f, const char* title) {
+    fprintf(f, "");
+    fprintf(f, "%s", title);
+    fprintf(f, "");
+    fprintf(f, "");
+}
+
+void render_sidebar_section(FILE* f, FileContext* ctx, ItemKind kind, const char* title) { + int found = 0; + // Check if any items of this kind exist + for(size_t i=0; icount; i++) { + if (ctx->items[i].kind == kind) { + found = 1; + break; + } + } + // If found, print the header and the links + if (found) { + fprintf(f, "

%s

", title); + for(size_t i=0; icount; i++) { + if (ctx->items[i].kind == kind) { + fprintf(f, "%s", ctx->items[i].anchor_id, ctx->items[i].name); + } + } + } +} + +void generate_file_html(ProjectContext* proj, FileContext* ctx, const char* out_dir) { + char path[1024]; + snprintf(path, 1024, "%s/%s.html", out_dir, ctx->filename); + FILE* f = fopen(path, "w"); + if (!f) return; + + write_common_head(f, ctx->filename); + + // --- SIDEBAR GENERATION --- + fprintf(f, ""); + // -------------------------- + + fprintf(f, "
"); + fprintf(f, "

Header %s

", ctx->filename); + + if (ctx->file_doc) { + fprintf(f, "
"); + render_md(f, proj, ctx->file_doc, ctx->filename); + fprintf(f, "
"); + } + + for(size_t i=0; icount; i++) { + DocItem* item = &ctx->items[i]; + const char* kind_str = "Unknown"; + if (item->kind == KIND_FUNCTION) kind_str = "Function"; + else if (item->kind == KIND_STRUCT) kind_str = "Struct"; + else if (item->kind == KIND_ENUM) kind_str = "Enum"; + else if (item->kind == KIND_TYPEDEF) kind_str = "Type Alias"; + + fprintf(f, "

%s %s

", item->anchor_id, kind_str, item->anchor_id, item->name); + + fprintf(f, "
"); + if (item->kind == KIND_FUNCTION) { + char* ret_linked = linkify_type(proj, item->return_type, ctx->filename); + fprintf(f, "%s %s(", ret_linked, item->name); + free(ret_linked); + + for(int j=0; jarg_count; j++) { + char* arg_linked = linkify_type(proj, item->args[j].type, ctx->filename); + fprintf(f, "\n %s %s", arg_linked, item->args[j].name); + free(arg_linked); + if(jarg_count-1) fprintf(f, ","); + } + fprintf(f, "\n)"); + } + else if (item->kind == KIND_STRUCT) { + fprintf(f, "struct %s {", item->name); + for(int j=0; jfield_count; j++) { + char* f_linked = linkify_type(proj, item->fields[j].type, ctx->filename); + fprintf(f, "\n %s %s;", f_linked, item->fields[j].name); + free(f_linked); + } + fprintf(f, "\n}"); + } + else if (item->kind == KIND_ENUM) { + fprintf(f, "enum %s {", item->name); + for(int j=0; jfield_count; j++) { + fprintf(f, "\n %s = %s,", item->fields[j].name, item->fields[j].type); + } + fprintf(f, "\n}"); + } + else { + if (item->return_type && item->arg_count > 0) { + char* ret_linked = linkify_type(proj, item->return_type, ctx->filename); + fprintf(f, "typedef %s = %s (*)(", item->name, ret_linked); + free(ret_linked); + + for(int j=0; jarg_count; j++) { + char* arg_linked = linkify_type(proj, item->args[j].type, ctx->filename); + fprintf(f, "%s %s", arg_linked, item->args[j].name); + free(arg_linked); + if(j < item->arg_count - 1) fprintf(f, ", "); + } + fprintf(f, ");"); + } else { + char* under_linked = linkify_type(proj, item->underlying_type, ctx->filename); + fprintf(f, "typedef %s = %s;", item->name, under_linked); + free(under_linked); + } + } + fprintf(f, "
"); + + if (item->doc_comment) { + fprintf(f, "
"); + render_md(f, proj, item->doc_comment, ctx->filename); + fprintf(f, "
"); + } + + if ((item->kind == KIND_STRUCT || item->kind == KIND_ENUM) && item->field_count > 0) { + int has_field_docs = 0; + for(int k=0; kfield_count; k++) if(item->fields[k].doc) has_field_docs=1; + + if(has_field_docs) { + fprintf(f, "

%s

", item->kind == KIND_ENUM ? "Variants" : "Fields"); + for(int j=0; jfield_count; j++) { + if(item->fields[j].doc) { + fprintf(f, "
", item->fields[j].name); + fprintf(f, "%s", item->fields[j].name); + fprintf(f, "
"); + render_md(f, proj, item->fields[j].doc, ctx->filename); + fprintf(f, "
"); + } + } + } + } + } + fprintf(f, "
"); + fclose(f); +} + +void generate_index(ProjectContext* proj, const char* out_dir) { + char path[1024]; + snprintf(path, 1024, "%s/index.html", out_dir); + FILE* f = fopen(path, "w"); + if (!f) return; + + write_common_head(f, "Project Documentation"); + + fprintf(f, ""); + + fprintf(f, "
"); + fprintf(f, "

Project Documentation

"); + fprintf(f, "

Headers

    "); + for(size_t i=0; icount; i++) { + fprintf(f, "
  • %s
  • ", proj->files[i].filename, proj->files[i].filename); + } + fprintf(f, "
"); + + fprintf(f, "

Global Symbols

"); + for(size_t i=0; ireg_count; i++) { + DocItem* item = proj->registry[i]; + if (item->name) { + fprintf(f, "%s", + item->source_file, item->anchor_id, item->name); + } + } + fprintf(f, "
"); + fprintf(f, "
"); + fclose(f); +}
+
+// Returns 1 when `path` can be stat()ed and names a directory, 0 otherwise.
+int dir_exists(const char* path) {
+    struct stat info;
+    if (stat(path, &info) != 0) return 0;
+    return S_ISDIR(info.st_mode) ? 1 : 0;
+}
+
+// qsort() comparator over DocItem elements: orders by name with strcmp().
+// Items without a name sort after named ones; two nameless items compare
+// equal.
+int compare_items(const void* a, const void* b) {
+    const DocItem* lhs = (const DocItem*)a;
+    const DocItem* rhs = (const DocItem*)b;
+
+    if (!lhs->name || !rhs->name) {
+        if (!lhs->name && !rhs->name) return 0;
+        return lhs->name ? -1 : 1;
+    }
+    return strcmp(lhs->name, rhs->name);
+}
+
+// Entry point: cdoc <out_dir> <file1.h> [file2.h ...]
+//
+// Steps:
+//   1. Find Clang's builtin include directory: ask `clang -print-resource-dir`
+//      and append "/include", else try a fixed list of install paths.
+//   2. Parse every header with libclang (-I. -Iinclude -xc, plus the directory
+//      from step 1 when found) and collect its items through main_visitor.
+//      A header that fails to parse is reported and skipped.
+//   3. Sort each file's items with compare_items.
+//   4. Create out_dir, then write one page per header via generate_file_html()
+//      and the index via generate_index().
+// Returns 1 on bad usage or when out_dir is not a directory after the mkdir
+// attempt, 0 otherwise.
+int main(int argc, char** argv) {
+    #ifndef _WIN32
+    signal(SIGPIPE, SIG_IGN);
+    #endif
+
+    if (argc < 3) {
+        printf("Usage: %s <out_dir> <file1.h> [file2.h ...]\n", argv[0]);
+        return 1;
+    }
+
+    const char* out_dir = argv[1];
+
+    char clang_inc_path[1024] = {0};
+    int found_inc = 0;
+    FILE* p = popen("clang -print-resource-dir", "r");
+    if (p) {
+        if (fgets(clang_inc_path, sizeof(clang_inc_path), p)) {
+            size_t len = strlen(clang_inc_path);
+            while (len > 0 && isspace((unsigned char)clang_inc_path[len-1])) clang_inc_path[--len] = '\0';
+            // Append only when the suffix and its terminator still fit; an
+            // over-long path is treated like "not found" instead of
+            // overrunning the buffer.
+            if (len + strlen("/include") < sizeof(clang_inc_path)) {
+                strcat(clang_inc_path, "/include");
+                if (dir_exists(clang_inc_path)) found_inc = 1;
+            }
+        }
+        pclose(p);
+    }
+
+    if (!found_inc) {
+        const char* common_paths[] = {
+            "/usr/lib/clang/18/include", "/usr/lib/clang/17/include",
+            "/usr/lib/clang/16/include", "/usr/lib/clang/15/include",
+            "/usr/lib/clang/14/include", "/usr/lib64/clang/18/include",
+            NULL
+        };
+        for (int i = 0; common_paths[i]; i++) {
+            if (dir_exists(common_paths[i])) {
+                strcpy(clang_inc_path, common_paths[i]);
+                found_inc = 1;
+                break;
+            }
+        }
+    }
+
+    char arg_include_flag[1100];
+    const char* clang_args[8];
+    int num_args = 0;
+    clang_args[num_args++] = "-I.";
+    clang_args[num_args++] = "-Iinclude";
+    clang_args[num_args++] = "-xc";
+    if (found_inc) {
+        snprintf(arg_include_flag, sizeof(arg_include_flag), "-I%s", clang_inc_path);
+        clang_args[num_args++] = arg_include_flag;
+        printf("Using Clang headers: %s\n", clang_inc_path);
+    }
+
+    ProjectContext proj;
+    init_project(&proj);
+    CXIndex index = clang_createIndex(0, 1);
+
+    for (int i = 2; i < argc; i++) {
+        const char* filepath = argv[i];
+        printf("Parsing %s...\n", filepath);
+
+        FileContext* file_ctx = add_file(&proj, filepath);
+        parse_file_level_docs(file_ctx, filepath);
+
+        CXTranslationUnit unit = clang_parseTranslationUnit(
+            index, filepath, clang_args, num_args, NULL, 0, CXTranslationUnit_None
+        );
+
+        if (!unit) {
+            printf("Failed to parse %s\n", filepath);
+            continue;
+        }
+
+        // main_visitor expects { FileContext*, ProjectContext* } as client data.
+        CXCursor root = clang_getTranslationUnitCursor(unit);
+        void* args[] = { file_ctx, &proj };
+        clang_visitChildren(root, main_visitor, args);
+        clang_disposeTranslationUnit(unit);
+    }
+
+    for (size_t i = 0; i < proj.count; i++) {
+        qsort(proj.files[i].items, proj.files[i].count, sizeof(DocItem), compare_items);
+    }
+
+    printf("Generating HTML in '%s'...\n", out_dir);
+
+    #ifdef _WIN32
+    _mkdir(out_dir);
+    #else
+    mkdir(out_dir, 0777);
+    #endif
+
+    // mkdir also fails when the directory already exists, so its result is
+    // not inspected; what matters is whether a directory is there now.
+    // Without this check every fopen() in the generators fails silently and
+    // the tool still prints "Done!".
+    if (!dir_exists(out_dir)) {
+        fprintf(stderr, "Cannot create output directory '%s'\n", out_dir);
+        clang_disposeIndex(index);
+        return 1;
+    }
+
+    for (size_t i = 0; i < proj.count; i++) {
+        generate_file_html(&proj, &proj.files[i], out_dir);
+    }
+    generate_index(&proj, out_dir);
+
+    clang_disposeIndex(index);
+    printf("Done! Open %s/index.html\n", out_dir);
+    return 0;
+}