feat: move separate commands to a unified interface

This commit is contained in:
Clelia (Astra) Bertelli
2026-02-13 16:32:21 +01:00
parent 11fada6b25
commit e98ed6f241
10 changed files with 487 additions and 437 deletions
+3 -18
View File
@@ -11,24 +11,9 @@ keywords = ["semantic-search", "document-parsing", "cli", "pdf", "search"]
readme = "README.md"
[[bin]]
name = "parse"
path = "src/bin/parse.rs"
required-features = ["parse"]
[[bin]]
name = "search"
path = "src/bin/search.rs"
required-features = ["search"]
[[bin]]
name = "workspace"
path = "src/bin/workspace.rs"
required-features = ["workspace", "search"]
[[bin]]
name = "ask"
path = "src/bin/ask.rs"
required-features = ["ask", "search"]
name = "semtools"
path = "src/bin/semtools.rs"
required-features = ["ask", "search", "workspace", "parse"]
[dependencies]
# Common dependencies
-68
View File
@@ -1,68 +0,0 @@
use anyhow::Result;
use clap::Parser;
use std::path::Path;
use semtools::{LlamaParseBackend, SemtoolsConfig};
// Command-line arguments of the `parse` binary, declared with clap's derive API.
// The `///` comments on the fields double as the per-option help text shown by
// clap, so they are intentionally left untouched.
#[derive(Parser, Debug)]
#[command(version, about = "A CLI tool for parsing documents using various backends", long_about = None)]
struct Args {
/// Path to the config file. Defaults to ~/.semtools_config.json
// When absent, main falls back to SemtoolsConfig::default_config_path.
#[clap(short = 'c', long)]
config: Option<String>,
/// The backend type to use for parsing. Defaults to `llama-parse`
// main only recognises "llama-parse"; any other value makes it exit with status 1.
#[clap(short, long, default_value = "llama-parse")]
backend: String,
/// Files to parse
// At least one file is required. main only warns about paths that do not exist
// and still hands the full list to the backend.
#[clap(required = true)]
files: Vec<String>,
/// Verbose output while parsing
// Forwarded to LlamaParseBackend::new.
#[clap(short, long)]
verbose: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
// Get config file path
let config_path = args
.config
.unwrap_or_else(SemtoolsConfig::default_config_path);
// Load configuration
let semtools_config = SemtoolsConfig::from_config_file(&config_path)?;
let parse_config = semtools_config.parse.unwrap_or_default();
// Validate that files exist
for file in &args.files {
if !Path::new(file).exists() {
eprintln!("Warning: File does not exist: {file}");
}
}
// Create backend and process files
match args.backend.as_str() {
"llama-parse" => {
let backend = LlamaParseBackend::new(parse_config, args.verbose)?;
let results = backend.parse(args.files).await?;
// Output the paths to parsed files, one per line
for result_path in results {
println!("{result_path}");
}
}
_ => {
eprintln!(
"Error: Unknown backend '{}'. Supported backends: llama-parse",
args.backend
);
std::process::exit(1);
}
}
Ok(())
}
+181
View File
@@ -0,0 +1,181 @@
use clap::{Parser, Subcommand};
use semtools::cmds::ask::ask_cmd;
use semtools::cmds::parse::parse_cmd;
use semtools::cmds::search::search_cmd;
use semtools::cmds::workspace::{workspace_prune_cmd, workspace_status_cmd, workspace_use_cmd};
// Top-level command line of the unified `semtools` binary.
//
// `version` restores the `--version` flag that each of the former standalone
// binaries declared through `#[command(version, ...)]`, and `about` gives the
// top-level `--help` output a description. Plain `//` comments are used here
// on purpose: clap would turn `///` comments into help text.
#[derive(Parser, Debug)]
#[command(
    version,
    about = "Semantic search, document parsing, question-answering and workspace management from a single CLI",
    long_about = None
)]
struct SemtoolsArgs {
    // The subcommand selected by the user; dispatched in `main`.
    #[command(subcommand)]
    cmd: Commands,
}
// Actions available under the `workspace` subcommand. main dispatches each
// variant to the matching `workspace_*_cmd` function.
// The `///` comments are clap help text and are left untouched.
#[derive(Subcommand, Debug)]
enum WorkspaceCommands {
// `name` is a required positional argument; it is forwarded to workspace_use_cmd.
/// Use or create a workspace (prints export command to run)
Use { name: String },
// Handled by workspace_status_cmd.
/// Show active workspace and basic stats
Status,
// Handled by workspace_prune_cmd.
/// Remove stale or missing files from store
Prune {},
}
// Top-level subcommands of the `semtools` binary. main destructures the
// selected variant and forwards its fields, in order, to the matching
// `*_cmd` function from `semtools::cmds`.
// The `///` comments are clap help text and are left untouched.
// NOTE(review): the variants are feature-gated, but the match arms and imports
// in this file are not; this presumably relies on the binary only being built
// with all four features enabled — confirm against `required-features`.
#[derive(Subcommand, Debug)]
enum Commands {
// Handled by parse_cmd.
#[cfg(feature = "parse")]
/// A CLI tool for parsing documents using various backends
Parse {
/// Path to the config file. Defaults to ~/.semtools_config.json
#[clap(short = 'c', long)]
config: Option<String>,
/// The backend type to use for parsing. Defaults to `llama-parse`
#[clap(short, long, default_value = "llama-parse")]
backend: String,
/// Files to parse
#[clap(required = true)]
files: Vec<String>,
/// Verbose output while parsing
#[clap(short, long)]
verbose: bool,
},
// Handled by search_cmd.
#[cfg(feature = "search")]
/// A CLI tool for fast semantic keyword search
Search {
/// Query to search for (positional argument)
query: String,
/// Files to search (positional arguments, optional if using stdin)
#[arg(help = "Files to search, optional if using stdin")]
files: Vec<String>,
/// How many lines before/after to return as context
// Exposed as -n / --n-lines, with --context accepted as an alias.
#[arg(short = 'n', long = "n-lines", alias = "context", default_value_t = 3)]
n_lines: usize,
/// The top-k files or texts to return (ignored if max_distance is set)
#[arg(long, default_value_t = 3)]
top_k: usize,
/// Return all results with distance below this threshold (0.0+)
// Exposed as -m / --max-distance, with --threshold accepted as an alias.
#[arg(short = 'm', long = "max-distance", alias = "threshold")]
max_distance: Option<f64>,
/// Perform case-insensitive search (default is false)
#[arg(short, long, default_value_t = false)]
ignore_case: bool,
/// Output results in JSON format
#[clap(short, long)]
json: bool,
},
// Handled by ask_cmd.
#[cfg(feature = "ask")]
/// A CLI tool for document-based question-answering
Ask {
/// Query to prompt the agent with
query: String,
/// Files to search (positional arguments, optional if using stdin)
#[arg(help = "Files to search, optional if using stdin")]
files: Vec<String>,
/// Path to the config file. Defaults to ~/.semtools_config.json
#[clap(short = 'c', long)]
config: Option<String>,
/// OpenAI API key (overrides config file and env var)
#[clap(long)]
api_key: Option<String>,
/// OpenAI base URL (overrides config file)
#[clap(long)]
base_url: Option<String>,
/// Model to use for the agent (overrides config file)
#[clap(short, long)]
model: Option<String>,
/// API mode to use: 'chat' or 'responses' (overrides config file)
#[clap(long)]
api_mode: Option<String>,
/// Output results in JSON or text format
#[clap(short, long)]
json: bool,
},
// Handled by the workspace_*_cmd functions, selected by the nested subcommand.
#[cfg(feature = "workspace")]
/// Manage semtools workspaces
Workspace {
/// Output results in JSON format
// Declared `global = true` so the flag is also accepted after the nested subcommand.
#[clap(short, long, global = true)]
json: bool,
#[command(subcommand)]
command: WorkspaceCommands,
},
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let args = SemtoolsArgs::parse();
match args.cmd {
Commands::Ask {
query,
files,
config,
api_key,
base_url,
model,
api_mode,
json,
} => {
ask_cmd(
query, files, config, api_key, base_url, model, api_mode, json,
)
.await?;
}
Commands::Parse {
config,
backend,
files,
verbose,
} => {
parse_cmd(config, backend, files, verbose).await?;
}
Commands::Search {
query,
files,
n_lines,
top_k,
max_distance,
ignore_case,
json,
} => {
search_cmd(
query,
files,
n_lines,
top_k,
max_distance,
ignore_case,
json,
)
.await?;
}
Commands::Workspace { json, command } => match command {
WorkspaceCommands::Use { name } => {
workspace_use_cmd(name, json).await?;
}
WorkspaceCommands::Prune {} => {
workspace_prune_cmd(json).await?;
}
WorkspaceCommands::Status => {
workspace_status_cmd(json).await?;
}
},
}
Ok(())
}
-200
View File
@@ -1,200 +0,0 @@
use anyhow::{Context, Result};
use clap::{Parser, Subcommand};
#[cfg(feature = "workspace")]
use semtools::workspace::{Workspace, WorkspaceConfig, store::Store};
use semtools::json_mode::{PruneOutput, WorkspaceOutput};
#[cfg(not(feature = "workspace"))]
use semtools::json_mode::ErrorOutput;
// Command-line arguments of the `workspace` binary, declared with clap's derive API.
// The `///` comments are clap help text and are left untouched.
#[derive(Parser, Debug)]
#[command(version, about = "Manage semtools workspaces", long_about = None)]
struct Args {
/// Output results in JSON format
// Declared `global = true` so the flag is also accepted after the subcommand.
#[clap(short, long, global = true)]
json: bool,
// The action to run; see `Commands`.
#[command(subcommand)]
command: Commands,
}
// Actions supported by the `workspace` binary; each one is handled by an arm
// of the match in main.
// The `///` comments are clap help text and are left untouched.
#[derive(Subcommand, Debug)]
enum Commands {
// `name` is a required positional argument naming the workspace to configure.
/// Use or create a workspace (prints export command to run)
Use { name: String },
/// Show active workspace and basic stats
Status,
/// Remove stale or missing files from store
Prune {},
}
/// Entry point of the `workspace` binary: parses the CLI and runs the
/// selected action, or reports that the `workspace` feature is disabled.
#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();

    match args.command {
        Commands::Use { name } => {
            #[cfg(feature = "workspace")]
            {
                use_workspace(name, args.json)?;
            }
            #[cfg(not(feature = "workspace"))]
            {
                report_feature_disabled(args.json)?;
            }
        }
        Commands::Status => {
            #[cfg(feature = "workspace")]
            {
                show_status(args.json)?;
            }
            #[cfg(not(feature = "workspace"))]
            {
                report_feature_disabled(args.json)?;
            }
        }
        Commands::Prune {} => {
            #[cfg(feature = "workspace")]
            {
                prune_workspace(args.json)?;
            }
            #[cfg(not(feature = "workspace"))]
            {
                report_feature_disabled(args.json)?;
            }
        }
    }

    Ok(())
}

/// Saves the configuration for workspace `name` and prints either a JSON
/// summary or the shell instructions for activating it.
#[cfg(feature = "workspace")]
fn use_workspace(name: String, json: bool) -> Result<()> {
    // Initialize new workspace configuration
    let ws = Workspace {
        config: WorkspaceConfig {
            name: name.clone(),
            root_dir: Workspace::root_path(&name)?,
            ..Default::default()
        },
    };
    ws.save()?;

    if !json {
        println!("Workspace '{name}' configured.");
        println!("To activate it, run:");
        println!("  export SEMTOOLS_WORKSPACE={name}");
        println!();
        println!("Or add this to your shell profile (.bashrc, .zshrc, etc.)");
        return Ok(());
    }

    // A store that cannot be opened or queried is reported as holding 0 documents.
    let total_documents = match Store::open(&ws.config.root_dir) {
        Ok(store) => match store.get_stats() {
            Ok(stats) => stats.total_documents,
            Err(_) => 0,
        },
        Err(_) => 0,
    };
    let summary = WorkspaceOutput {
        name: ws.config.name.clone(),
        root_dir: ws.config.root_dir.clone(),
        total_documents,
    };
    println!("{}", serde_json::to_string_pretty(&summary)?);
    Ok(())
}

/// Prints name, root directory, document count and index information of the
/// active workspace, as JSON or as plain text.
#[cfg(feature = "workspace")]
fn show_status(json: bool) -> Result<()> {
    let _active = Workspace::active().context("No active workspace")?;
    let ws = Workspace::open()?;

    // Open store and get stats
    let stats = Store::open(&ws.config.root_dir)?.get_stats()?;

    if json {
        let summary = WorkspaceOutput {
            name: ws.config.name.clone(),
            root_dir: ws.config.root_dir.clone(),
            total_documents: stats.total_documents,
        };
        println!("{}", serde_json::to_string_pretty(&summary)?);
        return Ok(());
    }

    println!("Active workspace: {}", ws.config.name);
    println!("Root: {}", ws.config.root_dir);
    println!("Documents: {}", stats.total_documents);
    let index_line = if stats.has_index {
        format!(
            "Index: Yes ({})",
            stats.index_type.unwrap_or_else(|| "Unknown".to_string())
        )
    } else {
        "Index: No".to_string()
    };
    println!("{index_line}");
    Ok(())
}

/// Deletes store entries whose files no longer exist on disk and reports
/// what was removed, as JSON or as plain text.
#[cfg(feature = "workspace")]
fn prune_workspace(json: bool) -> Result<()> {
    let _active = Workspace::active().context("No active workspace")?;
    let ws = Workspace::open()?;
    let store = Store::open(&ws.config.root_dir)?;

    // Get all document paths from the workspace
    let all_paths = store.get_all_document_paths()?;
    let total_before = all_paths.len();

    // Keep only the paths that no longer exist
    let missing_paths: Vec<_> = all_paths
        .iter()
        .filter(|candidate| !std::path::Path::new(candidate).exists())
        .cloned()
        .collect();

    let files_removed = missing_paths.len();
    let files_remaining = total_before - files_removed;

    if files_removed > 0 {
        // Remove stale documents
        store.delete_documents(&missing_paths)?;
    }

    if json {
        let report = PruneOutput {
            files_removed,
            files_remaining,
        };
        println!("{}", serde_json::to_string_pretty(&report)?);
    } else if missing_paths.is_empty() {
        println!("No stale documents found. Workspace is clean.");
    } else {
        println!("Found {} stale documents:", missing_paths.len());
        for path in &missing_paths {
            println!("  - {path}");
        }
        println!(
            "Removed {} stale documents from workspace.",
            missing_paths.len()
        );
    }
    Ok(())
}

/// Reports that the binary was built without the `workspace` feature:
/// a JSON error object on stderr in JSON mode, a plain line on stdout otherwise.
#[cfg(not(feature = "workspace"))]
fn report_feature_disabled(json: bool) -> Result<()> {
    if json {
        let error_output = ErrorOutput {
            error: "workspace feature not enabled".to_string(),
            error_type: "FeatureNotEnabled".to_string(),
        };
        eprintln!("{}", serde_json::to_string_pretty(&error_output)?);
    } else {
        println!("workspace feature not enabled");
    }
    Ok(())
}
+32 -86
View File
@@ -1,51 +1,15 @@
use anyhow::Result;
use async_openai::Client;
use async_openai::config::OpenAIConfig;
use clap::Parser;
use model2vec_rs::model::StaticModel;
use std::io::{self, BufRead, IsTerminal};
use semtools::SemtoolsConfig;
use semtools::ask::chat_agent::{ask_agent, ask_agent_with_stdin};
use semtools::ask::responses_agent::{ask_agent_responses, ask_agent_responses_with_stdin};
use semtools::config::ApiMode;
use semtools::json_mode::ErrorOutput;
use semtools::search::MODEL_NAME;
// Command-line arguments of the `ask` tool, declared with clap's derive API.
// The `about` text previously duplicated the search tool's description; it now
// describes document-based question-answering, which is what this tool does.
// The `///` comments on the fields are clap help text.
#[derive(Parser, Debug)]
#[command(version, about = "A CLI tool for document-based question-answering", long_about = None)]
struct Args {
    /// Query to prompt the agent with
    query: String,

    /// Files to search (positional arguments, optional if using stdin)
    #[arg(help = "Files to search, optional if using stdin")]
    files: Vec<String>,

    /// Path to the config file. Defaults to ~/.semtools_config.json
    #[clap(short = 'c', long)]
    config: Option<String>,

    /// OpenAI API key (overrides config file and env var)
    #[clap(long)]
    api_key: Option<String>,

    /// OpenAI base URL (overrides config file)
    #[clap(long)]
    base_url: Option<String>,

    /// Model to use for the agent (overrides config file)
    #[clap(short, long)]
    model: Option<String>,

    /// API mode to use: 'chat' or 'responses' (overrides config file)
    #[clap(long)]
    api_mode: Option<String>,

    /// Output results in JSON or text format
    #[clap(short, long)]
    json: bool,
}
use crate::SemtoolsConfig;
use crate::ask::chat_agent::{ask_agent, ask_agent_with_stdin};
use crate::ask::responses_agent::{ask_agent_responses, ask_agent_responses_with_stdin};
use crate::config::ApiMode;
use crate::json_mode::ErrorOutput;
use crate::search::MODEL_NAME;
fn read_from_stdin() -> Result<Vec<String>> {
let stdin = io::stdin();
@@ -53,20 +17,24 @@ fn read_from_stdin() -> Result<Vec<String>> {
Ok(lines?)
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
#[allow(clippy::too_many_arguments)]
pub async fn ask_cmd(
query: String,
files: Vec<String>,
config: Option<String>,
api_key: Option<String>,
base_url: Option<String>,
model: Option<String>,
api_mode: Option<String>,
json: bool,
) -> Result<()> {
// Load configuration
let config_path = args
.config
.unwrap_or_else(SemtoolsConfig::default_config_path);
let config_path = config.unwrap_or_else(SemtoolsConfig::default_config_path);
let semtools_config = SemtoolsConfig::from_config_file(&config_path)?;
let ask_config = semtools_config.ask.unwrap_or_default();
// Resolve API key with priority: CLI arg > config file > env var > error
let api_key = args
.api_key
let api_key = api_key
.or(ask_config.api_key)
.or_else(|| std::env::var("OPENAI_API_KEY").ok())
.ok_or_else(|| {
@@ -76,11 +44,10 @@ async fn main() -> Result<()> {
})?;
// Resolve base URL with priority: CLI arg > config file > default
let base_url = args.base_url.or(ask_config.base_url);
let base_url = base_url.or(ask_config.base_url);
// Resolve model with priority: CLI arg > config file > default
let model_name = args
.model
let model_name = model
.or(ask_config.model)
.unwrap_or_else(|| "gpt-4o-mini".to_string());
@@ -88,7 +55,7 @@ async fn main() -> Result<()> {
let max_iterations = ask_config.max_iterations;
// Resolve API mode with priority: CLI arg > config file > default
let api_mode = if let Some(mode_str) = args.api_mode {
let api_mode = if let Some(mode_str) = api_mode {
match mode_str.to_lowercase().as_str() {
"chat" => ApiMode::Chat,
"responses" => ApiMode::Responses,
@@ -111,7 +78,7 @@ async fn main() -> Result<()> {
let client = Client::with_config(openai_config);
// Check if we have stdin input (no files and stdin is not a terminal)
if args.files.is_empty() && !io::stdin().is_terminal() {
if files.is_empty() && !io::stdin().is_terminal() {
let stdin_lines = read_from_stdin()?;
if !stdin_lines.is_empty() {
let stdin_content = stdin_lines.join("\n");
@@ -119,20 +86,15 @@ async fn main() -> Result<()> {
// Run the appropriate agent with stdin content (no tools)
let output = match api_mode {
ApiMode::Chat => {
ask_agent_with_stdin(&stdin_content, &args.query, &client, &model_name).await?
ask_agent_with_stdin(&stdin_content, &query, &client, &model_name).await?
}
ApiMode::Responses => {
ask_agent_responses_with_stdin(
&stdin_content,
&args.query,
&client,
&model_name,
)
.await?
ask_agent_responses_with_stdin(&stdin_content, &query, &client, &model_name)
.await?
}
};
if args.json {
if json {
let json_output = serde_json::to_string_pretty(&output)?;
println!("\n{}", json_output);
} else {
@@ -144,10 +106,10 @@ async fn main() -> Result<()> {
}
// If no stdin, we need files to search through
if args.files.is_empty() {
if files.is_empty() {
let error_msg =
"No input provided. Either specify files as arguments or pipe input to stdin.";
if args.json {
if json {
let error_output = ErrorOutput {
error: error_msg.to_string(),
error_type: "NoInput".to_string(),
@@ -172,30 +134,14 @@ async fn main() -> Result<()> {
// Run the appropriate agent based on API mode
let output = match api_mode {
ApiMode::Chat => {
ask_agent(
args.files,
&args.query,
&model,
&client,
&model_name,
max_iterations,
)
.await?
ask_agent(files, &query, &model, &client, &model_name, max_iterations).await?
}
ApiMode::Responses => {
ask_agent_responses(
args.files,
&args.query,
&model,
&client,
&model_name,
max_iterations,
)
.await?
ask_agent_responses(files, &query, &model, &client, &model_name, max_iterations).await?
}
};
if args.json {
if json {
let json_output = serde_json::to_string_pretty(&output)?;
println!("\n{}", json_output);
} else {
+11
View File
@@ -0,0 +1,11 @@
/// Implementation of the `ask` subcommand (`ask_cmd`); compiled only with the `ask` feature.
#[cfg(feature = "ask")]
pub mod ask;
/// Implementation of the `parse` subcommand (`parse_cmd`); compiled only with the `parse` feature.
#[cfg(feature = "parse")]
pub mod parse;
/// Implementation of the `search` subcommand (`search_cmd`); compiled only with the `search` feature.
#[cfg(feature = "search")]
pub mod search;
/// Implementation of the `workspace` subcommands (`workspace_*_cmd`); compiled only with the `workspace` feature.
#[cfg(feature = "workspace")]
pub mod workspace;
+47
View File
@@ -0,0 +1,47 @@
use anyhow::Result;
use std::path::Path;
use crate::{LlamaParseBackend, SemtoolsConfig};
pub async fn parse_cmd(
config: Option<String>,
backend: String,
files: Vec<String>,
verbose: bool,
) -> Result<()> {
// Get config file path
let config_path = config.unwrap_or_else(SemtoolsConfig::default_config_path);
// Load configuration
let semtools_config = SemtoolsConfig::from_config_file(&config_path)?;
let parse_config = semtools_config.parse.unwrap_or_default();
// Validate that files exist
for file in &files {
if !Path::new(file).exists() {
eprintln!("Warning: File does not exist: {file}");
}
}
// Create backend and process files
match backend.as_str() {
"llama-parse" => {
let backend = LlamaParseBackend::new(parse_config, verbose)?;
let results = backend.parse(files).await?;
// Output the paths to parsed files, one per line
for result_path in results {
println!("{result_path}");
}
}
_ => {
eprintln!(
"Error: Unknown backend '{}'. Supported backends: llama-parse",
backend
);
std::process::exit(1);
}
}
Ok(())
}
+38 -65
View File
@@ -1,50 +1,18 @@
use anyhow::Result;
use clap::Parser;
use model2vec_rs::model::StaticModel;
use std::io::{self, BufRead, IsTerminal};
#[cfg(feature = "workspace")]
use semtools::workspace::{Workspace, store::RankedLine};
use crate::workspace::{Workspace, store::RankedLine};
#[cfg(feature = "workspace")]
use semtools::search::search_with_workspace;
use crate::search::search_with_workspace;
use semtools::json_mode::{ErrorOutput, SearchOutput, SearchResultJSON};
use semtools::search::{
use crate::json_mode::{ErrorOutput, SearchOutput, SearchResultJSON};
use crate::search::{
Document, MODEL_NAME, SearchConfig, SearchResult, search_documents, search_files,
};
// Command-line arguments of the `search` tool, declared with clap's derive API.
// The `///` comments on the fields are clap help text and are left untouched.
#[derive(Parser, Debug)]
#[command(version, about = "A CLI tool for fast semantic keyword search", long_about = None)]
struct Args {
/// Query to search for (positional argument)
query: String,
/// Files to search (positional arguments, optional if using stdin)
#[arg(help = "Files to search, optional if using stdin")]
files: Vec<String>,
/// How many lines before/after to return as context
// Exposed as -n / --n-lines, with --context accepted as an alias.
#[arg(short = 'n', long = "n-lines", alias = "context", default_value_t = 3)]
n_lines: usize,
/// The top-k files or texts to return (ignored if max_distance is set)
#[arg(long, default_value_t = 3)]
top_k: usize,
/// Return all results with distance below this threshold (0.0+)
// Exposed as -m / --max-distance, with --threshold accepted as an alias.
#[arg(short = 'm', long = "max-distance", alias = "threshold")]
max_distance: Option<f64>,
/// Perform case-insensitive search (default is false)
#[arg(short, long, default_value_t = false)]
ignore_case: bool,
/// Output results in JSON format
#[clap(short, long)]
json: bool,
}
fn read_from_stdin() -> Result<Vec<String>> {
let stdin = io::stdin();
let lines: Result<Vec<String>, _> = stdin.lock().lines().collect();
@@ -141,10 +109,15 @@ fn print_workspace_search_results(ranked_lines: &[RankedLine], n_lines: usize) {
}
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
pub async fn search_cmd(
query: String,
files: Vec<String>,
n_lines: usize,
top_k: usize,
max_distance: Option<f64>,
ignore_case: bool,
json: bool,
) -> Result<()> {
let model = StaticModel::from_pretrained(
MODEL_NAME, // "minishlab/potion-multilingual-128M",
None, // Optional: Hugging Face API token for private models
@@ -152,25 +125,25 @@ async fn main() -> Result<()> {
None, // Optional: subfolder if model files are not at the root of the repo/path
)?;
let query = if args.ignore_case {
args.query.to_lowercase()
let query = if ignore_case {
query.to_lowercase()
} else {
args.query.clone()
query.clone()
};
let query_embedding = model.encode_single(&query);
let config = SearchConfig {
n_lines: args.n_lines,
top_k: args.top_k,
max_distance: args.max_distance,
ignore_case: args.ignore_case,
n_lines,
top_k,
max_distance,
ignore_case,
};
// Handle stdin input (non-workspace mode)
if args.files.is_empty() && !io::stdin().is_terminal() {
if files.is_empty() && !io::stdin().is_terminal() {
let stdin_lines = read_from_stdin()?;
if !stdin_lines.is_empty() {
let lines_for_embedding = if args.ignore_case {
let lines_for_embedding = if ignore_case {
stdin_lines.iter().map(|s| s.to_lowercase()).collect()
} else {
stdin_lines.clone()
@@ -186,7 +159,7 @@ async fn main() -> Result<()> {
let search_results = search_documents(&documents, &query_embedding, &config);
if args.json {
if json {
let output = SearchOutput {
results: search_results.iter().map(search_result_to_json).collect(),
};
@@ -200,10 +173,10 @@ async fn main() -> Result<()> {
}
}
if args.files.is_empty() {
if files.is_empty() {
let error_msg =
"No input provided. Either specify files as arguments or pipe input to stdin.";
if args.json {
if json {
let error_output = ErrorOutput {
error: error_msg.to_string(),
error_type: "NoInput".to_string(),
@@ -222,21 +195,21 @@ async fn main() -> Result<()> {
if Workspace::active().is_ok() {
// Workspace mode: use persisted line embeddings for speed
let config = SearchConfig {
n_lines: args.n_lines,
top_k: args.top_k,
max_distance: args.max_distance,
ignore_case: args.ignore_case,
n_lines,
top_k,
max_distance,
ignore_case,
};
let ranked_lines = search_with_workspace(&args.files, &query, &model, &config).await?;
let ranked_lines = search_with_workspace(&files, &query, &model, &config).await?;
if args.json {
if json {
// Convert workspace results to SearchResultJSON
let results: Vec<SearchResultJSON> = ranked_lines
.iter()
.map(|ranked_line| {
let match_line_number = ranked_line.line_number as usize;
let start = match_line_number.saturating_sub(args.n_lines);
let end = match_line_number + args.n_lines + 1;
let start = match_line_number.saturating_sub(n_lines);
let end = match_line_number + n_lines + 1;
// Read file content for the result
let content =
@@ -264,12 +237,12 @@ async fn main() -> Result<()> {
let json_output = serde_json::to_string_pretty(&output)?;
println!("{}", json_output);
} else {
print_workspace_search_results(&ranked_lines, args.n_lines);
print_workspace_search_results(&ranked_lines, n_lines);
}
} else {
let search_results = search_files(&args.files, &query, &model, &config)?;
let search_results = search_files(&files, &query, &model, &config)?;
if args.json {
if json {
let output = SearchOutput {
results: search_results.iter().map(search_result_to_json).collect(),
};
@@ -283,9 +256,9 @@ async fn main() -> Result<()> {
#[cfg(not(feature = "workspace"))]
{
let search_results = search_files(&args.files, &query, &model, &config)?;
let search_results = search_files(&files, &query, &model, &config)?;
if args.json {
if json {
let output = SearchOutput {
results: search_results.iter().map(search_result_to_json).collect(),
};
+174
View File
@@ -0,0 +1,174 @@
use anyhow::{Context, Result};
#[cfg(feature = "workspace")]
use crate::workspace::{Workspace, WorkspaceConfig, store::Store};
use crate::json_mode::{PruneOutput, WorkspaceOutput};
#[cfg(not(feature = "workspace"))]
use crate::json_mode::ErrorOutput;
/// Creates (or re-saves) the configuration of workspace `name` and tells the
/// user how to activate it.
///
/// With `json` set, a `WorkspaceOutput` object is printed to stdout; its
/// document count is 0 when the store cannot be opened or queried. Otherwise
/// the shell `export` instructions are printed. Without the `workspace`
/// feature only a "feature not enabled" notice is emitted.
///
/// # Errors
///
/// Fails when the workspace root path cannot be resolved, the configuration
/// cannot be saved, or the JSON output cannot be serialised.
pub async fn workspace_use_cmd(name: String, json: bool) -> Result<()> {
    #[cfg(feature = "workspace")]
    {
        // Build and persist the workspace configuration.
        let workspace = Workspace {
            config: WorkspaceConfig {
                name: name.clone(),
                root_dir: Workspace::root_path(&name)?,
                ..Default::default()
            },
        };
        workspace.save()?;

        if !json {
            println!("Workspace '{name}' configured.");
            println!("To activate it, run:");
            println!("  export SEMTOOLS_WORKSPACE={name}");
            println!();
            println!("Or add this to your shell profile (.bashrc, .zshrc, etc.)");
        } else {
            // A brand-new workspace may not have a readable store yet: count that as 0.
            let total_documents = Store::open(&workspace.config.root_dir).map_or(0, |store| {
                store
                    .get_stats()
                    .map_or(0, |stats| stats.total_documents)
            });

            let summary = WorkspaceOutput {
                name: workspace.config.name.clone(),
                root_dir: workspace.config.root_dir.clone(),
                total_documents,
            };
            println!("{}", serde_json::to_string_pretty(&summary)?);
        }
    }

    #[cfg(not(feature = "workspace"))]
    {
        if !json {
            println!("workspace feature not enabled");
        } else {
            let payload = ErrorOutput {
                error: "workspace feature not enabled".to_string(),
                error_type: "FeatureNotEnabled".to_string(),
            };
            eprintln!("{}", serde_json::to_string_pretty(&payload)?);
        }
    }

    Ok(())
}
/// Reports the active workspace: its name, root directory, document count
/// and, in text mode, whether an index exists.
///
/// With `json` set, a `WorkspaceOutput` object is printed to stdout instead
/// of the text summary. Without the `workspace` feature only a
/// "feature not enabled" notice is emitted.
///
/// # Errors
///
/// Fails with "No active workspace" context when no workspace is active, and
/// when the workspace or its store cannot be opened or queried.
pub async fn workspace_status_cmd(json: bool) -> Result<()> {
    #[cfg(feature = "workspace")]
    {
        let _active = Workspace::active().context("No active workspace")?;
        let workspace = Workspace::open()?;

        // Statistics come straight from the workspace's store.
        let stats = Store::open(&workspace.config.root_dir)?.get_stats()?;

        if !json {
            println!("Active workspace: {}", workspace.config.name);
            println!("Root: {}", workspace.config.root_dir);
            println!("Documents: {}", stats.total_documents);

            let index_line = if stats.has_index {
                format!(
                    "Index: Yes ({})",
                    stats.index_type.unwrap_or_else(|| "Unknown".to_string())
                )
            } else {
                "Index: No".to_string()
            };
            println!("{index_line}");
        } else {
            let summary = WorkspaceOutput {
                name: workspace.config.name.clone(),
                root_dir: workspace.config.root_dir.clone(),
                total_documents: stats.total_documents,
            };
            println!("{}", serde_json::to_string_pretty(&summary)?);
        }
    }

    #[cfg(not(feature = "workspace"))]
    {
        if !json {
            println!("workspace feature not enabled");
        } else {
            let payload = ErrorOutput {
                error: "workspace feature not enabled".to_string(),
                error_type: "FeatureNotEnabled".to_string(),
            };
            eprintln!("{}", serde_json::to_string_pretty(&payload)?);
        }
    }

    Ok(())
}
/// Returns `true` only when the filesystem positively reports that `path`
/// does not exist.
///
/// `Path::exists` maps every I/O error (for example a permission problem on a
/// parent directory) to `false`, which would make such files look stale.
/// `try_exists` keeps the error separate, so paths whose existence cannot be
/// determined are not treated as missing.
#[cfg(feature = "workspace")]
fn is_confirmed_missing(path: impl AsRef<std::path::Path>) -> bool {
    matches!(path.as_ref().try_exists(), Ok(false))
}

/// Removes documents from the active workspace's store when their files no
/// longer exist on disk, then reports the outcome.
///
/// With `json` set, a `PruneOutput` object (removed / remaining counts) is
/// printed to stdout; otherwise a text summary listing the removed paths is
/// printed. Only files confirmed to be missing are pruned: a path whose
/// existence check fails with an I/O error is kept. Without the `workspace`
/// feature only a "feature not enabled" notice is emitted.
///
/// # Errors
///
/// Fails with "No active workspace" context when no workspace is active, and
/// when the workspace or its store cannot be opened, listed or updated.
pub async fn workspace_prune_cmd(json: bool) -> Result<()> {
    #[cfg(feature = "workspace")]
    {
        let _name = Workspace::active().context("No active workspace")?;
        let ws = Workspace::open()?;
        let store = Store::open(&ws.config.root_dir)?;

        // Get all document paths from the workspace
        let all_paths = store.get_all_document_paths()?;
        let total_before = all_paths.len();

        // Collect the documents whose files are confirmed to be gone
        let missing_paths: Vec<_> = all_paths
            .iter()
            .filter(|path| is_confirmed_missing(path))
            .cloned()
            .collect();

        let files_removed = missing_paths.len();
        // `missing_paths` is a subset of `all_paths`, so this cannot underflow.
        let files_remaining = total_before - files_removed;

        if !missing_paths.is_empty() {
            // Remove stale documents
            store.delete_documents(&missing_paths)?;
        }

        if json {
            let output = PruneOutput {
                files_removed,
                files_remaining,
            };
            let json_output = serde_json::to_string_pretty(&output)?;
            println!("{}", json_output);
        } else if missing_paths.is_empty() {
            println!("No stale documents found. Workspace is clean.");
        } else {
            println!("Found {} stale documents:", missing_paths.len());
            for path in &missing_paths {
                println!("  - {path}");
            }
            println!(
                "Removed {} stale documents from workspace.",
                missing_paths.len()
            );
        }
    }

    #[cfg(not(feature = "workspace"))]
    {
        if json {
            let error_output = ErrorOutput {
                error: "workspace feature not enabled".to_string(),
                error_type: "FeatureNotEnabled".to_string(),
            };
            let json_output = serde_json::to_string_pretty(&error_output)?;
            eprintln!("{}", json_output);
        } else {
            println!("workspace feature not enabled");
        }
    }

    Ok(())
}
+1
View File
@@ -3,6 +3,7 @@
pub mod config;
pub use config::{AskConfig, SemtoolsConfig};
pub mod cmds;
pub mod json_mode;
#[cfg(feature = "parse")]