mirror of
https://github.com/run-llama/mcp_resume_screening.git
synced 2026-06-30 21:57:58 -04:00
Initial commit (cleaned history)
This commit is contained in:
+87
@@ -0,0 +1,87 @@
|
||||
# Environment variables and sensitive files
|
||||
.env
|
||||
.env.local
|
||||
.env.production
|
||||
config_local.py
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Virtual environments
|
||||
.venv/
|
||||
venv/
|
||||
ENV/
|
||||
env/
|
||||
.env/
|
||||
|
||||
# IDE and Editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.spyderproject
|
||||
.spyproject
|
||||
.ropeproject
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Testing
|
||||
.coverage
|
||||
.pytest_cache/
|
||||
.tox/
|
||||
.nox/
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Documentation builds
|
||||
docs/_build/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# Backup files
|
||||
*_backup.py
|
||||
*_backup_*.py
|
||||
server_clean.py
|
||||
test_server_clean.py
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
temp_*
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
# Use the official Python lightweight image
|
||||
FROM python:3.13-slim
|
||||
|
||||
# Install uv
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
|
||||
# Install the project into /app
|
||||
COPY . /app
|
||||
WORKDIR /app
|
||||
|
||||
# Allow statements and log messages to immediately appear in the logs
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Install dependencies
|
||||
RUN uv sync
|
||||
|
||||
EXPOSE $PORT
|
||||
|
||||
# Run the FastMCP server
|
||||
CMD ["uv", "run", "server.py"]
|
||||
@@ -0,0 +1,500 @@
|
||||
# Job Matching MCP Server
|
||||
|
||||
A Model Context Protocol (MCP) server that provides intelligent job matching capabilities. Extract structured job requirements from job descriptions and find/rank candidates from your LlamaCloud resume index.
|
||||
|
||||
## 🚀 Features
|
||||
|
||||
### Core Job Matching Functions
|
||||
|
||||
1. **`extract_job_requirements`** - Extract structured data from job description text
|
||||
2. **`find_matching_candidates`** - Find and rank candidates from LlamaCloud index
|
||||
3. **`search_candidates_by_skills`** - Search candidates by specific skills
|
||||
4. **`score_candidate_qualifications`** - Score candidate against job requirements
|
||||
|
||||
### Additional Functions
|
||||
|
||||
- **`add`** / **`subtract`** / **`multiply`** - Basic math functions (backward compatibility)
|
||||
|
||||
## 📋 Available MCP Tools
|
||||
|
||||
|
||||
### Job Description Processing
|
||||
- `extract_job_requirements(jd_text: str)` - Extract structured job requirements from job description text
|
||||
|
||||
### Candidate Management
|
||||
- `find_matching_candidates(required_qualifications: str, preferred_qualifications: str, top_k: int, enable_reranking: bool)` - Find candidates matching job qualifications
|
||||
- `search_candidates_by_skills(skills: str, top_k: int)` - Search candidates by specific skills
|
||||
- `score_candidate_qualifications(candidate_resume: str, required_qualifications: str, preferred_qualifications: str, job_title: str, job_description: str)` - Score candidate against job requirements
|
||||
|
||||
## 📋 Function Details
|
||||
|
||||
### 1. Extract Job Requirements
|
||||
```python
|
||||
extract_job_requirements(jd_text: str) -> str
|
||||
```
|
||||
|
||||
**Input:** Job description text (copied from job posting)
|
||||
|
||||
**Output:** JSON string containing:
|
||||
- `title`: Job title
|
||||
- `company`: Company name
|
||||
- `location`: Job location
|
||||
- `required_qualifications`: Array of required qualifications
|
||||
- `preferred_qualifications`: Array of preferred qualifications
|
||||
- `description`: Job summary
|
||||
- `experience_level`: Experience level (entry/mid/senior)
|
||||
- `employment_type`: Employment type (full-time/contract/etc.)
|
||||
|
||||
### 2. Find and Rank Candidates from LlamaCloud
|
||||
```python
|
||||
find_matching_candidates(required_qualifications: str, preferred_qualifications: str, top_k: int, enable_reranking: bool) -> str
|
||||
```
|
||||
|
||||
**Input:**
|
||||
- `required_qualifications`: Comma-separated required qualifications
|
||||
- `preferred_qualifications`: Comma-separated preferred qualifications
|
||||
- `top_k`: Maximum candidates to retrieve (default: 10)
|
||||
- `enable_reranking`: Whether to enable reranking (default: True)
|
||||
|
||||
**Output:** JSON string containing:
|
||||
- `candidates`: Array of candidates with scores and analysis
|
||||
- `total_candidates`: Number of candidates found
|
||||
- `search_parameters`: Details about search configuration
|
||||
|
||||
**Key Features:**
|
||||
- **Retrieves candidates from LlamaCloud index** using semantic search
|
||||
- **Weighted scoring**: Required qualifications have higher weight
|
||||
- **Match percentage**: Overall compatibility score
|
||||
- **Detailed explanations**: For each qualification assessment
|
||||
|
||||
### 3. Search Candidates by Skills
|
||||
```python
|
||||
search_candidates_by_skills(skills: str, top_k: int) -> str
|
||||
```
|
||||
|
||||
**Input:**
|
||||
- `skills`: Comma-separated list of skills or keywords
|
||||
- `top_k`: Number of top candidates to retrieve (default: 10)
|
||||
|
||||
**Output:** JSON with matching candidates and their scores
|
||||
|
||||
### 4. Score Candidate Qualifications
|
||||
```python
|
||||
score_candidate_qualifications(candidate_resume: str, required_qualifications: str, preferred_qualifications: str, job_title: str, job_description: str) -> str
|
||||
```
|
||||
|
||||
**Input:**
|
||||
- `candidate_resume`: The candidate's resume text
|
||||
- `required_qualifications`: Comma-separated required qualifications
|
||||
- `preferred_qualifications`: Comma-separated preferred qualifications
|
||||
- `job_title`: Job title for context (optional)
|
||||
- `job_description`: Job description for context (optional)
|
||||
|
||||
**Output:** Comprehensive analysis including:
|
||||
- Strengths and weaknesses
|
||||
- Detailed scoring breakdown
|
||||
- Hiring recommendations
|
||||
- Role fit assessment
|
||||
|
||||
## 🛠️ Setup & Configuration
|
||||
|
||||
### 1. Configure API Keys & Settings
|
||||
|
||||
You have **two options** for configuration:
|
||||
|
||||
#### Option A: Environment Variables (Recommended for Production)
|
||||
```bash
|
||||
# Required: OpenAI API Key
|
||||
export OPENAI_API_KEY="your-openai-api-key"
|
||||
|
||||
# Required: LlamaCloud Configuration
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
export LLAMA_CLOUD_INDEX_NAME="your-resume-index-name"
|
||||
export LLAMA_CLOUD_PROJECT_NAME="your-project-name"
|
||||
export LLAMA_CLOUD_ORGANIZATION_ID="your-organization-id"
|
||||
|
||||
# Optional: Server Configuration
|
||||
export PORT="8080"
|
||||
export HOST="0.0.0.0"
|
||||
export REQUEST_TIMEOUT="30.0"
|
||||
export OPENAI_TEMPERATURE="0.1"
|
||||
```
|
||||
|
||||
#### Option B: Direct Configuration in config.py (For Local Development)
|
||||
1. **Open `config.py`** and replace the placeholder values:
|
||||
```python
|
||||
# Replace these placeholder values with your actual API keys:
|
||||
OPENAI_API_KEY = "your-actual-openai-api-key-here"
|
||||
LLAMA_CLOUD_API_KEY = "your-actual-llamacloud-api-key-here"
|
||||
LLAMA_CLOUD_ORGANIZATION_ID = "your-actual-org-id-here"
|
||||
LLAMA_CLOUD_INDEX_NAME = "your-actual-index-name"
|
||||
```
|
||||
|
||||
2. **⚠️ Security Warning**: If you edit `config.py` directly, **never commit your API keys to version control!**
|
||||
|
||||
#### Getting Your API Keys:
|
||||
- **OpenAI API Key**: Get from [OpenAI Platform](https://platform.openai.com/api-keys)
|
||||
- **LlamaCloud API Key**: Get from [LlamaCloud Console](https://cloud.llamaindex.ai/)
|
||||
- **LlamaCloud Org ID**: Found in your LlamaCloud project settings
|
||||
- **LlamaCloud Index Name**: The name of your resume index in LlamaCloud
|
||||
|
||||
*Without LlamaCloud credentials, the server uses mock candidate data for testing.*
|
||||
|
||||
### 2. Protect Your API Keys (Important!)
|
||||
|
||||
If you're planning to commit this code to version control, create a `.gitignore` file to protect your sensitive information:
|
||||
|
||||
```bash
|
||||
# Create .gitignore file
|
||||
cat > .gitignore << EOF
|
||||
# Environment variables and sensitive files
|
||||
.env
|
||||
.env.local
|
||||
.env.production
|
||||
config_local.py
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.Python
|
||||
.venv/
|
||||
venv/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
EOF
|
||||
```
|
||||
|
||||
**Alternative**: You can also create a separate `config_local.py` file with your actual keys and import it in `config.py`, then add `config_local.py` to `.gitignore`.
|
||||
|
||||
### 3. Install Dependencies
|
||||
|
||||
```bash
|
||||
# Install using uv (recommended)
|
||||
uv sync
|
||||
|
||||
# Or using pip
|
||||
pip install "fastmcp==2.6.1" "httpx>=0.27.0" "llama-cloud>=0.1.26" "llama-index>=0.12.43"
|
||||
```
|
||||
|
||||
### 4. Run the Server
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
The server starts on `http://localhost:8080/mcp` by default; set the `PORT` environment variable to use a different port.
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
Run the comprehensive test suite:
|
||||
|
||||
```bash
|
||||
# Start the server first
|
||||
python server.py
|
||||
|
||||
# In another terminal, run tests
|
||||
python test_server.py
|
||||
```
|
||||
|
||||
The test suite will:
|
||||
- ✅ Extract job requirements from sample JD
|
||||
- ✅ Retrieve and rank candidates from LlamaCloud (or mock data)
|
||||
- ✅ Perform detailed analysis of top candidate
|
||||
- ✅ Test backward compatibility functions
|
||||
|
||||
## 📊 How It Works
|
||||
|
||||
### Architecture Overview
|
||||
|
||||
1. **Job Description Processing**
|
||||
- Uses OpenAI to extract structured requirements from free-form JD text
|
||||
- Separates required vs. preferred qualifications
|
||||
- Extracts metadata (title, company, location, etc.)
|
||||
|
||||
2. **Candidate Retrieval**
|
||||
- Queries LlamaCloud index using semantic search
|
||||
- Builds search query from job requirements
|
||||
- Retrieves top candidates with similarity scores
|
||||
|
||||
3. **Intelligent Scoring**
|
||||
- Uses OpenAI to score each candidate (0-2 scale)
|
||||
- **0**: Not Met, **1**: Somewhat Met, **2**: Strongly Met
|
||||
- Required qualifications weighted 2x in final score
|
||||
- Provides explanations for each score
|
||||
|
||||
4. **Match Calculation**
|
||||
```
|
||||
Weighted Score = (Required Total × 2) + Preferred Total
|
||||
Match % = (Weighted Score / Max Possible Score) × 100
|
||||
```
|
||||
|
||||
### Data Flow
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[Job Description Text] --> B[extract_job_requirements]
|
||||
B --> C[Structured Requirements JSON]
|
||||
C --> D[find_matching_candidates]
|
||||
E[LlamaCloud Index] --> D
|
||||
D --> F[Ranked Candidates with Scores]
|
||||
C --> G[score_candidate_qualifications]
|
||||
H[Individual Resume] --> G
|
||||
G --> I[Detailed Analysis & Recommendations]
|
||||
```
|
||||
|
||||
## 🎯 Use Cases
|
||||
|
||||
### 1. Automated Resume Screening
|
||||
```python
|
||||
# Extract requirements from job posting
|
||||
job_reqs = extract_job_requirements(job_posting_text)
|
||||
|
||||
# Find top candidates from your resume database
|
||||
top_candidates = find_matching_candidates("Python, JavaScript, React", "AWS, Docker", 10, True)
|
||||
|
||||
# Get detailed analysis of promising candidates
|
||||
for candidate in json.loads(top_candidates)["candidates"][:5]:
    analysis = score_candidate_qualifications(candidate['resume'], "Python, JavaScript, React", "AWS, Docker")
|
||||
```
|
||||
|
||||
### 2. Hiring Pipeline Integration
|
||||
- **ATS Integration**: Automatically score incoming applications
|
||||
- **Recruiter Tools**: Provide data-driven candidate rankings
|
||||
- **Interview Prep**: Generate candidate-specific interview questions
|
||||
|
||||
### 3. Job Market Analysis
|
||||
- **Requirement Trends**: Track common qualifications across postings
|
||||
- **Candidate Gap Analysis**: Identify missing skills in candidate pool
|
||||
- **Salary Benchmarking**: Correlate requirements with compensation data
|
||||
|
||||
## 🚀 Deployment
|
||||
|
||||
### Prerequisites
|
||||
|
||||
Make sure you have the following set up:
|
||||
- Python 3.10+
|
||||
- UV package manager
|
||||
- Google Cloud SDK (gcloud)
|
||||
- Project ID configured: `export PROJECT_ID=<your-project-id>`
|
||||
- Artifact Registry repository created: `remote-mcp-servers`
|
||||
|
||||
### Local Development
|
||||
|
||||
```bash
|
||||
# 1. Install dependencies
|
||||
uv sync
|
||||
|
||||
# 2. Configure API keys (choose one method):
|
||||
|
||||
# Method A: Set environment variables
|
||||
export OPENAI_API_KEY="your-openai-api-key"
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
export LLAMA_CLOUD_INDEX_NAME="your-index-name"
|
||||
export LLAMA_CLOUD_ORGANIZATION_ID="your-org-id"
|
||||
|
||||
# Method B: Edit config.py directly (see configuration section above)
|
||||
|
||||
# 3. Run server
|
||||
python server.py
|
||||
```
|
||||
|
||||
The server will start on `http://localhost:8080/mcp` and log which configuration it's using:
|
||||
```
|
||||
[INFO]: LlamaCloudService initialized with index: your-index-name
|
||||
[INFO]: MCP server starting on 0.0.0.0:8080
|
||||
```
|
||||
|
||||
#### Quick Configuration Test:
|
||||
You can verify your configuration is working by running:
|
||||
```bash
|
||||
python -c "from config import OPENAI_API_KEY, LLAMA_CLOUD_API_KEY, LLAMA_CLOUD_INDEX_NAME; print(f'OpenAI: {OPENAI_API_KEY[:10]}..., LlamaCloud: {LLAMA_CLOUD_API_KEY[:10]}..., Index: {LLAMA_CLOUD_INDEX_NAME}')"
|
||||
```
|
||||
|
||||
If you see placeholder values like "your-openai-api-key-here", your configuration needs to be updated.
|
||||
|
||||
### Docker Deployment
|
||||
```bash
|
||||
# Build image
|
||||
docker build -t job-matching-mcp .
|
||||
|
||||
# Run container
|
||||
docker run -p 8080:8080 \
|
||||
-e OPENAI_API_KEY="your-key" \
|
||||
-e LLAMA_CLOUD_API_KEY="your-key" \
|
||||
job-matching-mcp
|
||||
```
|
||||
|
||||
### Google Cloud Run Deployment
|
||||
|
||||
**📚 Reference Documentation**: [Build and Deploy a Remote MCP Server to Google Cloud Run in Under 10 Minutes](https://cloud.google.com/blog/topics/developers-practitioners/build-and-deploy-a-remote-mcp-server-to-google-cloud-run-in-under-10-minutes)
|
||||
|
||||
#### Initial Deployment
|
||||
|
||||
```bash
|
||||
# Build and push to Artifact Registry
|
||||
gcloud builds submit --region=us-central1 \
|
||||
--tag us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest
|
||||
|
||||
# Deploy to Cloud Run
|
||||
gcloud run deploy mcp-server \
|
||||
--image us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest \
|
||||
--region=us-central1 \
|
||||
--no-allow-unauthenticated \
|
||||
--set-env-vars OPENAI_API_KEY="your-key",LLAMA_CLOUD_API_KEY="your-key"
|
||||
```
|
||||
|
||||
#### Redeployment Steps
|
||||
|
||||
After making code changes to your MCP server, follow these steps to redeploy:
|
||||
|
||||
**Step 1: Rebuild the container and push to Artifact Registry**
|
||||
|
||||
```bash
|
||||
gcloud builds submit --region=us-central1 \
|
||||
--tag us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest
|
||||
```
|
||||
|
||||
**Step 2: Re-deploy the updated container to Cloud Run**
|
||||
|
||||
```bash
|
||||
gcloud run deploy mcp-server \
|
||||
--image us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest \
|
||||
--region=us-central1 \
|
||||
--no-allow-unauthenticated
|
||||
```
|
||||
|
||||
**Step 3: Test the deployment (optional)**
|
||||
|
||||
Start the Cloud Run proxy to test your updated server:
|
||||
|
||||
```bash
|
||||
gcloud run services proxy mcp-server --region=us-central1
|
||||
```
|
||||
|
||||
Then run your test script:
|
||||
|
||||
```bash
|
||||
uv run test_server.py
|
||||
```
|
||||
|
||||
## 📁 Project Structure
|
||||
|
||||
```
|
||||
mcp-on-cloudrun/
|
||||
├── config.py # Configuration constants
|
||||
├── models.py # Data structures
|
||||
├── server.py # Main MCP server
|
||||
├── Dockerfile # Container configuration
|
||||
├── pyproject.toml # Python dependencies
|
||||
├── services/
|
||||
│ ├── openai_service.py # OpenAI API integration
|
||||
│ └── llamacloud_service.py # LlamaCloud integration
|
||||
├── tools/
|
||||
│ ├── math_tools.py # Math operations (add, subtract, multiply)
|
||||
│ ├── job_tools.py # Job description extraction
|
||||
│ └── candidate_tools.py # Candidate search and scoring
|
||||
└── test_server.py # Test client
|
||||
```
|
||||
|
||||
## 🔧 Customization
|
||||
|
||||
### Scoring Criteria
|
||||
Modify the scoring prompts in `services/openai_service.py` to adjust evaluation criteria:
|
||||
- Change scoring scale (0-2 to 0-5, etc.)
|
||||
- Adjust weighting between required/preferred qualifications
|
||||
- Add domain-specific evaluation criteria
|
||||
|
||||
### LlamaCloud Integration
|
||||
For production deployment with real candidate data:
|
||||
1. Set up LlamaCloud account and create resume index
|
||||
2. Set the `LLAMA_CLOUD_*` environment variables (or edit the corresponding defaults in `config.py`)
|
||||
3. Replace mock candidate data with actual LlamaCloud API calls
|
||||
|
||||
### OpenAI Model Selection
|
||||
Change the model in `config.py`:
|
||||
```python
|
||||
DEFAULT_MODEL = "gpt-4o-mini"  # Fast and cost-effective
# DEFAULT_MODEL = "gpt-4o"  # Higher quality, more expensive
|
||||
```
|
||||
|
||||
## 📈 Performance & Scaling
|
||||
|
||||
- **Concurrent Requests**: FastMCP handles multiple simultaneous job matching requests
|
||||
- **Caching**: Consider implementing Redis for frequent job requirement extractions
|
||||
- **Rate Limiting**: OpenAI API has rate limits; implement queuing for high-volume usage
|
||||
- **Cost Optimization**: Use `gpt-4o-mini` for most operations, `gpt-4o` for critical analysis
|
||||
|
||||
## 🔐 Security Considerations
|
||||
|
||||
### Security Notes
|
||||
|
||||
- **Always use `--no-allow-unauthenticated`** to require authentication for Cloud Run
|
||||
- Ensure users have the `roles/run.invoker` IAM role to access the server
|
||||
- Use the Cloud Run proxy for local testing with authentication
|
||||
- **API Keys**: Never commit API keys to version control
|
||||
- **Input Validation**: Server validates all inputs and handles malformed data
|
||||
- **Error Handling**: Graceful degradation when external services are unavailable
|
||||
- **Data Privacy**: Resume data processed through OpenAI; consider data retention policies
|
||||
|
||||
## 🔍 Troubleshooting
|
||||
|
||||
### Common Issues:
|
||||
|
||||
#### Configuration Issues:
|
||||
1. **"Invalid API key" errors**:
|
||||
- Check that your API keys are correctly set in `config.py` or environment variables
|
||||
- Verify API keys are valid and have proper permissions
|
||||
- For OpenAI: Ensure you have credits/billing set up
|
||||
|
||||
2. **"LlamaCloud index not found"**:
|
||||
- Verify `LLAMA_CLOUD_INDEX_NAME` matches your actual index name
|
||||
- Check `LLAMA_CLOUD_ORGANIZATION_ID` is correct
|
||||
- Ensure your LlamaCloud API key has access to the specified index
|
||||
|
||||
3. **Server shows placeholder values**:
|
||||
- If you see "your-openai-api-key-here" in logs, your config isn't loading properly
|
||||
- Check that you've either set environment variables OR edited `config.py` directly
|
||||
- Restart the server after making configuration changes
|
||||
|
||||
#### Deployment Issues:
|
||||
4. **Authentication errors**: Ensure Cloud Run proxy is running and you have proper IAM roles
|
||||
5. **Build failures**: Check Dockerfile and dependencies in pyproject.toml
|
||||
6. **Port conflicts**: Use `lsof -ti:8080 | xargs kill -9` to free up port 8080
|
||||
|
||||
### Logs:
|
||||
View Cloud Run logs:
|
||||
```bash
|
||||
gcloud run services logs tail mcp-server --region=us-central1
|
||||
```
|
||||
|
||||
View local server logs:
|
||||
```bash
|
||||
# Server logs are printed to console when running locally
|
||||
python server.py
|
||||
```
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Add tests for new functionality
|
||||
4. Ensure all tests pass
|
||||
5. Submit a pull request
|
||||
|
||||
## 📄 License
|
||||
|
||||
MIT License - see LICENSE file for details.
|
||||
|
||||
---
|
||||
|
||||
**Ready to revolutionize your hiring process with AI-powered job matching!** 🎯✨
|
||||
+150
@@ -0,0 +1,150 @@
|
||||
# MCP Server - Clean Modular Structure
|
||||
|
||||
This document explains the refactored, modular structure of the MCP server that follows Python best practices.
|
||||
|
||||
## 📁 Project Structure
|
||||
|
||||
```
|
||||
mcp-on-cloudrun/
|
||||
├── config.py # Configuration constants and settings
|
||||
├── models.py # Data models and structures
|
||||
├── services/ # Business logic services
|
||||
│ ├── __init__.py
|
||||
│ ├── openai_service.py # OpenAI API interactions
|
||||
│ └── llamacloud_service.py # LlamaCloud resume index interactions
|
||||
├── tools/ # MCP tool definitions
|
||||
│ ├── __init__.py
|
||||
│ ├── job_tools.py # Job description related tools
|
||||
│ ├── math_tools.py # Mathematical operation tools
|
||||
│ └── candidate_tools.py # Candidate retrieval tools
|
||||
├── server_clean.py # Clean main server entry point
|
||||
├── test_server_clean.py # Test script for clean server
|
||||
├── server.py # Original monolithic server (backup)
|
||||
└── test_server.py # Original test script (backup)
|
||||
```
|
||||
|
||||
## 🏗️ Architecture Overview
|
||||
|
||||
### Separation of Concerns
|
||||
|
||||
The refactored structure follows the **Single Responsibility Principle** by separating different concerns into dedicated modules:
|
||||
|
||||
1. **Configuration** (`config.py`): All constants, API keys, and configuration settings
|
||||
2. **Models** (`models.py`): Data structures and models like `JobDescriptionData`
|
||||
3. **Services** (`services/`): Business logic and external API interactions
|
||||
4. **Tools** (`tools/`): MCP tool definitions organized by functionality
|
||||
5. **Server** (`server_clean.py`): FastMCP setup and tool registration only
|
||||
|
||||
### Benefits of This Structure
|
||||
|
||||
✅ **Maintainability**: Each module has a clear purpose and can be modified independently
|
||||
✅ **Testability**: Individual components can be unit tested in isolation
|
||||
✅ **Reusability**: Services and models can be reused across different tools
|
||||
✅ **Scalability**: Easy to add new tools, services, or models
|
||||
✅ **Readability**: Clean, focused code that's easy to understand
|
||||
✅ **Best Practices**: Follows Python packaging and project structure conventions
|
||||
|
||||
## 📋 Module Details
|
||||
|
||||
### `config.py`
|
||||
- Contains all configuration constants
|
||||
- Environment variables and API keys
|
||||
- Server settings (host, port, timeouts)
|
||||
- Easy to modify without touching business logic
|
||||
|
||||
### `models.py`
|
||||
- Defines data structures like `JobDescriptionData`
|
||||
- Includes validation and serialization methods
|
||||
- Type hints for better IDE support and documentation
|
||||
|
||||
### `services/openai_service.py`
|
||||
- Encapsulates all OpenAI API interactions
|
||||
- Handles HTTP requests, error handling, and response parsing
|
||||
- Can be easily mocked for testing
|
||||
- Configurable through the config module
|
||||
|
||||
### `tools/job_tools.py`
|
||||
- Contains job description related MCP tools
|
||||
- Uses the OpenAI service for processing
|
||||
- Handles input validation and error responses
|
||||
- Clean separation between tool interface and business logic
|
||||
|
||||
### `tools/math_tools.py`
|
||||
- Simple mathematical operation tools
|
||||
- Demonstrates how to organize related tools
|
||||
- Static methods for stateless operations
|
||||
|
||||
### `server_clean.py`
|
||||
- Minimal server setup code
|
||||
- Imports and registers tools from their respective modules
|
||||
- Clean main function with proper error handling
|
||||
- Easy to understand and modify
|
||||
|
||||
## 🚀 Running the Clean Server
|
||||
|
||||
```bash
|
||||
# Start the clean server
|
||||
uv run server_clean.py
|
||||
|
||||
# Test the clean server (in another terminal)
|
||||
uv run test_server_clean.py
|
||||
```
|
||||
|
||||
## 🔧 Adding New Tools
|
||||
|
||||
To add a new tool:
|
||||
|
||||
1. **Create the tool class** in the appropriate `tools/` module
|
||||
2. **Add any required services** in the `services/` directory
|
||||
3. **Register the tool** in `server_clean.py` with the `@mcp.tool()` decorator
|
||||
4. **Add tests** to verify functionality
|
||||
|
||||
Example:
|
||||
```python
|
||||
# In tools/new_tools.py
|
||||
class NewTools:
|
||||
def my_new_tool(self, param: str) -> str:
|
||||
return f"Processed: {param}"
|
||||
|
||||
# In server_clean.py
|
||||
from tools.new_tools import NewTools
|
||||
new_tools = NewTools()
|
||||
|
||||
@mcp.tool()
|
||||
def my_new_tool(param: str) -> str:
|
||||
return new_tools.my_new_tool(param)
|
||||
```
|
||||
|
||||
## 📊 Comparison: Before vs After
|
||||
|
||||
| Aspect | Before (server.py) | After (Clean Structure) |
|
||||
|--------|-------------------|-------------------------|
|
||||
| Lines of code | 275 lines | ~100 lines in main server |
|
||||
| Concerns mixed | ✗ All in one file | ✅ Separated by purpose |
|
||||
| Testability | ✗ Hard to test parts | ✅ Easy to unit test |
|
||||
| Maintainability | ✗ Changes affect everything | ✅ Isolated changes |
|
||||
| Readability | ✗ Long, complex file | ✅ Short, focused files |
|
||||
| Scalability | ✗ Gets worse over time | ✅ Easy to extend |
|
||||
|
||||
## 🛠️ Available MCP Tools
|
||||
|
||||
### Mathematical Operations
|
||||
- `add(a: int, b: int)` - Add two numbers
|
||||
- `subtract(a: int, b: int)` - Subtract two numbers
|
||||
- `multiply(a: int, b: int)` - Multiply two numbers
|
||||
|
||||
### Job Description Processing
|
||||
- `extract_job_requirements(jd_text: str)` - Extract structured data from job description text
|
||||
|
||||
### Candidate Retrieval (LlamaCloud)
|
||||
- `find_matching_candidates(required_qualifications: str, preferred_qualifications: str, top_k: int, enable_reranking: bool)` - Find candidates matching job qualifications from LlamaCloud resume index
|
||||
- `search_candidates_by_skills(skills: str, top_k: int)` - Search candidates by specific skills or keywords
|
||||
- `score_candidate_qualifications(candidate_resume: str, required_qualifications: str, preferred_qualifications: str, job_title: str, job_description: str)` - Score a candidate's resume against specific job qualifications using LLM evaluation
|
||||
|
||||
## 🎯 Next Steps
|
||||
|
||||
1. **Configure LlamaCloud**: Set your API key and index details in `config.py`
|
||||
2. **Add more tools**: Follow the established patterns
|
||||
3. **Add unit tests**: Test individual components
|
||||
4. **Add type checking**: Use `mypy` for static type checking
|
||||
5. **Add documentation**: Use docstrings and type hints throughout
|
||||
@@ -0,0 +1,21 @@
|
||||
"""Configuration settings for the MCP server."""
|
||||
|
||||
import os
|
||||
|
||||
# --- OpenAI ---------------------------------------------------------------
# Read the key from the OPENAI_API_KEY environment variable; the fallback is
# only a placeholder string.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-openai-api-key-here")
DEFAULT_MODEL = "gpt-4o-mini"

# --- LlamaCloud (resume index) --------------------------------------------
# Each value can be overridden through the environment variable of the same
# name; the second argument is the fallback used when the variable is unset.
LLAMA_CLOUD_API_KEY = os.environ.get(
    "LLAMA_CLOUD_API_KEY", "your-llamacloud-api-key-here"
)
LLAMA_CLOUD_PROJECT_NAME = os.environ.get("LLAMA_CLOUD_PROJECT_NAME", "Default")
LLAMA_CLOUD_ORGANIZATION_ID = os.environ.get(
    "LLAMA_CLOUD_ORGANIZATION_ID", "your-org-id-here"
)
LLAMA_CLOUD_INDEX_NAME = os.environ.get("LLAMA_CLOUD_INDEX_NAME", "resume_public")

# --- Server ---------------------------------------------------------------
# PORT is parsed as an integer, HOST is kept as a string.
DEFAULT_PORT = int(os.environ.get("PORT", "8080"))
DEFAULT_HOST = os.environ.get("HOST", "0.0.0.0")

# --- Outbound API calls ---------------------------------------------------
# Both settings are parsed as floats.
REQUEST_TIMEOUT = float(os.environ.get("REQUEST_TIMEOUT", "30.0"))
OPENAI_TEMPERATURE = float(os.environ.get("OPENAI_TEMPERATURE", "0.1"))
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Data models for the MCP server."""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any
|
||||
|
||||
logger = logging.getLogger(__name__)


class JobDescriptionData:
    """Data structure for job description information matching reference implementation.

    Holds the fields extracted from a job posting and can serialize them to a
    plain dictionary via :meth:`to_dict`.
    """

    def __init__(
        self,
        title: str,
        company: str,
        location: str,
        required_qualifications: List[str],
        preferred_qualifications: List[str],
        description: str,
        experience_level: str,
        employment_type: str,
    ) -> None:
        """Initialize JobDescriptionData object.

        Args:
            title: The job title
            company: The company name
            location: The job location
            required_qualifications: List of required qualifications
            preferred_qualifications: List of preferred qualifications
            description: Job description summary
            experience_level: Experience level (entry, mid, senior, etc.)
            employment_type: Employment type (full-time, part-time, etc.)
        """
        # Lazy %-style arguments: the message is only formatted when the
        # INFO level is actually enabled. The emitted text is unchanged.
        logger.info("Creating JobDescriptionData with title: %s", title)
        self.title = title
        self.company = company
        self.location = location
        # The lists are stored as passed in (no copy is made).
        self.required_qualifications = required_qualifications
        self.preferred_qualifications = preferred_qualifications
        self.description = description
        self.experience_level = experience_level
        self.employment_type = employment_type
        logger.info("JobDescriptionData object created successfully")

    def to_dict(self) -> Dict[str, Any]:
        """Convert the object to a dictionary for JSON serialization.

        Returns:
            Dictionary representation of the job description data. The
            qualification lists in the result are the same list objects held
            by this instance, not copies.
        """
        logger.info("Converting JobDescriptionData to dict")
        result = {
            "title": self.title,
            "company": self.company,
            "location": self.location,
            "required_qualifications": self.required_qualifications,
            "preferred_qualifications": self.preferred_qualifications,
            "description": self.description,
            "experience_level": self.experience_level,
            "employment_type": self.employment_type,
        }
        logger.info("Successfully converted to dict")
        return result

    def __repr__(self) -> str:
        """Return a short debugging representation (title and company only)."""
        # ``!r`` lets Python choose the quoting, so values that themselves
        # contain quote characters still render unambiguously.
        return f"JobDescriptionData(title={self.title!r}, company={self.company!r})"
|
||||
@@ -0,0 +1,11 @@
|
||||
[project]
|
||||
name = "mcp-on-cloudrun"
|
||||
version = "0.1.0"
|
||||
description = "Example of deploying a MCP server on Cloud Run"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"fastmcp==2.6.1",
|
||||
"httpx>=0.27.0",
|
||||
"llama-cloud>=0.1.26",
|
||||
"llama-index>=0.12.43",
|
||||
]
|
||||
@@ -0,0 +1,164 @@
|
||||
"""Main MCP server entry point with clean modular structure."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from config import DEFAULT_PORT, DEFAULT_HOST
|
||||
from tools.job_tools import JobTools
|
||||
from tools.math_tools import MathTools
|
||||
from tools.candidate_tools import CandidateTools
|
||||
|
||||
# Configure root logging first, then grab this module's logger
logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# FastMCP server instance that the @mcp.tool() decorators below register with
mcp = FastMCP("MCP Server on Cloud Run")

# One shared instance per tool group; the registered functions delegate to these
job_tools = JobTools()
math_tools = MathTools()
candidate_tools = CandidateTools()
|
||||
|
||||
|
||||
# Register job description tools
|
||||
@mcp.tool()
async def extract_job_requirements(jd_text: str) -> str:
    """Extract structured job requirements from job description text.

    Args:
        jd_text: The job description text to analyze

    Returns:
        JSON string containing structured job requirements including title, company,
        location, required_qualifications, preferred_qualifications, description,
        experience_level, and employment_type.
    """
    # Thin registration wrapper: the work happens in JobTools.
    requirements_json = await job_tools.extract_job_requirements(jd_text)
    return requirements_json
|
||||
|
||||
|
||||
# Register mathematical operation tools
|
||||
@mcp.tool()
def add(a: int, b: int) -> int:
    """Add two numbers together.

    Args:
        a: The first number
        b: The second number

    Returns:
        The sum of the two numbers
    """
    # Delegates to the shared MathTools instance.
    total = math_tools.add(a, b)
    return total
|
||||
|
||||
|
||||
@mcp.tool()
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: The first number
        b: The second number

    Returns:
        The difference of the two numbers
    """
    # Delegates to the shared MathTools instance.
    difference = math_tools.subtract(a, b)
    return difference
|
||||
|
||||
|
||||
@mcp.tool()
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.

    Args:
        a: The first number
        b: The second number

    Returns:
        The product of the two numbers
    """
    # Delegates to the shared MathTools instance.
    product = math_tools.multiply(a, b)
    return product
|
||||
|
||||
|
||||
# Register candidate retrieval tools
|
||||
@mcp.tool()
async def find_matching_candidates(
    required_qualifications: str,
    preferred_qualifications: str = "",
    top_k: int = 10,
    enable_reranking: bool = True,
) -> str:
    """Find candidates matching job qualifications from LlamaCloud resume index.

    Args:
        required_qualifications: Comma-separated string of required qualifications (e.g., "Python, Machine Learning, 3+ years experience")
        preferred_qualifications: Comma-separated string of preferred qualifications (optional, e.g., "AWS, Docker, PhD")
        top_k: Number of top candidates to retrieve (default: 10, max: 50)
        enable_reranking: Whether to enable reranking for better results (default: True)

    Returns:
        JSON string containing list of matching candidates with their scores and information
    """
    # Arguments are forwarded positionally, in declaration order.
    matches_json = await candidate_tools.find_matching_candidates(
        required_qualifications,
        preferred_qualifications,
        top_k,
        enable_reranking,
    )
    return matches_json
|
||||
|
||||
|
||||
@mcp.tool()
async def search_candidates_by_skills(skills: str, top_k: int = 10) -> str:
    """Search candidates by specific skills or keywords from LlamaCloud resume index.

    Args:
        skills: Comma-separated list of skills or keywords to search for (e.g., "Python, Machine Learning, AWS")
        top_k: Number of top candidates to retrieve (default: 10, max: 50)

    Returns:
        JSON string containing list of matching candidates with their scores and information
    """
    # Thin registration wrapper: the work happens in CandidateTools.
    matches_json = await candidate_tools.search_candidates_by_skills(skills, top_k)
    return matches_json
|
||||
|
||||
|
||||
@mcp.tool()
async def score_candidate_qualifications(
    candidate_resume: str,
    required_qualifications: str,
    preferred_qualifications: str = "",
    job_title: str = "",
    job_description: str = "",
) -> str:
    """Score a candidate's resume against specific job qualifications using LLM evaluation.

    Args:
        candidate_resume: The candidate's resume text content
        required_qualifications: Comma-separated string of required qualifications (e.g., "Python, 3+ years experience, Bachelor's degree")
        preferred_qualifications: Comma-separated string of preferred qualifications (optional, e.g., "AWS, Docker, Master's degree")
        job_title: Job title for context (optional)
        job_description: Job description for context (optional)

    Returns:
        JSON string containing detailed scoring results for each qualification with explanations and overall feedback
    """
    # Arguments are forwarded positionally, in declaration order.
    forwarded_args = (
        candidate_resume,
        required_qualifications,
        preferred_qualifications,
        job_title,
        job_description,
    )
    scoring_json = await candidate_tools.score_candidate_qualifications(*forwarded_args)
    return scoring_json
|
||||
|
||||
|
||||
async def main():
    """Start the MCP server over the streamable-http transport.

    The port is read from the PORT environment variable and falls back to
    DEFAULT_PORT; the bind host is always DEFAULT_HOST. Any exception raised
    while running the server is logged and then re-raised.
    """
    port = int(os.getenv("PORT", DEFAULT_PORT))
    logger.info(f"MCP server starting on {DEFAULT_HOST}:{port}")

    # Could also use 'sse' transport, host="0.0.0.0" required for Cloud Run.
    run_options = {
        "transport": "streamable-http",
        "host": DEFAULT_HOST,
        "port": port,
    }
    try:
        await mcp.run_async(**run_options)
    except Exception as e:
        logger.error(f"Server startup failed: {e}")
        raise


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1 @@
|
||||
"""Services package for business logic."""
|
||||
@@ -0,0 +1,430 @@
|
||||
"""LlamaCloud service for candidate retrieval from resume index."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
import asyncio
|
||||
|
||||
from config import (
|
||||
LLAMA_CLOUD_API_KEY,
|
||||
LLAMA_CLOUD_INDEX_NAME,
|
||||
LLAMA_CLOUD_PROJECT_NAME,
|
||||
LLAMA_CLOUD_ORGANIZATION_ID
|
||||
)
|
||||
from models import JobDescriptionData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Optional dependency probe: record whether LlamaCloudIndex could be imported
# so LlamaCloudService can refuse to start with a clear error instead of the
# module failing at import time.
try:
    from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
except ImportError:
    logger.error("llama-index package is required for LlamaCloud functionality. Please install it with: pip install llama-index")
    LLAMA_INDEX_AVAILABLE = False
else:
    LLAMA_INDEX_AVAILABLE = True
|
||||
|
||||
|
||||
class CandidateMatch:
    """Container for one retrieved resume node and the candidate it describes."""

    def __init__(
        self,
        node_id: str,
        score: float,
        content: str,
        metadata: Dict[str, Any],
        candidate_name: Optional[str] = None,
        file_name: Optional[str] = None
    ):
        """Store the match fields, substituting defaults for missing names.

        Args:
            node_id: Identifier of the retrieved node
            score: Retrieval score of the node
            content: Text content of the node
            metadata: Metadata mapping attached to the node
            candidate_name: Candidate name; any falsy value becomes "Unknown Candidate"
            file_name: Source file name; any falsy value becomes ""
        """
        self.node_id = node_id
        self.score = score
        self.content = content
        self.metadata = metadata
        self.candidate_name = candidate_name if candidate_name else "Unknown Candidate"
        self.file_name = file_name if file_name else ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the match as a dictionary keyed by attribute name."""
        keys = ("node_id", "score", "content", "metadata", "candidate_name", "file_name")
        return {key: getattr(self, key) for key in keys}
|
||||
|
||||
|
||||
class LlamaCloudService:
    """Service class for interacting with LlamaCloud resume index using LlamaIndex.

    The index connection is opened lazily on first use. The three public
    coroutines (retrieve_candidates, retrieve_candidates_by_qualifications,
    search_by_skills) differ only in how they build the query text and in
    their log messages; retriever configuration, off-loop retrieval and
    per-file de-duplication live in shared private helpers.
    """

    def __init__(self):
        """Validate configuration and store the connection settings.

        Raises:
            ImportError: If the llama-index package could not be imported
            ValueError: If the API key is missing or still the placeholder
                value, or if no index name is configured
        """
        if not LLAMA_INDEX_AVAILABLE:
            raise ImportError("llama-index package is required for LlamaCloud functionality. Please install it with: pip install llama-index")

        if not LLAMA_CLOUD_API_KEY or LLAMA_CLOUD_API_KEY == "llx-your-api-key-here":
            raise ValueError("LLAMA_CLOUD_API_KEY is required and must be set to a valid API key")

        if not LLAMA_CLOUD_INDEX_NAME:
            raise ValueError("LLAMA_CLOUD_INDEX_NAME is required")

        self.api_key = LLAMA_CLOUD_API_KEY
        self.index_name = LLAMA_CLOUD_INDEX_NAME
        self.project_name = LLAMA_CLOUD_PROJECT_NAME
        self.organization_id = LLAMA_CLOUD_ORGANIZATION_ID

        # Initialize LlamaCloud index (will be created lazily)
        self._index = None

        logger.info(f"LlamaCloudService initialized with index: {self.index_name}")

    def _get_index(self):
        """Get or create the LlamaCloud index instance.

        Returns:
            The cached LlamaCloudIndex, connecting on the first call

        Raises:
            Exception: Whatever the connection attempt raised, after logging it
        """
        if self._index is None:
            try:
                # Set the API key in environment if not already set
                import os
                if not os.environ.get("LLAMA_CLOUD_API_KEY"):
                    os.environ["LLAMA_CLOUD_API_KEY"] = self.api_key

                logger.info(f"Connecting to LlamaCloud index: {self.index_name}")

                # Connect to existing index as per the documentation
                self._index = LlamaCloudIndex(
                    name=self.index_name,
                    project_name=self.project_name
                )

                logger.info("Successfully connected to LlamaCloud index")

            except Exception as e:
                logger.error(f"Failed to connect to LlamaCloud index: {e}")
                raise

        return self._index

    def _build_search_query(self, job_description: "JobDescriptionData") -> str:
        """Build a search query from job description data.

        Only non-empty fields contribute; the labelled parts are joined with
        single spaces.
        """
        query_parts = []

        if job_description.title:
            query_parts.append(f"Job Title: {job_description.title}")

        if job_description.required_qualifications:
            query_parts.append(f"Required Qualifications: {' '.join(job_description.required_qualifications)}")

        if job_description.preferred_qualifications:
            query_parts.append(f"Preferred Qualifications: {' '.join(job_description.preferred_qualifications)}")

        if job_description.experience_level:
            query_parts.append(f"Experience Level: {job_description.experience_level}")

        query = " ".join(query_parts)
        logger.info(f"Built search query: {query}")
        return query

    def _build_qualifications_query(self, required_qualifications: List[str], preferred_qualifications: List[str]) -> str:
        """Build a search query from qualification lists.

        Required and preferred qualifications each get a labelled section,
        followed by one combined section listing all of them.
        """
        query_parts = []

        if required_qualifications:
            query_parts.append(f"Required skills and qualifications: {', '.join(required_qualifications)}")

        if preferred_qualifications:
            query_parts.append(f"Preferred skills and experience: {', '.join(preferred_qualifications)}")

        # Combine all qualifications for a comprehensive search
        all_qualifications = required_qualifications + preferred_qualifications
        if all_qualifications:
            query_parts.append(f"Relevant experience with: {', '.join(all_qualifications)}")

        query = " ".join(query_parts)
        logger.info(f"Built qualifications query: {query}")
        return query

    @staticmethod
    def _node_score(node) -> float:
        """Return the node's score as a float, using 0.0 when it is missing, None or non-numeric.

        A None score would otherwise break both the ':.3f' log formatting and
        the final sort by score.
        """
        raw_score = getattr(node, 'score', None)
        if raw_score is None:
            return 0.0
        try:
            return float(raw_score)
        except (TypeError, ValueError):
            return 0.0

    def _extract_candidate_info(self, node) -> "CandidateMatch":
        """Extract candidate information from a retrieved node.

        Handles both wrapper nodes (exposing the real node as ``.node``) and
        plain nodes. The candidate name is guessed from the file name in the
        metadata and, failing that, from a "First Last" pattern in the first
        200 characters of the content.

        Args:
            node: A retrieved node object; only attribute access is assumed

        Returns:
            A CandidateMatch; on an extraction error a minimal match built
            from whatever attributes are readable
        """
        try:
            # Extract basic information from the node
            node_id = getattr(node, 'id_', '') or getattr(node, 'node_id', '')
            score = self._node_score(node)

            # Handle different node structures
            if hasattr(node, 'node'):
                # Node with nested structure
                source = node.node
                node_id = node_id or getattr(source, 'id_', '')
            else:
                # Direct node structure
                source = node

            content = getattr(source, 'text', '') or getattr(source, 'content', '')
            # Fall back to an empty dict so a None metadata cannot break .get() below
            metadata = getattr(source, 'metadata', None) or getattr(source, 'extra_info', None) or {}

            # Extract candidate name and file name from metadata
            candidate_name = "Unknown Candidate"
            file_name = metadata.get('file_name', '') or metadata.get('filename', '') or metadata.get('file_path', '')

            # Try to extract candidate name from file name
            if file_name:
                # Remove file extension and replace underscores with spaces
                import os
                base_name = os.path.basename(file_name)
                name_part = base_name.split('.')[0].replace('_', ' ').replace('-', ' ')
                if name_part and not name_part.lower().startswith('resume'):
                    candidate_name = name_part.title()

            # Try to extract name from content if not found in metadata
            if candidate_name == "Unknown Candidate" and content:
                # Simple pattern matching for names in resume content
                import re
                name_patterns = [
                    r'^([A-Z][a-z]+ [A-Z][a-z]+)',  # First line with Name format
                    r'Name:?\s*([A-Z][a-z]+ [A-Z][a-z]+)',  # Name: John Doe
                    r'([A-Z][a-z]+ [A-Z][a-z]+)\s*\n',  # Name followed by newline
                ]

                header = content[:200]  # Search in first 200 chars
                for pattern in name_patterns:
                    match = re.search(pattern, header)
                    if match:
                        candidate_name = match.group(1)
                        break

            return CandidateMatch(
                node_id=node_id,
                score=score,
                content=content,
                metadata=metadata,
                candidate_name=candidate_name,
                file_name=file_name
            )

        except Exception as e:
            logger.error(f"Error extracting candidate info from node: {e}")
            # Return a basic match with available information
            return CandidateMatch(
                node_id=str(getattr(node, 'id_', 'unknown')),
                score=self._node_score(node),
                content=str(getattr(node, 'text', getattr(node, 'content', ''))),
                metadata=getattr(node, 'metadata', None) or {}
            )

    @staticmethod
    def _retriever_config(top_k: int, enable_reranking: bool) -> dict:
        """Return the keyword arguments passed to ``index.as_retriever``."""
        return {
            "dense_similarity_top_k": top_k,
            "alpha": 1.0,  # Restricts to vector search
            "enable_reranking": enable_reranking,
        }

    async def _retrieve_nodes(self, retriever, query: str):
        """Run the blocking ``retriever.retrieve(query)`` in a worker thread.

        Keeps the event loop free while the retrieval is in flight.
        """
        return await asyncio.to_thread(retriever.retrieve, query)

    def _collect_unique_candidates(self, nodes) -> List["CandidateMatch"]:
        """Convert nodes to CandidateMatch objects, one per file, best score first.

        Nodes that share a file name with an earlier node are skipped; nodes
        without a file name are always kept. A node whose processing raises
        is logged and skipped rather than failing the whole batch.
        """
        candidates = []
        seen_files = set()  # Track files to avoid duplicates

        for i, node in enumerate(nodes):
            try:
                candidate = self._extract_candidate_info(node)

                # Deduplicate by file name if available
                if candidate.file_name:
                    if candidate.file_name in seen_files:
                        logger.info(f"Skipping duplicate file: {candidate.file_name}")
                        continue
                    seen_files.add(candidate.file_name)

                candidates.append(candidate)
                logger.info(f"Processed candidate {i+1}: {candidate.candidate_name} (score: {candidate.score:.3f})")

            except Exception as e:
                logger.error(f"Error processing node {i}: {e}")
                continue

        # Sort by score (descending)
        candidates.sort(key=lambda match: match.score, reverse=True)
        return candidates

    async def retrieve_candidates(
        self,
        job_description: "JobDescriptionData",
        top_k: int = 20,
        enable_reranking: bool = True
    ) -> List["CandidateMatch"]:
        """Retrieve top candidates matching the job description.

        Args:
            job_description: Job data used to build the query text
            top_k: Value passed as the retriever's dense_similarity_top_k
            enable_reranking: Whether the retriever should rerank results

        Returns:
            Unique candidates sorted by descending score

        Raises:
            Exception: Any failure during retrieval, after logging it
        """
        try:
            logger.info(f"Starting candidate retrieval for job: {job_description.title}")

            # Build search query from job description
            query = self._build_search_query(job_description)

            index = self._get_index()
            retriever_config = self._retriever_config(top_k, enable_reranking)
            logger.info(f"Configuring retriever with: {retriever_config}")
            retriever = index.as_retriever(**retriever_config)

            logger.info(f"Retrieving candidates with query: {query}")
            nodes = await self._retrieve_nodes(retriever, query)
            logger.info(f"Retrieved {len(nodes)} nodes from LlamaCloud")

            candidates = self._collect_unique_candidates(nodes)
            logger.info(f"Successfully retrieved {len(candidates)} unique candidates")
            return candidates

        except Exception as e:
            logger.error(f"Error retrieving candidates: {e}")
            raise

    async def retrieve_candidates_by_qualifications(
        self,
        required_qualifications: List[str],
        preferred_qualifications: List[str],
        top_k: int = 20,
        enable_reranking: bool = True
    ) -> List["CandidateMatch"]:
        """Retrieve candidates matching specific qualifications.

        Args:
            required_qualifications: Required qualifications for the query
            preferred_qualifications: Preferred qualifications for the query
            top_k: Value passed as the retriever's dense_similarity_top_k
            enable_reranking: Whether the retriever should rerank results

        Returns:
            Unique candidates sorted by descending score

        Raises:
            Exception: Any failure during retrieval, after logging it
        """
        try:
            logger.info(f"Starting candidate retrieval by qualifications")
            logger.info(f"Required: {required_qualifications}")
            logger.info(f"Preferred: {preferred_qualifications}")

            # Build search query from qualifications
            query = self._build_qualifications_query(required_qualifications, preferred_qualifications)

            index = self._get_index()
            retriever_config = self._retriever_config(top_k, enable_reranking)
            logger.info(f"Configuring retriever with: {retriever_config}")
            retriever = index.as_retriever(**retriever_config)

            logger.info(f"Retrieving candidates with qualifications query: {query}")
            nodes = await self._retrieve_nodes(retriever, query)
            logger.info(f"Retrieved {len(nodes)} nodes from LlamaCloud")

            candidates = self._collect_unique_candidates(nodes)
            logger.info(f"Successfully retrieved {len(candidates)} unique candidates by qualifications")
            return candidates

        except Exception as e:
            logger.error(f"Error retrieving candidates by qualifications: {e}")
            raise

    async def search_by_skills(
        self,
        skills: str,
        top_k: int = 20
    ) -> List["CandidateMatch"]:
        """Search candidates by specific skills.

        Args:
            skills: Skills text interpolated into the query
            top_k: Value passed as the retriever's dense_similarity_top_k

        Returns:
            Unique candidates sorted by descending score

        Raises:
            Exception: Any failure during retrieval, after logging it
        """
        try:
            logger.info(f"Starting skill-based search for: {skills}")

            index = self._get_index()
            # Disable reranking for skill search
            retriever_config = self._retriever_config(top_k, False)
            logger.info(f"Configuring retriever for skill search with: {retriever_config}")
            retriever = index.as_retriever(**retriever_config)

            # Build query from skills
            query = f"Skills and experience in: {skills}"

            logger.info(f"Searching candidates with skills query: {query}")
            nodes = await self._retrieve_nodes(retriever, query)
            logger.info(f"Retrieved {len(nodes)} nodes from LlamaCloud for skills search")

            candidates = self._collect_unique_candidates(nodes)
            logger.info(f"Successfully retrieved {len(candidates)} unique candidates for skills: {skills}")
            return candidates

        except Exception as e:
            logger.error(f"Error searching candidates by skills: {e}")
            raise
|
||||
@@ -0,0 +1,346 @@
|
||||
"""OpenAI API service for job description extraction."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import httpx
|
||||
from typing import Dict, List, Any
|
||||
|
||||
from config import OPENAI_API_KEY, DEFAULT_MODEL, REQUEST_TIMEOUT, OPENAI_TEMPERATURE
|
||||
from models import JobDescriptionData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OpenAIService:
|
||||
"""Service class for handling OpenAI API interactions."""
|
||||
|
||||
def __init__(self):
    """Capture the OpenAI settings from config, failing fast without an API key.

    Raises:
        ValueError: If OPENAI_API_KEY is empty or unset
    """
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY is required")

    self.api_key, self.model = OPENAI_API_KEY, DEFAULT_MODEL
    self.timeout, self.temperature = REQUEST_TIMEOUT, OPENAI_TEMPERATURE
|
||||
|
||||
async def extract_job_description_from_text(self, text: str) -> JobDescriptionData:
    """Send the job description to the chat completions endpoint and parse the reply.

    Args:
        text: The job description text to analyze

    Returns:
        JobDescriptionData object with extracted information

    Raises:
        Exception: On a timeout, a network error, a non-200 status, an
            empty reply, or a reply that cannot be parsed
    """
    logger.info(f"Starting extraction with text length: {len(text)}")
    logger.info("API key is available, proceeding with extraction")

    # Create the extraction prompt based on reference implementation
    prompt = self._create_extraction_prompt(text)

    try:
        logger.info("Creating HTTP client and making API request")
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            system_message = {
                "role": "system",
                "content": "You are a helpful assistant that extracts structured data from job descriptions."
            }
            user_message = {"role": "user", "content": prompt}
            request_data = {
                "model": self.model,
                "messages": [system_message, user_message],
                "temperature": self.temperature,
                "response_format": {"type": "json_object"}
            }
            request_headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.api_key}"
            }

            logger.info(f"Making request to OpenAI with model: {self.model}")

            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=request_headers,
                json=request_data
            )

            logger.info(f"OpenAI API response status: {response.status_code}")

            if response.status_code != 200:
                error_text = response.text
                logger.error(f"OpenAI API error response: {error_text}")
                # Prefer the decoded error body; fall back to the raw text
                try:
                    error_data = response.json()
                except json.JSONDecodeError:
                    raise Exception(f"OpenAI API error ({response.status_code}): {error_text}")
                logger.error(f"OpenAI API error JSON: {error_data}")
                raise Exception(f"OpenAI API error ({response.status_code}): {error_data}")

            data = response.json()
            logger.info("Successfully parsed OpenAI response JSON")

            content = data["choices"][0]["message"]["content"]
            content_length = len(content) if content else 0
            logger.info(f"Extracted content from OpenAI response, length: {content_length}")

            if not content:
                logger.error("OpenAI returned empty content")
                raise Exception("Failed to extract job description data: Empty response")

            logger.info(f"OpenAI response content sample: {content[:200]}...")

            return self._parse_response_to_job_data(content)

    except httpx.TimeoutException as e:
        logger.error(f"HTTP timeout error: {e}")
        raise Exception(f"Request timeout: {str(e)}")
    except httpx.RequestError as e:
        logger.error(f"HTTP request error: {e}")
        raise Exception(f"Network error: {str(e)}")
    except Exception as e:
        # Log anything else (including the errors raised above) and re-raise
        logger.error(f"Unexpected error in extract_job_description_from_text: {e}")
        logger.error(f"Error type: {type(e).__name__}")
        raise
|
||||
|
||||
def _create_extraction_prompt(self, text: str) -> str:
|
||||
"""Create the prompt for job description extraction.
|
||||
|
||||
Args:
|
||||
text: The job description text
|
||||
|
||||
Returns:
|
||||
Formatted prompt string
|
||||
"""
|
||||
return f"""
|
||||
Extract the following information from this job description text.
|
||||
Format the response as a valid JSON object with these fields:
|
||||
- title: The job title
|
||||
- company: The company name (use "Unknown" if not found)
|
||||
- location: The job location (use "Not specified" if not found)
|
||||
- required_qualifications: An array of strings, each one representing a required qualification
|
||||
- preferred_qualifications: An array of strings, each one representing a preferred/nice-to-have qualification
|
||||
- description: A summary of the job description
|
||||
- experience_level: The experience level (entry-level, mid-level, senior, etc.)
|
||||
- employment_type: The employment type (full-time, part-time, contract, etc.)
|
||||
|
||||
Job Description Text:
|
||||
{text}
|
||||
"""
|
||||
|
||||
def _parse_response_to_job_data(self, content: str) -> JobDescriptionData:
    """Parse OpenAI response content to JobDescriptionData object.

    Missing fields and fields the model returned as JSON null both fall
    back to the documented defaults.

    Args:
        content: The JSON content from OpenAI response

    Returns:
        JobDescriptionData object

    Raises:
        Exception: If JSON parsing fails or the payload is not a JSON object
    """
    try:
        parsed_data = json.loads(content)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON response: {e}")
        logger.error(f"Raw content: {content}")
        raise Exception(f"Failed to parse response from OpenAI: {str(e)}") from e

    # Valid JSON that is not an object (e.g. a list) has no fields to read
    if not isinstance(parsed_data, dict):
        logger.error(f"Raw content: {content}")
        raise Exception(
            f"Failed to parse response from OpenAI: expected a JSON object, got {type(parsed_data).__name__}"
        )

    logger.info("Successfully parsed JSON from OpenAI response")
    logger.info(f"Parsed data keys: {list(parsed_data.keys())}")

    def field(name: str, default: Any) -> Any:
        # dict.get(name, default) would return None for an explicit JSON null,
        # so the default is applied for both missing and null values.
        value = parsed_data.get(name)
        return default if value is None else value

    # Validate and create JobDescriptionData object
    result = JobDescriptionData(
        title=field("title", "Unknown Position"),
        company=field("company", "Unknown"),
        location=field("location", "Not specified"),
        required_qualifications=field("required_qualifications", []),
        preferred_qualifications=field("preferred_qualifications", []),
        description=field("description", ""),
        experience_level=field("experience_level", "Not specified"),
        employment_type=field("employment_type", "Not specified")
    )
    logger.info("Successfully created JobDescriptionData object")
    return result
|
||||
|
||||
async def score_candidate_qualifications(
|
||||
self,
|
||||
candidate_resume: str,
|
||||
required_qualifications: List[str],
|
||||
preferred_qualifications: List[str],
|
||||
job_title: str = "",
|
||||
job_description: str = ""
|
||||
) -> Dict[str, Any]:
|
||||
"""Score a candidate's resume against job qualifications using OpenAI.
|
||||
|
||||
Args:
|
||||
candidate_resume: The candidate's resume text content
|
||||
required_qualifications: List of required qualifications
|
||||
preferred_qualifications: List of preferred qualifications
|
||||
job_title: Job title for context (optional)
|
||||
job_description: Job description for context (optional)
|
||||
|
||||
Returns:
|
||||
Dictionary containing detailed scoring results
|
||||
"""
|
||||
try:
|
||||
logger.info("Starting candidate qualification scoring with OpenAI")
|
||||
|
||||
# Build the prompt for scoring
|
||||
prompt_parts = [
|
||||
"You are a professional recruiter tasked with evaluating how well a candidate's resume matches the qualifications for a job.",
|
||||
""
|
||||
]
|
||||
|
||||
if job_title:
|
||||
prompt_parts.append(f"JOB TITLE: {job_title}")
|
||||
|
||||
if job_description:
|
||||
prompt_parts.append(f"JOB DESCRIPTION: {job_description}")
|
||||
|
||||
prompt_parts.extend([
|
||||
"",
|
||||
"CANDIDATE'S RESUME:",
|
||||
candidate_resume,
|
||||
"",
|
||||
"Please evaluate the candidate against each qualification using the following scale:",
|
||||
"0 - Not Met: The candidate's resume shows no evidence of meeting this qualification",
|
||||
"1 - Somewhat Met: The candidate's resume shows some evidence of meeting this qualification but may lack depth or completeness",
|
||||
"2 - Strongly Met: The candidate's resume clearly demonstrates they meet or exceed this qualification",
|
||||
"",
|
||||
"Please evaluate ONLY the following qualifications, and return your response in JSON format with explanations for each score:",
|
||||
""
|
||||
])
|
||||
|
||||
if required_qualifications:
|
||||
prompt_parts.append("REQUIRED QUALIFICATIONS:")
|
||||
for i, qual in enumerate(required_qualifications, 1):
|
||||
prompt_parts.append(f"{i}. {qual}")
|
||||
prompt_parts.append("")
|
||||
|
||||
if preferred_qualifications:
|
||||
prompt_parts.append("PREFERRED QUALIFICATIONS:")
|
||||
for i, qual in enumerate(preferred_qualifications, 1):
|
||||
prompt_parts.append(f"{i}. {qual}")
|
||||
prompt_parts.append("")
|
||||
|
||||
prompt_parts.extend([
|
||||
'Format your response as valid JSON with this structure:',
|
||||
'{',
|
||||
' "requiredScores": [',
|
||||
' {',
|
||||
' "qualification": "qualification text",',
|
||||
' "score": 0/1/2,',
|
||||
' "explanation": "brief explanation for the score"',
|
||||
' },',
|
||||
' ...',
|
||||
' ],',
|
||||
' "preferredScores": [',
|
||||
' {',
|
||||
' "qualification": "qualification text",',
|
||||
' "score": 0/1/2,',
|
||||
' "explanation": "brief explanation for the score"',
|
||||
' },',
|
||||
' ...',
|
||||
' ],',
|
||||
' "overallFeedback": "brief overall assessment of the candidate"',
|
||||
'}'
|
||||
])
|
||||
|
||||
prompt = "\n".join(prompt_parts)
|
||||
|
||||
logger.info("Sending scoring request to OpenAI")
|
||||
|
||||
# Call OpenAI API
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
request_data = {
|
||||
"model": self.model,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a professional recruiter who evaluates how well candidate resumes match job qualifications."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt
|
||||
}
|
||||
],
|
||||
"temperature": 0.1,
|
||||
"response_format": {"type": "json_object"}
|
||||
}
|
||||
|
||||
response = await client.post(
|
||||
"https://api.openai.com/v1/chat/completions",
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {self.api_key}"
|
||||
},
|
||||
json=request_data
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
error_text = response.text
|
||||
logger.error(f"OpenAI API error response: {error_text}")
|
||||
try:
|
||||
error_data = response.json()
|
||||
raise Exception(f"OpenAI API error ({response.status_code}): {error_data}")
|
||||
except json.JSONDecodeError:
|
||||
raise Exception(f"OpenAI API error ({response.status_code}): {error_text}")
|
||||
|
||||
data = response.json()
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
|
||||
if not content:
|
||||
raise ValueError("No content in OpenAI response")
|
||||
|
||||
logger.info("Received response from OpenAI, parsing JSON")
|
||||
|
||||
try:
|
||||
# Parse the JSON response
|
||||
scoring_data = json.loads(content)
|
||||
|
||||
# Calculate the total score
|
||||
required_scores = scoring_data.get("requiredScores", [])
|
||||
preferred_scores = scoring_data.get("preferredScores", [])
|
||||
|
||||
required_total = sum(item.get("score", 0) for item in required_scores)
|
||||
preferred_total = sum(item.get("score", 0) for item in preferred_scores)
|
||||
|
||||
total_score = required_total + preferred_total
|
||||
max_possible_score = (len(required_qualifications) + len(preferred_qualifications)) * 2
|
||||
|
||||
# Calculate match percentage
|
||||
match_percentage = (total_score / max_possible_score * 100) if max_possible_score > 0 else 0
|
||||
|
||||
result = {
|
||||
"requiredScores": required_scores,
|
||||
"preferredScores": preferred_scores,
|
||||
"totalScore": total_score,
|
||||
"maxPossibleScore": max_possible_score,
|
||||
"matchPercentage": round(match_percentage, 1),
|
||||
"overallFeedback": scoring_data.get("overallFeedback", ""),
|
||||
"scoringBreakdown": {
|
||||
"requiredTotal": required_total,
|
||||
"preferredTotal": preferred_total,
|
||||
"requiredCount": len(required_qualifications),
|
||||
"preferredCount": len(preferred_qualifications)
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(f"Successfully scored candidate: {total_score}/{max_possible_score} ({match_percentage:.1f}%)")
|
||||
return result
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse JSON response: {e}")
|
||||
logger.error(f"Raw content: {content}")
|
||||
raise ValueError("Failed to parse scoring data from LLM response")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error scoring candidate qualifications: {e}")
|
||||
raise
|
||||
@@ -0,0 +1,99 @@
|
||||
import asyncio
|
||||
|
||||
from fastmcp import Client
|
||||
|
||||
async def test_server():
    """Smoke-test every tool exposed by the locally running MCP server.

    Connects with the streamable-http transport, lists the registered tools,
    then calls each tool once and prints the text of the first content item
    of every reply. Point the URL at the "/sse" endpoint instead when the
    server is started with the sse transport.
    """
    # Fixture: job description fed to extract_job_requirements.
    sample_jd = """
Software Engineer - Full Stack
TechCorp Inc.
San Francisco, CA

We are seeking a talented Full Stack Software Engineer to join our growing team.

Need to have:
- Bachelor's degree in Computer Science or related field
- 3+ years of experience in web development
- Proficiency in JavaScript, Python, SQL
- Experience with React and Node.js

Plus if you have:
- Experience with cloud platforms (AWS, GCP)
- Knowledge of Docker and Kubernetes
- Previous startup experience

This is a full-time position offering competitive salary and benefits.
"""

    # Fixture: resume fed to score_candidate_qualifications.
    sample_resume = """
John Doe
Software Engineer

Experience:
- 5 years of Python development
- 3 years of JavaScript and React
- 2 years working with AWS and Docker
- Experience with machine learning projects
- Bachelor's degree in Computer Science

Skills: Python, JavaScript, React, Node.js, AWS, Docker, Machine Learning, SQL
"""

    # (tool name, left operand, operator symbol shown in the banner, right operand)
    arithmetic_calls = (
        ("add", 1, "+", 2),
        ("subtract", 10, "-", 3),
        ("multiply", 4, "*", 5),
    )

    async with Client("http://localhost:8080/mcp") as client:

        async def invoke(tool_name, arguments):
            """Call one tool and print the text of its first content item."""
            reply = await client.call_tool(tool_name, arguments)
            print(f"<<< Result: {reply[0].text}")

        # Discover what the server offers before calling anything.
        for tool in await client.list_tools():
            print(f">>> Tool found: {tool.name}")

        # Arithmetic tools.
        for tool_name, lhs, symbol, rhs in arithmetic_calls:
            print(f">>> Calling {tool_name} tool for {lhs} {symbol} {rhs}")
            await invoke(tool_name, {"a": lhs, "b": rhs})

        # Job description extraction.
        print(">>> Calling extract_job_requirements tool")
        await invoke("extract_job_requirements", {"jd_text": sample_jd})

        # Candidate retrieval driven by qualifications.
        print(">>> Calling find_matching_candidates tool")
        await invoke(
            "find_matching_candidates",
            {
                "required_qualifications": "Python, JavaScript, React, Node.js, 3+ years experience",
                "preferred_qualifications": "AWS, Docker, Kubernetes, CI/CD",
                "top_k": 5,
                "enable_reranking": True,
            },
        )

        # Candidate retrieval driven by a plain skill list.
        print(">>> Calling search_candidates_by_skills tool")
        await invoke(
            "search_candidates_by_skills",
            {
                "skills": "Python, JavaScript, React, Node.js",
                "top_k": 3,
            },
        )

        # LLM-based scoring of a single resume.
        print(">>> Calling score_candidate_qualifications tool")
        await invoke(
            "score_candidate_qualifications",
            {
                "candidate_resume": sample_resume,
                "required_qualifications": "Python, JavaScript, React, 3+ years experience",
                "preferred_qualifications": "AWS, Docker, Machine Learning",
                "job_title": "Senior Software Engineer",
                "job_description": "We are looking for a senior software engineer to join our team",
            },
        )

        print(">>> All tests completed successfully!")


if __name__ == "__main__":
    asyncio.run(test_server())
|
||||
@@ -0,0 +1 @@
|
||||
"""MCP tools package."""
|
||||
@@ -0,0 +1,229 @@
|
||||
"""Candidate retrieval tools using LlamaCloud."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from typing import Dict, Any
|
||||
|
||||
from services.llamacloud_service import LlamaCloudService
|
||||
from models import JobDescriptionData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CandidateTools:
    """Container class for candidate retrieval MCP tools.

    Every tool coroutine returns a JSON string. Failures are reported as an
    ``{"error": "..."}`` object instead of being raised to the caller.
    """

    def __init__(self):
        """Initialize CandidateTools with LlamaCloud service.

        Construction is best-effort: when the service cannot be created the
        error is logged and ``llamacloud_service`` is set to ``None`` so the
        retrieval tools can answer with an error payload instead of crashing.
        """
        try:
            self.llamacloud_service = LlamaCloudService()
            logger.info("CandidateTools initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize CandidateTools: {e}")
            self.llamacloud_service = None

    @staticmethod
    def _split_csv(text: str) -> list:
        """Split a comma-separated string into trimmed, non-empty items."""
        if not text:
            return []
        return [item.strip() for item in text.split(",") if item.strip()]

    @staticmethod
    def _is_valid_top_k(top_k) -> bool:
        """Return True when ``top_k`` is a real integer between 1 and 50."""
        # bool is a subclass of int; reject it so True is not read as top_k=1.
        if isinstance(top_k, bool) or not isinstance(top_k, int):
            return False
        return 1 <= top_k <= 50

    async def find_matching_candidates(
        self,
        required_qualifications: str,
        preferred_qualifications: str = "",
        top_k: int = 10,
        enable_reranking: bool = True
    ) -> str:
        """Find candidates matching job qualifications from LlamaCloud resume index.

        Args:
            required_qualifications: Comma-separated string of required qualifications
            preferred_qualifications: Comma-separated string of preferred qualifications (optional)
            top_k: Number of top candidates to retrieve (default: 10, max: 50)
            enable_reranking: Whether to enable reranking for better results (default: True)

        Returns:
            JSON string containing list of matching candidates with their scores and information
        """
        logger.info(f">>> Tool: 'find_matching_candidates' called with top_k={top_k}, reranking={enable_reranking}")

        # Validate service availability
        if not self.llamacloud_service:
            error_msg = "LlamaCloud service is not available. Check configuration and API key."
            logger.error(error_msg)
            return json.dumps({"error": error_msg})

        # Validate on the parsed list so input made only of commas/whitespace
        # (e.g. " , ,") is rejected instead of searching with no criteria.
        required_quals = self._split_csv(required_qualifications)
        if not required_quals:
            return json.dumps({"error": "Required qualifications cannot be empty"})

        # Validate top_k parameter
        if not self._is_valid_top_k(top_k):
            return json.dumps({"error": "top_k must be an integer between 1 and 50"})

        try:
            preferred_quals = self._split_csv(preferred_qualifications)

            logger.info(f"Required qualifications: {required_quals}")
            logger.info(f"Preferred qualifications: {preferred_quals}")

            # Retrieve candidates from LlamaCloud
            candidates = await self.llamacloud_service.retrieve_candidates_by_qualifications(
                required_qualifications=required_quals,
                preferred_qualifications=preferred_quals,
                top_k=top_k,
                enable_reranking=enable_reranking
            )

            # Convert candidates to dictionary format
            candidates_data = [candidate.to_dict() for candidate in candidates]

            # Create response
            result = {
                "search_type": "qualifications_based",
                "total_candidates": len(candidates_data),
                "search_parameters": {
                    "top_k": top_k,
                    "enable_reranking": enable_reranking,
                    "required_qualifications": required_quals,
                    "preferred_qualifications": preferred_quals
                },
                "candidates": candidates_data
            }

            logger.info(f"Successfully found {len(candidates_data)} matching candidates")
            return json.dumps(result, indent=2)

        except Exception as e:
            # Tool boundary: report the failure as JSON rather than raising.
            error_msg = f"Failed to find matching candidates: {str(e)}"
            logger.error(f"Error in find_matching_candidates: {error_msg}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": error_msg})

    async def search_candidates_by_skills(self, skills: str, top_k: int = 10) -> str:
        """Search candidates by specific skills or keywords.

        Args:
            skills: Comma-separated list of skills or keywords to search for
            top_k: Number of top candidates to retrieve (default: 10, max: 50)

        Returns:
            JSON string containing list of matching candidates
        """
        logger.info(f">>> Tool: 'search_candidates_by_skills' called with skills='{skills}', top_k={top_k}")

        # Validate service availability
        if not self.llamacloud_service:
            error_msg = "LlamaCloud service is not available. Check configuration and API key."
            logger.error(error_msg)
            return json.dumps({"error": error_msg})

        # Validate on the parsed list so comma-only input is rejected too.
        skills_list = self._split_csv(skills)
        if not skills_list:
            return json.dumps({"error": "Skills parameter cannot be empty"})

        # Validate top_k parameter
        if not self._is_valid_top_k(top_k):
            return json.dumps({"error": "top_k must be an integer between 1 and 50"})

        try:
            # Wrap the skills in a minimal job description for the retriever.
            job_description = JobDescriptionData(
                title="Skills-based Search",
                company="Search Query",
                location="Any",
                required_qualifications=skills_list,
                preferred_qualifications=[],
                description=f"Looking for candidates with skills: {skills}",
                experience_level="",
                employment_type=""
            )

            logger.info(f"Searching for candidates with skills: {skills_list}")

            # Retrieve candidates from LlamaCloud
            candidates = await self.llamacloud_service.retrieve_candidates(
                job_description=job_description,
                top_k=top_k,
                enable_reranking=True
            )

            # Convert candidates to dictionary format
            candidates_data = [candidate.to_dict() for candidate in candidates]

            # Create response
            result = {
                "search_skills": skills_list,
                "total_candidates": len(candidates_data),
                "search_parameters": {
                    "top_k": top_k
                },
                "candidates": candidates_data
            }

            logger.info(f"Successfully found {len(candidates_data)} candidates with matching skills")
            return json.dumps(result, indent=2)

        except Exception as e:
            # Tool boundary: report the failure as JSON rather than raising.
            error_msg = f"Failed to search candidates by skills: {str(e)}"
            logger.error(f"Error in search_candidates_by_skills: {error_msg}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": error_msg})

    async def score_candidate_qualifications(
        self,
        candidate_resume: str,
        required_qualifications: str,
        preferred_qualifications: str = "",
        job_title: str = "",
        job_description: str = ""
    ) -> str:
        """Score a candidate's resume against specific job qualifications using LLM evaluation.

        Args:
            candidate_resume: The candidate's resume text content
            required_qualifications: Comma-separated string of required qualifications
            preferred_qualifications: Comma-separated string of preferred qualifications (optional)
            job_title: Job title for context (optional)
            job_description: Job description for context (optional)

        Returns:
            JSON string containing detailed scoring results for each qualification
        """
        logger.info(">>> Tool: 'score_candidate_qualifications' called")

        # Validate input parameters
        if not candidate_resume or not candidate_resume.strip():
            return json.dumps({"error": "Candidate resume cannot be empty"})

        # Validate on the parsed list so comma-only input is rejected too.
        required_quals = self._split_csv(required_qualifications)
        if not required_quals:
            return json.dumps({"error": "Required qualifications cannot be empty"})

        try:
            preferred_quals = self._split_csv(preferred_qualifications)

            logger.info(f"Scoring candidate against {len(required_quals)} required and {len(preferred_quals)} preferred qualifications")

            # Imported here so the OpenAI service is only loaded when scoring runs.
            from services.openai_service import OpenAIService

            # Use OpenAI service for scoring
            openai_service = OpenAIService()
            scoring_result = await openai_service.score_candidate_qualifications(
                candidate_resume=candidate_resume,
                required_qualifications=required_quals,
                preferred_qualifications=preferred_quals,
                job_title=job_title,
                job_description=job_description
            )

            logger.info(f"Successfully scored candidate with total score {scoring_result.get('totalScore', 0)}/{scoring_result.get('maxPossibleScore', 0)}")
            return json.dumps(scoring_result, indent=2)

        except Exception as e:
            # Tool boundary: report the failure as JSON rather than raising.
            error_msg = f"Failed to score candidate qualifications: {str(e)}"
            logger.error(f"Error in score_candidate_qualifications: {error_msg}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": error_msg})
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Job description related MCP tools."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from typing import Dict, Any
|
||||
|
||||
from services.openai_service import OpenAIService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JobTools:
    """Container class for job description related MCP tools."""

    def __init__(self):
        """Initialize JobTools with OpenAI service."""
        self.openai_service = OpenAIService()

    async def extract_job_requirements(self, jd_text: str) -> str:
        """Extract structured job requirements from job description text.

        Args:
            jd_text: The job description text to analyze

        Returns:
            JSON string containing structured job requirements including title, company,
            location, required_qualifications, preferred_qualifications, description,
            experience_level, and employment_type. On failure the JSON string is an
            object with a single "error" key instead.
        """
        # Guard the length so a missing (None) argument reaches the validation
        # below instead of raising TypeError inside the log call.
        text_length = len(jd_text) if jd_text else 0
        logger.info(f">>> Tool: 'extract_job_requirements' called with JD text length: {text_length}")

        # Input validation
        if not jd_text or not jd_text.strip():
            return json.dumps({"error": "Job description text cannot be empty"})

        if len(jd_text.strip()) < 10:
            return json.dumps({"error": "Job description text is too short to be meaningful"})

        try:
            logger.info("Starting job description extraction process...")

            # Use the OpenAI service for extraction
            extraction_result = await self.openai_service.extract_job_description_from_text(jd_text)

            # Check that the extraction result is valid
            if not extraction_result:
                logger.error("JD extraction result is undefined")
                return json.dumps({"error": "Failed to extract data from job description text"})

            logger.info("JD extraction completed successfully")
            result_dict = extraction_result.to_dict()
            logger.info(f"Structured JD extraction result: {json.dumps(result_dict, indent=2)}")

            return json.dumps(result_dict)

        # json.JSONDecodeError subclasses ValueError, so it must be caught
        # first; otherwise the ValueError handler shadows it.
        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error in extract_job_requirements: {e}")
            return json.dumps({"error": f"JSON parsing error: {str(e)}"})
        except ValueError as e:
            logger.error(f"ValueError in extract_job_requirements: {e}")
            return json.dumps({"error": f"Configuration error: {str(e)}"})
        except Exception as e:
            # Tool boundary: report the failure as JSON rather than raising.
            logger.error(f"Unexpected error extracting job requirements: {e}")
            logger.error(f"Error type: {type(e).__name__}")
            logger.error(f"Error args: {e.args}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": f"Failed to extract job requirements: {str(e)}"})
|
||||
@@ -0,0 +1,52 @@
|
||||
"""Mathematical operation MCP tools."""
|
||||
|
||||
import logging
|
||||
from typing import Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MathTools:
    """Stateless grouping of the arithmetic MCP tools."""

    @staticmethod
    def add(a: int, b: int) -> int:
        """Add two numbers together.

        Args:
            a: The first number
            b: The second number

        Returns:
            The sum of the two numbers
        """
        logger.info(f">>> Tool: 'add' called with numbers '{a}' and '{b}'")
        total = a + b
        return total

    @staticmethod
    def subtract(a: int, b: int) -> int:
        """Subtract two numbers.

        Args:
            a: The first number
            b: The second number

        Returns:
            The difference of the two numbers
        """
        logger.info(f">>> Tool: 'subtract' called with numbers '{a}' and '{b}'")
        difference = a - b
        return difference

    @staticmethod
    def multiply(a: int, b: int) -> int:
        """Multiply two numbers.

        Args:
            a: The first number
            b: The second number

        Returns:
            The product of the two numbers
        """
        logger.info(f">>> Tool: 'multiply' called with numbers '{a}' and '{b}'")
        product = a * b
        return product
|
||||
Reference in New Issue
Block a user