Initial commit (cleaned history)

This commit is contained in:
Zhaoqi Li
2025-06-23 15:35:06 -07:00
commit 4e22fc31f2
17 changed files with 4814 additions and 0 deletions
+87
View File
@@ -0,0 +1,87 @@
# Environment variables and sensitive files
.env
.env.local
.env.production
config_local.py
# Python
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Virtual environments
.venv/
venv/
ENV/
env/
.env/
# IDE and Editor files
.vscode/
.idea/
*.swp
*.swo
*~
.spyderproject
.spyproject
.ropeproject
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
*.log
logs/
# Testing
.coverage
.pytest_cache/
.tox/
.nox/
coverage.xml
*.cover
.hypothesis/
# Documentation builds
docs/_build/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# Backup files
*_backup.py
*_backup_*.py
server_clean.py
test_server_clean.py
# Temporary files
*.tmp
*.temp
temp_*
+20
View File
@@ -0,0 +1,20 @@
# Use the official Python lightweight image
FROM python:3.13-slim
# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Install the project into /app
COPY . /app
WORKDIR /app
# Allow statements and log messages to immediately appear in the logs
ENV PYTHONUNBUFFERED=1
# Default listening port. Declared so that `EXPOSE $PORT` below expands to a
# real port instead of an empty value; a PORT supplied at runtime (for example
# by Cloud Run or `docker run -e PORT=...`) still takes precedence.
ENV PORT=8080
# Install dependencies
RUN uv sync
EXPOSE $PORT
# Run the FastMCP server
CMD ["uv", "run", "server.py"]
+500
View File
@@ -0,0 +1,500 @@
# Job Matching MCP Server
A Model Context Protocol (MCP) server that provides intelligent job matching capabilities. Extract structured job requirements from job descriptions and find/rank candidates from your LlamaCloud resume index.
## 🚀 Features
### Core Job Matching Functions
1. **`extract_job_requirements`** - Extract structured data from job description text
2. **`find_matching_candidates`** - Find and rank candidates from LlamaCloud index
3. **`search_candidates_by_skills`** - Search candidates by specific skills
4. **`score_candidate_qualifications`** - Score candidate against job requirements
### Additional Functions
- **`add`** / **`subtract`** / **`multiply`** - Basic math functions (backward compatibility)
## 📋 Available MCP Tools
### Job Description Processing
- `extract_job_requirements(jd_text: str)` - Extract structured job requirements from text
### Candidate Management
- `find_matching_candidates(required_qualifications: str, preferred_qualifications: str, top_k: int, enable_reranking: bool)` - Find candidates matching job qualifications
- `search_candidates_by_skills(skills: str, top_k: int)` - Search candidates by specific skills
- `score_candidate_qualifications(candidate_resume: str, required_qualifications: str, preferred_qualifications: str, job_title: str, job_description: str)` - Score candidate against job requirements
## 📋 Function Details
### 1. Extract Job Requirements
```python
extract_job_requirements(jd_text: str) -> str
```
**Input:** Job description text (copied from job posting)
**Output:** JSON string containing:
- `title`: Job title
- `company`: Company name
- `location`: Job location
- `required_qualifications`: Array of required qualifications
- `preferred_qualifications`: Array of preferred qualifications
- `description`: Job summary
- `experience_level`: Experience level (entry/mid/senior)
- `employment_type`: Employment type (full-time/contract/etc.)
### 2. Find and Rank Candidates from LlamaCloud
```python
find_matching_candidates(required_qualifications: str, preferred_qualifications: str, top_k: int, enable_reranking: bool) -> str
```
**Input:**
- `required_qualifications`: Comma-separated required qualifications
- `preferred_qualifications`: Comma-separated preferred qualifications
- `top_k`: Maximum candidates to retrieve (default: 10)
- `enable_reranking`: Whether to enable reranking (default: True)
**Output:** JSON string containing:
- `candidates`: Array of candidates with scores and analysis
- `total_candidates`: Number of candidates found
- `search_parameters`: Details about search configuration
**Key Features:**
- **Retrieves candidates from LlamaCloud index** using semantic search
- **Weighted scoring**: Required qualifications have higher weight
- **Match percentage**: Overall compatibility score
- **Detailed explanations**: For each qualification assessment
### 3. Search Candidates by Skills
```python
search_candidates_by_skills(skills: str, top_k: int) -> str
```
**Input:**
- `skills`: Comma-separated list of skills or keywords
- `top_k`: Number of top candidates to retrieve (default: 10)
**Output:** JSON with matching candidates and their scores
### 4. Score Candidate Qualifications
```python
score_candidate_qualifications(candidate_resume: str, required_qualifications: str, preferred_qualifications: str, job_title: str, job_description: str) -> str
```
**Input:**
- `candidate_resume`: The candidate's resume text
- `required_qualifications`: Comma-separated required qualifications
- `preferred_qualifications`: Comma-separated preferred qualifications
- `job_title`: Job title for context (optional)
- `job_description`: Job description for context (optional)
**Output:** Comprehensive analysis including:
- Strengths and weaknesses
- Detailed scoring breakdown
- Hiring recommendations
- Role fit assessment
## 🛠️ Setup & Configuration
### 1. Configure API Keys & Settings
You have **two options** for configuration:
#### Option A: Environment Variables (Recommended for Production)
```bash
# Required: OpenAI API Key
export OPENAI_API_KEY="your-openai-api-key"
# Required: LlamaCloud Configuration
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
export LLAMA_CLOUD_INDEX_NAME="your-resume-index-name"
export LLAMA_CLOUD_PROJECT_NAME="your-project-name"
export LLAMA_CLOUD_ORGANIZATION_ID="your-organization-id"
# Optional: Server Configuration
export PORT="8080"
export HOST="0.0.0.0"
export REQUEST_TIMEOUT="30.0"
export OPENAI_TEMPERATURE="0.1"
```
#### Option B: Direct Configuration in config.py (For Local Development)
1. **Open `config.py`** and replace the placeholder values:
```python
# Replace these placeholder values with your actual API keys:
OPENAI_API_KEY = "your-actual-openai-api-key-here"
LLAMA_CLOUD_API_KEY = "your-actual-llamacloud-api-key-here"
LLAMA_CLOUD_ORGANIZATION_ID = "your-actual-org-id-here"
LLAMA_CLOUD_INDEX_NAME = "your-actual-index-name"
```
2. **⚠️ Security Warning**: If you edit `config.py` directly, **never commit your API keys to version control!**
#### Getting Your API Keys:
- **OpenAI API Key**: Get from [OpenAI Platform](https://platform.openai.com/api-keys)
- **LlamaCloud API Key**: Get from [LlamaCloud Console](https://cloud.llamaindex.ai/)
- **LlamaCloud Org ID**: Found in your LlamaCloud project settings
- **LlamaCloud Index Name**: The name of your resume index in LlamaCloud
*Without LlamaCloud credentials, the server uses mock candidate data for testing.*
### 2. Protect Your API Keys (Important!)
If you're planning to commit this code to version control, create a `.gitignore` file to protect your sensitive information:
```bash
# Create .gitignore file
cat > .gitignore << EOF
# Environment variables and sensitive files
.env
.env.local
.env.production
config_local.py
# Python
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
.venv/
venv/
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
EOF
```
**Alternative**: You can also create a separate `config_local.py` file with your actual keys and import it in `config.py`, then add `config_local.py` to `.gitignore`.
### 3. Install Dependencies
```bash
# Install using uv (recommended)
uv sync
# Or using pip
pip install fastmcp httpx llama-cloud llama-index
```
### 4. Run the Server
```bash
python server.py
```
Server starts on `http://localhost:8080/mcp` (or `PORT` environment variable)
## 🧪 Testing
Run the comprehensive test suite:
```bash
# Start the server first
python server.py
# In another terminal, run tests
python test_server.py
```
The test suite will:
- ✅ Extract job requirements from sample JD
- ✅ Retrieve and rank candidates from LlamaCloud (or mock data)
- ✅ Perform detailed analysis of top candidate
- ✅ Test backward compatibility functions
## 📊 How It Works
### Architecture Overview
1. **Job Description Processing**
- Uses OpenAI to extract structured requirements from free-form JD text
- Separates required vs. preferred qualifications
- Extracts metadata (title, company, location, etc.)
2. **Candidate Retrieval**
- Queries LlamaCloud index using semantic search
- Builds search query from job requirements
- Retrieves top candidates with similarity scores
3. **Intelligent Scoring**
- Uses OpenAI to score each candidate (0-2 scale)
- **0**: Not Met, **1**: Somewhat Met, **2**: Strongly Met
- Required qualifications weighted 2x in final score
- Provides explanations for each score
4. **Match Calculation**
```
Weighted Score = (Required Total × 2) + Preferred Total
Match % = (Weighted Score / Max Possible Score) × 100
```
### Data Flow
```mermaid
graph TD
A[Job Description Text] --> B[extract_job_requirements]
B --> C[Structured Requirements JSON]
C --> D[find_matching_candidates]
E[LlamaCloud Index] --> D
D --> F[Ranked Candidates with Scores]
C --> G[score_candidate_qualifications]
H[Individual Resume] --> G
G --> I[Detailed Analysis & Recommendations]
```
## 🎯 Use Cases
### 1. Automated Resume Screening
```python
# Extract requirements from job posting
job_reqs = extract_job_requirements(job_posting_text)
# Find top candidates from your resume database
top_candidates = find_matching_candidates("Python, JavaScript, React", "AWS, Docker", 10, True)
# Get detailed analysis of promising candidates
for candidate in json.loads(top_candidates)["candidates"][:5]:
    analysis = score_candidate_qualifications(candidate["content"], "Python, JavaScript, React", "AWS, Docker")
```
### 2. Hiring Pipeline Integration
- **ATS Integration**: Automatically score incoming applications
- **Recruiter Tools**: Provide data-driven candidate rankings
- **Interview Prep**: Generate candidate-specific interview questions
### 3. Job Market Analysis
- **Requirement Trends**: Track common qualifications across postings
- **Candidate Gap Analysis**: Identify missing skills in candidate pool
- **Salary Benchmarking**: Correlate requirements with compensation data
## 🚀 Deployment
### Prerequisites
Make sure you have the following set up:
- Python 3.10+
- UV package manager
- Google Cloud SDK (gcloud)
- Project ID configured: `export PROJECT_ID=<your-project-id>`
- Artifact Registry repository created: `remote-mcp-servers`
### Local Development
```bash
# 1. Install dependencies
uv sync
# 2. Configure API keys (choose one method):
# Method A: Set environment variables
export OPENAI_API_KEY="your-openai-api-key"
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
export LLAMA_CLOUD_INDEX_NAME="your-index-name"
export LLAMA_CLOUD_ORGANIZATION_ID="your-org-id"
# Method B: Edit config.py directly (see configuration section above)
# 3. Run server
python server.py
```
The server will start on `http://localhost:8080/mcp` and log which configuration it's using:
```
[INFO]: LlamaCloudService initialized with index: your-index-name
[INFO]: MCP server starting on 0.0.0.0:8080
```
#### Quick Configuration Test:
You can verify your configuration is working by running:
```bash
python -c "from config import OPENAI_API_KEY, LLAMA_CLOUD_API_KEY, LLAMA_CLOUD_INDEX_NAME; print(f'OpenAI: {OPENAI_API_KEY[:10]}..., LlamaCloud: {LLAMA_CLOUD_API_KEY[:10]}..., Index: {LLAMA_CLOUD_INDEX_NAME}')"
```
If you see placeholder values like "your-openai-api-key-here", your configuration needs to be updated.
### Docker Deployment
```bash
# Build image
docker build -t job-matching-mcp .
# Run container
docker run -p 8080:8080 \
-e OPENAI_API_KEY="your-key" \
-e LLAMA_CLOUD_API_KEY="your-key" \
job-matching-mcp
```
### Google Cloud Run Deployment
**📚 Reference Documentation**: [Build and Deploy a Remote MCP Server to Google Cloud Run in Under 10 Minutes](https://cloud.google.com/blog/topics/developers-practitioners/build-and-deploy-a-remote-mcp-server-to-google-cloud-run-in-under-10-minutes)
#### Initial Deployment
```bash
# Build and push to Artifact Registry
gcloud builds submit --region=us-central1 \
--tag us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest
# Deploy to Cloud Run
gcloud run deploy mcp-server \
--image us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest \
--region=us-central1 \
--no-allow-unauthenticated \
--set-env-vars OPENAI_API_KEY="your-key",LLAMA_CLOUD_API_KEY="your-key"
```
#### Redeployment Steps
After making code changes to your MCP server, follow these steps to redeploy:
**Step 1: Rebuild the container and push to Artifact Registry**
```bash
gcloud builds submit --region=us-central1 \
--tag us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest
```
**Step 2: Re-deploy the updated container to Cloud Run**
```bash
gcloud run deploy mcp-server \
--image us-central1-docker.pkg.dev/$PROJECT_ID/remote-mcp-servers/mcp-server:latest \
--region=us-central1 \
--no-allow-unauthenticated
```
**Step 3: Test the deployment (optional)**
Start the Cloud Run proxy to test your updated server:
```bash
gcloud run services proxy mcp-server --region=us-central1
```
Then run your test script:
```bash
uv run test_server.py
```
## 📁 Project Structure
```
mcp-on-cloudrun/
├── config.py # Configuration constants
├── models.py # Data structures
├── server.py # Main MCP server
├── Dockerfile # Container configuration
├── pyproject.toml # Python dependencies
├── services/
│ ├── openai_service.py # OpenAI API integration
│ └── llamacloud_service.py # LlamaCloud integration
├── tools/
│ ├── math_tools.py # Math operations (add, subtract, multiply)
│ ├── job_tools.py # Job description extraction
│ └── candidate_tools.py # Candidate search and scoring
└── test_server.py # Test client
```
## 🔧 Customization
### Scoring Criteria
Modify the scoring prompts in `services/openai_service.py` to adjust evaluation criteria:
- Change scoring scale (0-2 to 0-5, etc.)
- Adjust weighting between required/preferred qualifications
- Add domain-specific evaluation criteria
### LlamaCloud Integration
For production deployment with real candidate data:
1. Set up LlamaCloud account and create resume index
2. Configure environment variables in `config.py`
3. Replace mock candidate data with actual LlamaCloud API calls
### OpenAI Model Selection
Change the model in `config.py`:
```python
DEFAULT_MODEL = "gpt-4o-mini" # Fast and cost-effective
# DEFAULT_MODEL = "gpt-4o" # Higher quality, more expensive
```
## 📈 Performance & Scaling
- **Concurrent Requests**: FastMCP handles multiple simultaneous job matching requests
- **Caching**: Consider implementing Redis for frequent job requirement extractions
- **Rate Limiting**: OpenAI API has rate limits; implement queuing for high-volume usage
- **Cost Optimization**: Use `gpt-4o-mini` for most operations, `gpt-4o` for critical analysis
## 🔐 Security Considerations
### Security Notes
- **Always use `--no-allow-unauthenticated`** to require authentication for Cloud Run
- Ensure users have the `roles/run.invoker` IAM role to access the server
- Use the Cloud Run proxy for local testing with authentication
- **API Keys**: Never commit API keys to version control
- **Input Validation**: Server validates all inputs and handles malformed data
- **Error Handling**: Graceful degradation when external services are unavailable
- **Data Privacy**: Resume data processed through OpenAI; consider data retention policies
## 🔍 Troubleshooting
### Common Issues:
#### Configuration Issues:
1. **"Invalid API key" errors**:
- Check that your API keys are correctly set in `config.py` or environment variables
- Verify API keys are valid and have proper permissions
- For OpenAI: Ensure you have credits/billing set up
2. **"LlamaCloud index not found"**:
- Verify `LLAMA_CLOUD_INDEX_NAME` matches your actual index name
- Check `LLAMA_CLOUD_ORGANIZATION_ID` is correct
- Ensure your LlamaCloud API key has access to the specified index
3. **Server shows placeholder values**:
- If you see "your-openai-api-key-here" in logs, your config isn't loading properly
- Check that you've either set environment variables OR edited `config.py` directly
- Restart the server after making configuration changes
#### Deployment Issues:
4. **Authentication errors**: Ensure Cloud Run proxy is running and you have proper IAM roles
5. **Build failures**: Check Dockerfile and dependencies in pyproject.toml
6. **Port conflicts**: Use `lsof -ti:8080 | xargs kill -9` to free up port 8080
### Logs:
View Cloud Run logs:
```bash
gcloud run services logs tail mcp-server --region=us-central1
```
View local server logs:
```bash
# Server logs are printed to console when running locally
python server.py
```
## 🤝 Contributing
1. Fork the repository
2. Create a feature branch
3. Add tests for new functionality
4. Ensure all tests pass
5. Submit a pull request
## 📄 License
MIT License - see LICENSE file for details.
---
**Ready to revolutionize your hiring process with AI-powered job matching!** 🎯✨
+150
View File
@@ -0,0 +1,150 @@
# MCP Server - Clean Modular Structure
This document explains the refactored, modular structure of the MCP server that follows Python best practices.
## 📁 Project Structure
```
mcp-on-cloudrun/
├── config.py # Configuration constants and settings
├── models.py # Data models and structures
├── services/ # Business logic services
│ ├── __init__.py
│ ├── openai_service.py # OpenAI API interactions
│ └── llamacloud_service.py # LlamaCloud resume index interactions
├── tools/ # MCP tool definitions
│ ├── __init__.py
│ ├── job_tools.py # Job description related tools
│ ├── math_tools.py # Mathematical operation tools
│ └── candidate_tools.py # Candidate retrieval tools
├── server_clean.py # Clean main server entry point
├── test_server_clean.py # Test script for clean server
├── server.py # Original monolithic server (backup)
└── test_server.py # Original test script (backup)
```
## 🏗️ Architecture Overview
### Separation of Concerns
The refactored structure follows the **Single Responsibility Principle** by separating different concerns into dedicated modules:
1. **Configuration** (`config.py`): All constants, API keys, and configuration settings
2. **Models** (`models.py`): Data structures and models like `JobDescriptionData`
3. **Services** (`services/`): Business logic and external API interactions
4. **Tools** (`tools/`): MCP tool definitions organized by functionality
5. **Server** (`server_clean.py`): FastMCP setup and tool registration only
### Benefits of This Structure
**Maintainability**: Each module has a clear purpose and can be modified independently
**Testability**: Individual components can be unit tested in isolation
**Reusability**: Services and models can be reused across different tools
**Scalability**: Easy to add new tools, services, or models
**Readability**: Clean, focused code that's easy to understand
**Best Practices**: Follows Python packaging and project structure conventions
## 📋 Module Details
### `config.py`
- Contains all configuration constants
- Environment variables and API keys
- Server settings (host, port, timeouts)
- Easy to modify without touching business logic
### `models.py`
- Defines data structures like `JobDescriptionData`
- Includes validation and serialization methods
- Type hints for better IDE support and documentation
### `services/openai_service.py`
- Encapsulates all OpenAI API interactions
- Handles HTTP requests, error handling, and response parsing
- Can be easily mocked for testing
- Configurable through the config module
### `tools/job_tools.py`
- Contains job description related MCP tools
- Uses the OpenAI service for processing
- Handles input validation and error responses
- Clean separation between tool interface and business logic
### `tools/math_tools.py`
- Simple mathematical operation tools
- Demonstrates how to organize related tools
- Static methods for stateless operations
### `server_clean.py`
- Minimal server setup code
- Imports and registers tools from their respective modules
- Clean main function with proper error handling
- Easy to understand and modify
## 🚀 Running the Clean Server
```bash
# Start the clean server
uv run server_clean.py
# Test the clean server (in another terminal)
uv run test_server_clean.py
```
## 🔧 Adding New Tools
To add a new tool:
1. **Create the tool class** in the appropriate `tools/` module
2. **Add any required services** in the `services/` directory
3. **Register the tool** in `server_clean.py` with the `@mcp.tool()` decorator
4. **Add tests** to verify functionality
Example:
```python
# In tools/new_tools.py
class NewTools:
def my_new_tool(self, param: str) -> str:
return f"Processed: {param}"
# In server_clean.py
from tools.new_tools import NewTools
new_tools = NewTools()
@mcp.tool()
def my_new_tool(param: str) -> str:
return new_tools.my_new_tool(param)
```
## 📊 Comparison: Before vs After
| Aspect | Before (server.py) | After (Clean Structure) |
|--------|-------------------|-------------------------|
| Lines of code | 275 lines | ~100 lines in main server |
| Concerns mixed | ✗ All in one file | ✅ Separated by purpose |
| Testability | ✗ Hard to test parts | ✅ Easy to unit test |
| Maintainability | ✗ Changes affect everything | ✅ Isolated changes |
| Readability | ✗ Long, complex file | ✅ Short, focused files |
| Scalability | ✗ Gets worse over time | ✅ Easy to extend |
## 🛠️ Available MCP Tools
### Mathematical Operations
- `add(a: int, b: int)` - Add two numbers
- `subtract(a: int, b: int)` - Subtract two numbers
- `multiply(a: int, b: int)` - Multiply two numbers
### Job Description Processing
- `extract_job_requirements(jd_text: str)` - Extract structured data from job description text
### Candidate Retrieval (LlamaCloud)
- `find_matching_candidates(required_qualifications: str, preferred_qualifications: str, top_k: int, enable_reranking: bool)` - Find candidates matching job qualifications from LlamaCloud resume index
- `search_candidates_by_skills(skills: str, top_k: int)` - Search candidates by specific skills or keywords
- `score_candidate_qualifications(candidate_resume: str, required_qualifications: str, preferred_qualifications: str, job_title: str, job_description: str)` - Score a candidate's resume against specific job qualifications using LLM evaluation
## 🎯 Next Steps
1. **Configure LlamaCloud**: Set your API key and index details in `config.py`
2. **Add more tools**: Follow the established patterns
3. **Add unit tests**: Test individual components
4. **Add type checking**: Use `mypy` for static type checking
5. **Add documentation**: Use docstrings and type hints throughout
+21
View File
@@ -0,0 +1,21 @@
"""Configuration settings for the MCP server."""
import os
# --- OpenAI ---
# The key comes from the OPENAI_API_KEY environment variable; the fallback is
# only a placeholder and must be replaced before real API calls can succeed.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-openai-api-key-here")
DEFAULT_MODEL = "gpt-4o-mini"

# --- LlamaCloud (public resumes index) ---
# Each value can be overridden through the environment variable of the same
# name; the API key and organization ID fallbacks are placeholders.
LLAMA_CLOUD_API_KEY = os.environ.get("LLAMA_CLOUD_API_KEY", "your-llamacloud-api-key-here")
LLAMA_CLOUD_PROJECT_NAME = os.environ.get("LLAMA_CLOUD_PROJECT_NAME", "Default")
LLAMA_CLOUD_ORGANIZATION_ID = os.environ.get("LLAMA_CLOUD_ORGANIZATION_ID", "your-org-id-here")
LLAMA_CLOUD_INDEX_NAME = os.environ.get("LLAMA_CLOUD_INDEX_NAME", "resume_public")

# --- Server ---
# PORT and HOST are read once at import time.
DEFAULT_PORT = int(os.environ.get("PORT", "8080"))
DEFAULT_HOST = os.environ.get("HOST", "0.0.0.0")

# --- Outbound API calls ---
REQUEST_TIMEOUT = float(os.environ.get("REQUEST_TIMEOUT", "30.0"))
OPENAI_TEMPERATURE = float(os.environ.get("OPENAI_TEMPERATURE", "0.1"))
+68
View File
@@ -0,0 +1,68 @@
"""Data models for the MCP server."""
import logging
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
class JobDescriptionData:
    """Container for the structured fields extracted from a job description."""

    def __init__(
        self,
        title: str,
        company: str,
        location: str,
        required_qualifications: List[str],
        preferred_qualifications: List[str],
        description: str,
        experience_level: str,
        employment_type: str
    ):
        """Store the supplied job description fields on the instance.

        Args:
            title: The job title
            company: The company name
            location: The job location
            required_qualifications: List of required qualifications
            preferred_qualifications: List of preferred qualifications
            description: Job description summary
            experience_level: Experience level (entry, mid, senior, etc.)
            employment_type: Employment type (full-time, part-time, etc.)
        """
        logger.info(f"Creating JobDescriptionData with title: {title}")

        # Identity of the posting.
        self.title, self.company, self.location = title, company, location

        # Qualification lists are stored as given (no copy is made).
        self.required_qualifications = required_qualifications
        self.preferred_qualifications = preferred_qualifications

        # Free-text summary and classification fields.
        self.description = description
        self.experience_level = experience_level
        self.employment_type = employment_type

        logger.info("JobDescriptionData object created successfully")

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict suitable for JSON serialization.

        Returns:
            Dictionary keyed by attribute name, in declaration order
        """
        logger.info("Converting JobDescriptionData to dict")
        field_names = (
            "title",
            "company",
            "location",
            "required_qualifications",
            "preferred_qualifications",
            "description",
            "experience_level",
            "employment_type",
        )
        payload = {name: getattr(self, name) for name in field_names}
        logger.info("Successfully converted to dict")
        return payload

    def __repr__(self) -> str:
        """Short debugging representation showing title and company only."""
        return "JobDescriptionData(title='{}', company='{}')".format(self.title, self.company)
+11
View File
@@ -0,0 +1,11 @@
[project]
name = "mcp-on-cloudrun"
version = "0.1.0"
description = "Example of deploying an MCP server on Cloud Run"
requires-python = ">=3.10"
dependencies = [
"fastmcp==2.6.1",
"httpx>=0.27.0",
"llama-cloud>=0.1.26",
"llama-index>=0.12.43",
]
+164
View File
@@ -0,0 +1,164 @@
"""Main MCP server entry point with clean modular structure."""
import asyncio
import logging
import os
from fastmcp import FastMCP
from config import DEFAULT_PORT, DEFAULT_HOST
from tools.job_tools import JobTools
from tools.math_tools import MathTools
from tools.candidate_tools import CandidateTools
# Configure logging
logger = logging.getLogger(__name__)
# basicConfig sets the root logger to INFO with a "[LEVEL]: message" format.
logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO)
# Initialize FastMCP server
mcp = FastMCP("MCP Server on Cloud Run")
# Initialize tool instances
# Created once at import time; the @mcp.tool() wrappers in this module
# delegate to these shared instances.
job_tools = JobTools()
math_tools = MathTools()
candidate_tools = CandidateTools()
# Register job description tools
# NOTE(review): FastMCP presumably publishes the docstring below as the tool
# description shown to MCP clients — treat its wording as client-facing.
@mcp.tool()
async def extract_job_requirements(jd_text: str) -> str:
    """Extract structured job requirements from job description text.
    Args:
        jd_text: The job description text to analyze
    Returns:
        JSON string containing structured job requirements including title, company,
        location, required_qualifications, preferred_qualifications, description,
        experience_level, and employment_type.
    """
    # Thin wrapper: the extraction itself is done by the shared JobTools instance.
    return await job_tools.extract_job_requirements(jd_text)
# Register mathematical operation tools
@mcp.tool()
def add(a: int, b: int) -> int:
    """Add two numbers together.
    Args:
        a: The first number
        b: The second number
    Returns:
        The sum of the two numbers
    """
    # Thin wrapper: delegates to the shared MathTools instance.
    return math_tools.add(a, b)
@mcp.tool()
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.
    Args:
        a: The first number
        b: The second number
    Returns:
        The difference of the two numbers
    """
    # Thin wrapper: delegates to the shared MathTools instance.
    return math_tools.subtract(a, b)
@mcp.tool()
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.
    Args:
        a: The first number
        b: The second number
    Returns:
        The product of the two numbers
    """
    # Thin wrapper: delegates to the shared MathTools instance.
    return math_tools.multiply(a, b)
# Register candidate retrieval tools
@mcp.tool()
async def find_matching_candidates(required_qualifications: str, preferred_qualifications: str = "", top_k: int = 10, enable_reranking: bool = True) -> str:
    """Find candidates matching job qualifications from LlamaCloud resume index.
    Args:
        required_qualifications: Comma-separated string of required qualifications (e.g., "Python, Machine Learning, 3+ years experience")
        preferred_qualifications: Comma-separated string of preferred qualifications (optional, e.g., "AWS, Docker, PhD")
        top_k: Number of top candidates to retrieve (default: 10, max: 50)
        enable_reranking: Whether to enable reranking for better results (default: True)
    Returns:
        JSON string containing list of matching candidates with their scores and information
    """
    # Thin wrapper: arguments are forwarded positionally and unchanged to the
    # shared CandidateTools instance.
    # NOTE(review): the "max: 50" limit in the docstring is not enforced here —
    # confirm that CandidateTools applies it.
    return await candidate_tools.find_matching_candidates(required_qualifications, preferred_qualifications, top_k, enable_reranking)
@mcp.tool()
async def search_candidates_by_skills(skills: str, top_k: int = 10) -> str:
    """Search candidates by specific skills or keywords from LlamaCloud resume index.
    Args:
        skills: Comma-separated list of skills or keywords to search for (e.g., "Python, Machine Learning, AWS")
        top_k: Number of top candidates to retrieve (default: 10, max: 50)
    Returns:
        JSON string containing list of matching candidates with their scores and information
    """
    # Thin wrapper: arguments are forwarded unchanged to the shared
    # CandidateTools instance.
    # NOTE(review): the "max: 50" limit in the docstring is not enforced here —
    # confirm that CandidateTools applies it.
    return await candidate_tools.search_candidates_by_skills(skills, top_k)
@mcp.tool()
async def score_candidate_qualifications(
    candidate_resume: str,
    required_qualifications: str,
    preferred_qualifications: str = "",
    job_title: str = "",
    job_description: str = ""
) -> str:
    """Score a candidate's resume against specific job qualifications using LLM evaluation.
    Args:
        candidate_resume: The candidate's resume text content
        required_qualifications: Comma-separated string of required qualifications (e.g., "Python, 3+ years experience, Bachelor's degree")
        preferred_qualifications: Comma-separated string of preferred qualifications (optional, e.g., "AWS, Docker, Master's degree")
        job_title: Job title for context (optional)
        job_description: Job description for context (optional)
    Returns:
        JSON string containing detailed scoring results for each qualification with explanations and overall feedback
    """
    # Thin wrapper: all five arguments are forwarded positionally, in
    # signature order, to the shared CandidateTools instance.
    return await candidate_tools.score_candidate_qualifications(
        candidate_resume,
        required_qualifications,
        preferred_qualifications,
        job_title,
        job_description
    )
async def main():
    """Start the MCP server over streamable HTTP and run it until it stops.

    The port is taken from the PORT environment variable, falling back to
    DEFAULT_PORT; the bind address is DEFAULT_HOST. Any exception raised by
    the server is logged and then re-raised to the caller.
    """
    port = int(os.getenv("PORT", DEFAULT_PORT))
    logger.info(f"MCP server starting on {DEFAULT_HOST}:{port}")

    # Could also use 'sse' transport, host="0.0.0.0" required for Cloud Run.
    run_options = {
        "transport": "streamable-http",
        "host": DEFAULT_HOST,
        "port": port,
    }
    try:
        await mcp.run_async(**run_options)
    except Exception as exc:
        logger.error(f"Server startup failed: {exc}")
        raise


if __name__ == "__main__":
    asyncio.run(main())
+1
View File
@@ -0,0 +1 @@
"""Services package for business logic."""
+430
View File
@@ -0,0 +1,430 @@
"""LlamaCloud service for candidate retrieval from resume index."""
import json
import logging
from typing import List, Dict, Any, Optional
import asyncio
from config import (
LLAMA_CLOUD_API_KEY,
LLAMA_CLOUD_INDEX_NAME,
LLAMA_CLOUD_PROJECT_NAME,
LLAMA_CLOUD_ORGANIZATION_ID
)
from models import JobDescriptionData
logger = logging.getLogger(__name__)
# Optional-dependency guard: instead of failing at module import time, record
# whether the LlamaIndex managed-index integration is importable.
# LlamaCloudService.__init__ checks LLAMA_INDEX_AVAILABLE and raises ImportError
# when it is False.
try:
    from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
    LLAMA_INDEX_AVAILABLE = True
except ImportError:
    logger.error("llama-index package is required for LlamaCloud functionality. Please install it with: pip install llama-index")
    LLAMA_INDEX_AVAILABLE = False
class CandidateMatch:
    """One retrieved candidate: node identity, score, text, and metadata."""

    def __init__(
        self,
        node_id: str,
        score: float,
        content: str,
        metadata: Dict[str, Any],
        candidate_name: Optional[str] = None,
        file_name: Optional[str] = None
    ):
        """Store the match fields, substituting defaults for missing names.

        Args:
            node_id: Identifier of the retrieved node
            score: Score attached to the match
            content: Text content of the match
            metadata: Metadata dictionary attached to the match
            candidate_name: Candidate's name; a falsy value becomes
                "Unknown Candidate"
            file_name: Source file name; a falsy value becomes ""
        """
        self.node_id, self.score = node_id, score
        self.content, self.metadata = content, metadata
        # Falsy inputs (None or empty string) fall back to the defaults.
        self.candidate_name = candidate_name if candidate_name else "Unknown Candidate"
        self.file_name = file_name if file_name else ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the match as a plain dict for JSON serialization."""
        return dict(
            node_id=self.node_id,
            score=self.score,
            content=self.content,
            metadata=self.metadata,
            candidate_name=self.candidate_name,
            file_name=self.file_name,
        )
class LlamaCloudService:
    """Service class for interacting with LlamaCloud resume index using LlamaIndex.

    Each public coroutine builds a text query, runs a retrieval against the
    configured index and returns one ``CandidateMatch`` per distinct resume
    file, ordered by descending score.
    """

    def __init__(self):
        """Validate configuration and remember the connection settings.

        The index connection itself is opened lazily by ``_get_index``.

        Raises:
            ImportError: If the llama-index package could not be imported.
            ValueError: If the API key is missing or still the placeholder
                value, or if no index name is configured.
        """
        if not LLAMA_INDEX_AVAILABLE:
            raise ImportError("llama-index package is required for LlamaCloud functionality. Please install it with: pip install llama-index")
        if not LLAMA_CLOUD_API_KEY or LLAMA_CLOUD_API_KEY == "llx-your-api-key-here":
            raise ValueError("LLAMA_CLOUD_API_KEY is required and must be set to a valid API key")
        if not LLAMA_CLOUD_INDEX_NAME:
            raise ValueError("LLAMA_CLOUD_INDEX_NAME is required")

        self.api_key = LLAMA_CLOUD_API_KEY
        self.index_name = LLAMA_CLOUD_INDEX_NAME
        self.project_name = LLAMA_CLOUD_PROJECT_NAME
        self.organization_id = LLAMA_CLOUD_ORGANIZATION_ID

        # Initialize LlamaCloud index (will be created lazily)
        self._index = None
        logger.info(f"LlamaCloudService initialized with index: {self.index_name}")

    def _get_index(self):
        """Return the cached LlamaCloud index, connecting on first use.

        Raises:
            Exception: Whatever the LlamaCloud client raises while connecting
                (logged, then re-raised unchanged).
        """
        if self._index is None:
            try:
                # Function-scope import: this module has no file-level `import os`.
                import os

                # Set the API key in environment if not already set
                # (presumably where the LlamaCloud client looks it up).
                if not os.environ.get("LLAMA_CLOUD_API_KEY"):
                    os.environ["LLAMA_CLOUD_API_KEY"] = self.api_key

                logger.info(f"Connecting to LlamaCloud index: {self.index_name}")

                index_kwargs = {
                    "name": self.index_name,
                    "project_name": self.project_name,
                }
                # Forward the configured organization when there is one, so the
                # project lookup is not left to the client's default.
                if self.organization_id:
                    index_kwargs["organization_id"] = self.organization_id

                # Connect to existing index as per the documentation
                self._index = LlamaCloudIndex(**index_kwargs)
                logger.info("Successfully connected to LlamaCloud index")
            except Exception as e:
                logger.error(f"Failed to connect to LlamaCloud index: {e}")
                raise
        return self._index

    def _build_search_query(self, job_description: JobDescriptionData) -> str:
        """Build a search query from job description data.

        Only the non-empty title, required/preferred qualifications and
        experience level are included, each under a labelled prefix.
        """
        query_parts = []
        if job_description.title:
            query_parts.append(f"Job Title: {job_description.title}")
        if job_description.required_qualifications:
            query_parts.append(f"Required Qualifications: {' '.join(job_description.required_qualifications)}")
        if job_description.preferred_qualifications:
            query_parts.append(f"Preferred Qualifications: {' '.join(job_description.preferred_qualifications)}")
        if job_description.experience_level:
            query_parts.append(f"Experience Level: {job_description.experience_level}")

        query = " ".join(query_parts)
        logger.info(f"Built search query: {query}")
        return query

    def _build_qualifications_query(self, required_qualifications: List[str], preferred_qualifications: List[str]) -> str:
        """Build a search query from qualification lists.

        The combined list is appended a second time under "Relevant experience
        with:", so every qualification appears twice in the query text.
        """
        query_parts = []
        if required_qualifications:
            query_parts.append(f"Required skills and qualifications: {', '.join(required_qualifications)}")
        if preferred_qualifications:
            query_parts.append(f"Preferred skills and experience: {', '.join(preferred_qualifications)}")

        # Combine all qualifications for a comprehensive search
        all_qualifications = required_qualifications + preferred_qualifications
        if all_qualifications:
            query_parts.append(f"Relevant experience with: {', '.join(all_qualifications)}")

        query = " ".join(query_parts)
        logger.info(f"Built qualifications query: {query}")
        return query

    @staticmethod
    def _node_score(node) -> float:
        """Return the node's score, treating a missing or ``None`` score as 0.0.

        A ``None`` score would otherwise break both the ``:.3f`` log format and
        the final sort by score.
        """
        raw_score = getattr(node, 'score', None)
        return 0.0 if raw_score is None else raw_score

    @staticmethod
    def _guess_candidate_name(file_name: str, content: str) -> str:
        """Derive a display name from the file name, else from the resume text.

        Returns "Unknown Candidate" when neither source yields a name.
        """
        # Function-scope imports: neither module is imported at file level.
        import os
        import re

        if file_name:
            # Remove file extension and replace underscores with spaces
            base_name = os.path.basename(file_name)
            name_part = base_name.split('.')[0].replace('_', ' ').replace('-', ' ')
            if name_part and not name_part.lower().startswith('resume'):
                return name_part.title()

        if content:
            # Simple pattern matching for names in resume content
            name_patterns = [
                r'^([A-Z][a-z]+ [A-Z][a-z]+)',  # First line with Name format
                r'Name:?\s*([A-Z][a-z]+ [A-Z][a-z]+)',  # Name: John Doe
                r'([A-Z][a-z]+ [A-Z][a-z]+)\s*\n',  # Name followed by newline
            ]
            head = content[:200]  # Search in first 200 chars
            for pattern in name_patterns:
                match = re.search(pattern, head)
                if match:
                    return match.group(1)

        return "Unknown Candidate"

    def _extract_candidate_info(self, node) -> CandidateMatch:
        """Extract candidate information from a retrieved node.

        Accepts either a node that carries ``text``/``metadata`` directly or a
        wrapper exposing the real node as ``.node``. Never raises: on any
        error a minimal ``CandidateMatch`` is returned instead.
        """
        try:
            node_id = getattr(node, 'id_', '') or getattr(node, 'node_id', '')
            score = self._node_score(node)

            # Handle different node structures: wrapped (.node) or direct.
            if hasattr(node, 'node'):
                source = node.node
                node_id = node_id or getattr(source, 'id_', '')
            else:
                source = node

            content = getattr(source, 'text', '') or getattr(source, 'content', '')
            # Fall back to {} so the .get() lookups below cannot hit None.
            metadata = getattr(source, 'metadata', {}) or getattr(source, 'extra_info', {}) or {}

            file_name = metadata.get('file_name', '') or metadata.get('filename', '') or metadata.get('file_path', '')
            candidate_name = self._guess_candidate_name(file_name, content)

            return CandidateMatch(
                node_id=node_id,
                score=score,
                content=content,
                metadata=metadata,
                candidate_name=candidate_name,
                file_name=file_name
            )
        except Exception as e:
            logger.error(f"Error extracting candidate info from node: {e}")
            # Return a basic match with available information
            return CandidateMatch(
                node_id=str(getattr(node, 'id_', 'unknown')),
                score=self._node_score(node),
                content=str(getattr(node, 'text', getattr(node, 'content', ''))),
                metadata=getattr(node, 'metadata', None) or {}
            )

    @staticmethod
    def _retriever_config(top_k: int, enable_reranking: bool) -> Dict[str, Any]:
        """Return the keyword arguments passed to ``index.as_retriever``."""
        return {
            "dense_similarity_top_k": top_k,
            "alpha": 1.0,  # Restricts to vector search
            "enable_reranking": enable_reranking,
        }

    async def _fetch_nodes(self, retriever, query: str):
        """Run the blocking ``retriever.retrieve(query)`` in the default executor."""
        # Run the retrieval in a thread pool to avoid blocking the async loop
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, retriever.retrieve, query)

    def _collect_candidates(self, nodes) -> List[CandidateMatch]:
        """Convert nodes to candidates, one per file name, best score first.

        The first node seen for a file name wins; nodes without a file name
        are never treated as duplicates. A node that fails to convert is
        logged and skipped.
        """
        candidates = []
        seen_files = set()  # Track files to avoid duplicates
        for i, node in enumerate(nodes):
            try:
                candidate = self._extract_candidate_info(node)
                # Deduplicate by file name if available
                if candidate.file_name:
                    if candidate.file_name in seen_files:
                        logger.info(f"Skipping duplicate file: {candidate.file_name}")
                        continue
                    seen_files.add(candidate.file_name)
                candidates.append(candidate)
                logger.info(f"Processed candidate {i+1}: {candidate.candidate_name} (score: {candidate.score:.3f})")
            except Exception as e:
                logger.error(f"Error processing node {i}: {e}")
                continue

        # Sort by score (descending)
        candidates.sort(key=lambda x: x.score, reverse=True)
        return candidates

    async def retrieve_candidates(
        self,
        job_description: JobDescriptionData,
        top_k: int = 20,
        enable_reranking: bool = True
    ) -> List[CandidateMatch]:
        """Retrieve top candidates matching the job description.

        Args:
            job_description: Structured job data used to build the query.
            top_k: Value passed as ``dense_similarity_top_k``.
            enable_reranking: Whether the retriever should rerank results.

        Returns:
            Unique candidates sorted by descending score.

        Raises:
            Exception: Any error from connecting or retrieving (logged, re-raised).
        """
        try:
            logger.info(f"Starting candidate retrieval for job: {job_description.title}")
            query = self._build_search_query(job_description)

            index = self._get_index()
            retriever_config = self._retriever_config(top_k, enable_reranking)
            logger.info(f"Configuring retriever with: {retriever_config}")
            retriever = index.as_retriever(**retriever_config)

            logger.info(f"Retrieving candidates with query: {query}")
            nodes = await self._fetch_nodes(retriever, query)
            logger.info(f"Retrieved {len(nodes)} nodes from LlamaCloud")

            candidates = self._collect_candidates(nodes)
            logger.info(f"Successfully retrieved {len(candidates)} unique candidates")
            return candidates
        except Exception as e:
            logger.error(f"Error retrieving candidates: {e}")
            raise

    async def retrieve_candidates_by_qualifications(
        self,
        required_qualifications: List[str],
        preferred_qualifications: List[str],
        top_k: int = 20,
        enable_reranking: bool = True
    ) -> List[CandidateMatch]:
        """Retrieve candidates matching specific qualifications.

        Args:
            required_qualifications: Required qualification strings.
            preferred_qualifications: Preferred qualification strings.
            top_k: Value passed as ``dense_similarity_top_k``.
            enable_reranking: Whether the retriever should rerank results.

        Returns:
            Unique candidates sorted by descending score.

        Raises:
            Exception: Any error from connecting or retrieving (logged, re-raised).
        """
        try:
            logger.info(f"Starting candidate retrieval by qualifications")
            logger.info(f"Required: {required_qualifications}")
            logger.info(f"Preferred: {preferred_qualifications}")
            query = self._build_qualifications_query(required_qualifications, preferred_qualifications)

            index = self._get_index()
            retriever_config = self._retriever_config(top_k, enable_reranking)
            logger.info(f"Configuring retriever with: {retriever_config}")
            retriever = index.as_retriever(**retriever_config)

            logger.info(f"Retrieving candidates with qualifications query: {query}")
            nodes = await self._fetch_nodes(retriever, query)
            logger.info(f"Retrieved {len(nodes)} nodes from LlamaCloud")

            candidates = self._collect_candidates(nodes)
            logger.info(f"Successfully retrieved {len(candidates)} unique candidates by qualifications")
            return candidates
        except Exception as e:
            logger.error(f"Error retrieving candidates by qualifications: {e}")
            raise

    async def search_by_skills(
        self,
        skills: str,
        top_k: int = 20
    ) -> List[CandidateMatch]:
        """Search candidates by specific skills.

        Args:
            skills: Free-text skills string embedded into the query.
            top_k: Value passed as ``dense_similarity_top_k``.

        Returns:
            Unique candidates sorted by descending score.

        Raises:
            Exception: Any error from connecting or retrieving (logged, re-raised).
        """
        try:
            logger.info(f"Starting skill-based search for: {skills}")

            index = self._get_index()
            # Reranking is disabled for skill search (kept simple on purpose).
            retriever_config = self._retriever_config(top_k, False)
            logger.info(f"Configuring retriever for skill search with: {retriever_config}")
            retriever = index.as_retriever(**retriever_config)

            query = f"Skills and experience in: {skills}"
            logger.info(f"Searching candidates with skills query: {query}")
            nodes = await self._fetch_nodes(retriever, query)
            logger.info(f"Retrieved {len(nodes)} nodes from LlamaCloud for skills search")

            candidates = self._collect_candidates(nodes)
            logger.info(f"Successfully retrieved {len(candidates)} unique candidates for skills: {skills}")
            return candidates
        except Exception as e:
            logger.error(f"Error searching candidates by skills: {e}")
            raise
+346
View File
@@ -0,0 +1,346 @@
"""OpenAI API service for job description extraction."""
import json
import logging
import httpx
from typing import Dict, List, Any
from config import OPENAI_API_KEY, DEFAULT_MODEL, REQUEST_TIMEOUT, OPENAI_TEMPERATURE
from models import JobDescriptionData
logger = logging.getLogger(__name__)
class OpenAIService:
    """Service class for handling OpenAI API interactions."""

    _CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"
    # Highest value on the 0/1/2 scale used in the scoring prompt.
    _MAX_QUALIFICATION_SCORE = 2

    def __init__(self):
        """Initialize the OpenAI service.

        Raises:
            ValueError: If no OpenAI API key is configured.
        """
        if not OPENAI_API_KEY:
            raise ValueError("OPENAI_API_KEY is required")
        self.api_key = OPENAI_API_KEY
        self.model = DEFAULT_MODEL
        self.timeout = REQUEST_TIMEOUT
        self.temperature = OPENAI_TEMPERATURE

    async def _chat_completion(self, system_prompt: str, user_prompt: str, temperature: float) -> str:
        """Send one JSON-mode chat completion request and return its content.

        Args:
            system_prompt: Text of the system message.
            user_prompt: Text of the user message.
            temperature: Sampling temperature for this request.

        Returns:
            The ``content`` of the first choice; may be empty or ``None``,
            which callers must check.

        Raises:
            Exception: If the API responds with a non-200 status.
            httpx.TimeoutException, httpx.RequestError: Propagated unchanged.
        """
        request_data = {
            "model": self.model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "temperature": temperature,
            "response_format": {"type": "json_object"}
        }
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            logger.info(f"Making request to OpenAI with model: {self.model}")
            response = await client.post(
                self._CHAT_COMPLETIONS_URL,
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {self.api_key}"
                },
                json=request_data
            )
            logger.info(f"OpenAI API response status: {response.status_code}")

            if response.status_code != 200:
                error_text = response.text
                logger.error(f"OpenAI API error response: {error_text}")
                # Prefer the structured error body; fall back to the raw text.
                try:
                    error_detail = response.json()
                    logger.error(f"OpenAI API error JSON: {error_detail}")
                except ValueError:  # json.JSONDecodeError is a ValueError
                    error_detail = error_text
                raise Exception(f"OpenAI API error ({response.status_code}): {error_detail}")

            data = response.json()
            logger.info("Successfully parsed OpenAI response JSON")
            return data["choices"][0]["message"]["content"]

    async def extract_job_description_from_text(self, text: str) -> JobDescriptionData:
        """Extract job description data from text using OpenAI.

        Args:
            text: The job description text to analyze

        Returns:
            JobDescriptionData object with extracted information

        Raises:
            Exception: If the API call fails or response parsing fails
        """
        logger.info(f"Starting extraction with text length: {len(text)}")
        logger.info("API key is available, proceeding with extraction")

        # Create the extraction prompt based on reference implementation
        prompt = self._create_extraction_prompt(text)

        try:
            logger.info("Creating HTTP client and making API request")
            content = await self._chat_completion(
                "You are a helpful assistant that extracts structured data from job descriptions.",
                prompt,
                self.temperature,
            )
            logger.info(f"Extracted content from OpenAI response, length: {len(content) if content else 0}")

            if not content:
                logger.error("OpenAI returned empty content")
                raise Exception("Failed to extract job description data: Empty response")

            logger.info(f"OpenAI response content sample: {content[:200]}...")
            return self._parse_response_to_job_data(content)
        except httpx.TimeoutException as e:
            logger.error(f"HTTP timeout error: {e}")
            # Chain the cause so the original httpx error stays in the traceback.
            raise Exception(f"Request timeout: {str(e)}") from e
        except httpx.RequestError as e:
            logger.error(f"HTTP request error: {e}")
            raise Exception(f"Network error: {str(e)}") from e
        except Exception as e:
            logger.error(f"Unexpected error in extract_job_description_from_text: {e}")
            logger.error(f"Error type: {type(e).__name__}")
            raise

    def _create_extraction_prompt(self, text: str) -> str:
        """Create the prompt for job description extraction.

        Args:
            text: The job description text

        Returns:
            Formatted prompt string
        """
        return f"""
Extract the following information from this job description text.
Format the response as a valid JSON object with these fields:
- title: The job title
- company: The company name (use "Unknown" if not found)
- location: The job location (use "Not specified" if not found)
- required_qualifications: An array of strings, each one representing a required qualification
- preferred_qualifications: An array of strings, each one representing a preferred/nice-to-have qualification
- description: A summary of the job description
- experience_level: The experience level (entry-level, mid-level, senior, etc.)
- employment_type: The employment type (full-time, part-time, contract, etc.)
Job Description Text:
{text}
"""

    @staticmethod
    def _as_string_list(value) -> List[str]:
        """Normalize a list-valued field: ``None`` -> ``[]``, bare string -> ``[s]``."""
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return value

    def _parse_response_to_job_data(self, content: str) -> JobDescriptionData:
        """Parse OpenAI response content to JobDescriptionData object.

        Fields that are absent *or* explicitly ``null`` in the response get the
        same defaults, so a ``null`` never reaches ``JobDescriptionData``.

        Args:
            content: The JSON content from OpenAI response

        Returns:
            JobDescriptionData object

        Raises:
            Exception: If JSON parsing fails
        """
        try:
            parsed_data = json.loads(content)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse JSON response: {e}")
            logger.error(f"Raw content: {content}")
            raise Exception(f"Failed to parse response from OpenAI: {str(e)}") from e

        logger.info("Successfully parsed JSON from OpenAI response")
        logger.info(f"Parsed data keys: {list(parsed_data.keys())}")

        def text_field(key: str, default: str) -> str:
            value = parsed_data.get(key)
            return default if value is None else value

        # Validate and create JobDescriptionData object
        result = JobDescriptionData(
            title=text_field("title", "Unknown Position"),
            company=text_field("company", "Unknown"),
            location=text_field("location", "Not specified"),
            required_qualifications=self._as_string_list(parsed_data.get("required_qualifications")),
            preferred_qualifications=self._as_string_list(parsed_data.get("preferred_qualifications")),
            description=text_field("description", ""),
            experience_level=text_field("experience_level", "Not specified"),
            employment_type=text_field("employment_type", "Not specified")
        )
        logger.info("Successfully created JobDescriptionData object")
        return result

    @classmethod
    def _score_value(cls, item) -> int:
        """Return one scored entry's value as an int clamped to the 0..2 scale.

        Entries that are not dicts, or whose score is missing or not
        convertible to an integer, count as 0 instead of raising.
        """
        if not isinstance(item, dict):
            return 0
        try:
            value = int(item.get("score", 0))
        except (TypeError, ValueError):
            return 0
        return max(0, min(cls._MAX_QUALIFICATION_SCORE, value))

    @staticmethod
    def _build_scoring_prompt(
        candidate_resume: str,
        required_qualifications: List[str],
        preferred_qualifications: List[str],
        job_title: str,
        job_description: str
    ) -> str:
        """Assemble the user prompt that asks the model to score each qualification."""
        prompt_parts = [
            "You are a professional recruiter tasked with evaluating how well a candidate's resume matches the qualifications for a job.",
            ""
        ]
        if job_title:
            prompt_parts.append(f"JOB TITLE: {job_title}")
        if job_description:
            prompt_parts.append(f"JOB DESCRIPTION: {job_description}")

        prompt_parts.extend([
            "",
            "CANDIDATE'S RESUME:",
            candidate_resume,
            "",
            "Please evaluate the candidate against each qualification using the following scale:",
            "0 - Not Met: The candidate's resume shows no evidence of meeting this qualification",
            "1 - Somewhat Met: The candidate's resume shows some evidence of meeting this qualification but may lack depth or completeness",
            "2 - Strongly Met: The candidate's resume clearly demonstrates they meet or exceed this qualification",
            "",
            "Please evaluate ONLY the following qualifications, and return your response in JSON format with explanations for each score:",
            ""
        ])

        if required_qualifications:
            prompt_parts.append("REQUIRED QUALIFICATIONS:")
            for i, qual in enumerate(required_qualifications, 1):
                prompt_parts.append(f"{i}. {qual}")
            prompt_parts.append("")

        if preferred_qualifications:
            prompt_parts.append("PREFERRED QUALIFICATIONS:")
            for i, qual in enumerate(preferred_qualifications, 1):
                prompt_parts.append(f"{i}. {qual}")
            prompt_parts.append("")

        prompt_parts.extend([
            'Format your response as valid JSON with this structure:',
            '{',
            ' "requiredScores": [',
            ' {',
            ' "qualification": "qualification text",',
            ' "score": 0/1/2,',
            ' "explanation": "brief explanation for the score"',
            ' },',
            ' ...',
            ' ],',
            ' "preferredScores": [',
            ' {',
            ' "qualification": "qualification text",',
            ' "score": 0/1/2,',
            ' "explanation": "brief explanation for the score"',
            ' },',
            ' ...',
            ' ],',
            ' "overallFeedback": "brief overall assessment of the candidate"',
            '}'
        ])
        return "\n".join(prompt_parts)

    async def score_candidate_qualifications(
        self,
        candidate_resume: str,
        required_qualifications: List[str],
        preferred_qualifications: List[str],
        job_title: str = "",
        job_description: str = ""
    ) -> Dict[str, Any]:
        """Score a candidate's resume against job qualifications using OpenAI.

        Args:
            candidate_resume: The candidate's resume text content
            required_qualifications: List of required qualifications
            preferred_qualifications: List of preferred qualifications
            job_title: Job title for context (optional)
            job_description: Job description for context (optional)

        Returns:
            Dictionary containing detailed scoring results: the per-item score
            lists as returned by the model, ``totalScore``,
            ``maxPossibleScore``, ``matchPercentage`` (capped at 100),
            ``overallFeedback`` and a ``scoringBreakdown``.

        Raises:
            ValueError: If the response is empty or is not valid JSON.
            Exception: If the API call fails (logged, then re-raised).
        """
        try:
            logger.info("Starting candidate qualification scoring with OpenAI")
            prompt = self._build_scoring_prompt(
                candidate_resume,
                required_qualifications,
                preferred_qualifications,
                job_title,
                job_description,
            )

            logger.info("Sending scoring request to OpenAI")
            content = await self._chat_completion(
                "You are a professional recruiter who evaluates how well candidate resumes match job qualifications.",
                prompt,
                0.1,
            )
            if not content:
                raise ValueError("No content in OpenAI response")

            logger.info("Received response from OpenAI, parsing JSON")
            try:
                scoring_data = json.loads(content)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse JSON response: {e}")
                logger.error(f"Raw content: {content}")
                raise ValueError("Failed to parse scoring data from LLM response") from e

            # `or []` also covers an explicit null from the model.
            required_scores = scoring_data.get("requiredScores") or []
            preferred_scores = scoring_data.get("preferredScores") or []

            # Totals use sanitized values so a malformed score cannot crash
            # the sum or push an item beyond the 0..2 scale.
            required_total = sum(self._score_value(item) for item in required_scores)
            preferred_total = sum(self._score_value(item) for item in preferred_scores)
            total_score = required_total + preferred_total
            max_possible_score = (len(required_qualifications) + len(preferred_qualifications)) * self._MAX_QUALIFICATION_SCORE

            # The maximum is based on the qualifications asked for; cap the
            # percentage in case the model returned extra entries.
            if max_possible_score > 0:
                match_percentage = min(100.0, total_score / max_possible_score * 100)
            else:
                match_percentage = 0

            result = {
                "requiredScores": required_scores,
                "preferredScores": preferred_scores,
                "totalScore": total_score,
                "maxPossibleScore": max_possible_score,
                "matchPercentage": round(match_percentage, 1),
                "overallFeedback": scoring_data.get("overallFeedback", ""),
                "scoringBreakdown": {
                    "requiredTotal": required_total,
                    "preferredTotal": preferred_total,
                    "requiredCount": len(required_qualifications),
                    "preferredCount": len(preferred_qualifications)
                }
            }
            logger.info(f"Successfully scored candidate: {total_score}/{max_possible_score} ({match_percentage:.1f}%)")
            return result
        except Exception as e:
            logger.error(f"Error scoring candidate qualifications: {e}")
            raise
+99
View File
@@ -0,0 +1,99 @@
import asyncio
from fastmcp import Client
async def test_server():
    """Smoke-test a locally running MCP server by calling each tool once.

    Connects to http://localhost:8080/mcp, lists the tools, then calls each
    tool with sample input and prints the first result item. Nothing is
    asserted; a failing call surfaces as an exception from the client.

    NOTE(review): the add/subtract/multiply tools are assumed to be registered
    by the server - their definitions are not visible here; confirm.
    NOTE(review): `result[0].text` assumes `call_tool` returns an indexable
    sequence of items with a `.text` attribute - confirm against the installed
    fastmcp version.
    """
    # Test the MCP server using streamable-http transport.
    # Use "/sse" endpoint if using sse transport.
    async with Client("http://localhost:8080/mcp") as client:
        # List available tools
        tools = await client.list_tools()
        for tool in tools:
            print(f">>> Tool found: {tool.name}")
        # Call add tool
        print(">>> Calling add tool for 1 + 2")
        result = await client.call_tool("add", {"a": 1, "b": 2})
        print(f"<<< Result: {result[0].text}")
        # Call subtract tool
        print(">>> Calling subtract tool for 10 - 3")
        result = await client.call_tool("subtract", {"a": 10, "b": 3})
        print(f"<<< Result: {result[0].text}")
        # Call multiply tool
        print(">>> Calling multiply tool for 4 * 5")
        result = await client.call_tool("multiply", {"a": 4, "b": 5})
        print(f"<<< Result: {result[0].text}")
        # Call extract_job_requirements tool
        sample_jd = """
Software Engineer - Full Stack
TechCorp Inc.
San Francisco, CA
We are seeking a talented Full Stack Software Engineer to join our growing team.
Need to have:
- Bachelor's degree in Computer Science or related field
- 3+ years of experience in web development
- Proficiency in JavaScript, Python, SQL
- Experience with React and Node.js
Plus if you have:
- Experience with cloud platforms (AWS, GCP)
- Knowledge of Docker and Kubernetes
- Previous startup experience
This is a full-time position offering competitive salary and benefits.
"""
        print(">>> Calling extract_job_requirements tool")
        jd_result = await client.call_tool("extract_job_requirements", {"jd_text": sample_jd})
        print(f"<<< Result: {jd_result[0].text}")
        # Call find_matching_candidates tool with qualifications
        # (qualifications are passed as comma-separated strings)
        print(">>> Calling find_matching_candidates tool")
        result = await client.call_tool("find_matching_candidates", {
            "required_qualifications": "Python, JavaScript, React, Node.js, 3+ years experience",
            "preferred_qualifications": "AWS, Docker, Kubernetes, CI/CD",
            "top_k": 5,
            "enable_reranking": True
        })
        print(f"<<< Result: {result[0].text}")
        # Call search_candidates_by_skills tool
        print(">>> Calling search_candidates_by_skills tool")
        result = await client.call_tool("search_candidates_by_skills", {
            "skills": "Python, JavaScript, React, Node.js",
            "top_k": 3
        })
        print(f"<<< Result: {result[0].text}")
        # Test score_candidate_qualifications tool
        print(">>> Calling score_candidate_qualifications tool")
        sample_resume = """
John Doe
Software Engineer
Experience:
- 5 years of Python development
- 3 years of JavaScript and React
- 2 years working with AWS and Docker
- Experience with machine learning projects
- Bachelor's degree in Computer Science
Skills: Python, JavaScript, React, Node.js, AWS, Docker, Machine Learning, SQL
"""
        result = await client.call_tool("score_candidate_qualifications", {
            "candidate_resume": sample_resume,
            "required_qualifications": "Python, JavaScript, React, 3+ years experience",
            "preferred_qualifications": "AWS, Docker, Machine Learning",
            "job_title": "Senior Software Engineer",
            "job_description": "We are looking for a senior software engineer to join our team"
        })
        print(f"<<< Result: {result[0].text}")
        print(">>> All tests completed successfully!")


# Run the smoke test when executed as a script.
if __name__ == "__main__":
    asyncio.run(test_server())
+1
View File
@@ -0,0 +1 @@
"""MCP tools package."""
+229
View File
@@ -0,0 +1,229 @@
"""Candidate retrieval tools using LlamaCloud."""
import json
import logging
import traceback
from typing import Dict, Any
from services.llamacloud_service import LlamaCloudService
from models import JobDescriptionData
logger = logging.getLogger(__name__)
class CandidateTools:
    """Container class for candidate retrieval MCP tools.

    Every tool method returns a JSON string; failures are reported as
    ``{"error": "..."}`` instead of being raised to the caller.
    """

    # Largest top_k a tool call may request.
    _MAX_TOP_K = 50

    def __init__(self):
        """Initialize CandidateTools with LlamaCloud service.

        A service that fails to initialize is logged and stored as ``None``;
        the retrieval tools then answer with an error payload.
        """
        try:
            self.llamacloud_service = LlamaCloudService()
            logger.info("CandidateTools initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize CandidateTools: {e}")
            self.llamacloud_service = None

    @staticmethod
    def _split_csv(raw: str) -> list:
        """Split a comma-separated string into stripped, non-empty items."""
        if not raw:
            return []
        return [part.strip() for part in raw.split(',') if part.strip()]

    @classmethod
    def _is_valid_top_k(cls, top_k) -> bool:
        """Return True for a real integer in 1..50 (booleans are rejected)."""
        if isinstance(top_k, bool) or not isinstance(top_k, int):
            return False
        return 1 <= top_k <= cls._MAX_TOP_K

    async def find_matching_candidates(
        self,
        required_qualifications: str,
        preferred_qualifications: str = "",
        top_k: int = 10,
        enable_reranking: bool = True
    ) -> str:
        """Find candidates matching job qualifications from LlamaCloud resume index.

        Args:
            required_qualifications: Comma-separated string of required qualifications
            preferred_qualifications: Comma-separated string of preferred qualifications (optional)
            top_k: Number of top candidates to retrieve (default: 10, max: 50)
            enable_reranking: Whether to enable reranking for better results (default: True)

        Returns:
            JSON string containing list of matching candidates with their scores and information
        """
        logger.info(f">>> Tool: 'find_matching_candidates' called with top_k={top_k}, reranking={enable_reranking}")

        # Validate service availability
        if not self.llamacloud_service:
            error_msg = "LlamaCloud service is not available. Check configuration and API key."
            logger.error(error_msg)
            return json.dumps({"error": error_msg})

        # Parse first, then validate: input such as " , ," is non-blank but
        # contains no qualification and must be rejected too.
        required_quals = self._split_csv(required_qualifications)
        if not required_quals:
            return json.dumps({"error": "Required qualifications cannot be empty"})

        if not self._is_valid_top_k(top_k):
            return json.dumps({"error": "top_k must be an integer between 1 and 50"})

        try:
            preferred_quals = self._split_csv(preferred_qualifications)
            logger.info(f"Required qualifications: {required_quals}")
            logger.info(f"Preferred qualifications: {preferred_quals}")

            # Retrieve candidates from LlamaCloud using the new method
            candidates = await self.llamacloud_service.retrieve_candidates_by_qualifications(
                required_qualifications=required_quals,
                preferred_qualifications=preferred_quals,
                top_k=top_k,
                enable_reranking=enable_reranking
            )

            # Convert candidates to dictionary format
            candidates_data = [candidate.to_dict() for candidate in candidates]

            result = {
                "search_type": "qualifications_based",
                "total_candidates": len(candidates_data),
                "search_parameters": {
                    "top_k": top_k,
                    "enable_reranking": enable_reranking,
                    "required_qualifications": required_quals,
                    "preferred_qualifications": preferred_quals
                },
                "candidates": candidates_data
            }
            logger.info(f"Successfully found {len(candidates_data)} matching candidates")
            return json.dumps(result, indent=2)
        except Exception as e:
            error_msg = f"Failed to find matching candidates: {str(e)}"
            logger.error(f"Error in find_matching_candidates: {error_msg}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": error_msg})

    async def search_candidates_by_skills(self, skills: str, top_k: int = 10) -> str:
        """Search candidates by specific skills or keywords.

        Args:
            skills: Comma-separated list of skills or keywords to search for
            top_k: Number of top candidates to retrieve (default: 10, max: 50)

        Returns:
            JSON string containing list of matching candidates
        """
        logger.info(f">>> Tool: 'search_candidates_by_skills' called with skills='{skills}', top_k={top_k}")

        # Validate service availability
        if not self.llamacloud_service:
            error_msg = "LlamaCloud service is not available. Check configuration and API key."
            logger.error(error_msg)
            return json.dumps({"error": error_msg})

        # Reject blank input as well as input made only of separators.
        skills_list = self._split_csv(skills)
        if not skills_list:
            return json.dumps({"error": "Skills parameter cannot be empty"})

        if not self._is_valid_top_k(top_k):
            return json.dumps({"error": "top_k must be an integer between 1 and 50"})

        try:
            # Create a simple job description focused on skills
            job_description = JobDescriptionData(
                title="Skills-based Search",
                company="Search Query",
                location="Any",
                required_qualifications=skills_list,
                preferred_qualifications=[],
                description=f"Looking for candidates with skills: {skills}",
                experience_level="",
                employment_type=""
            )
            logger.info(f"Searching for candidates with skills: {skills_list}")

            # Retrieve candidates from LlamaCloud
            candidates = await self.llamacloud_service.retrieve_candidates(
                job_description=job_description,
                top_k=top_k,
                enable_reranking=True
            )

            # Convert candidates to dictionary format
            candidates_data = [candidate.to_dict() for candidate in candidates]

            result = {
                "search_skills": skills_list,
                "total_candidates": len(candidates_data),
                "search_parameters": {
                    "top_k": top_k
                },
                "candidates": candidates_data
            }
            logger.info(f"Successfully found {len(candidates_data)} candidates with matching skills")
            return json.dumps(result, indent=2)
        except Exception as e:
            error_msg = f"Failed to search candidates by skills: {str(e)}"
            logger.error(f"Error in search_candidates_by_skills: {error_msg}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": error_msg})

    async def score_candidate_qualifications(
        self,
        candidate_resume: str,
        required_qualifications: str,
        preferred_qualifications: str = "",
        job_title: str = "",
        job_description: str = ""
    ) -> str:
        """Score a candidate's resume against specific job qualifications using LLM evaluation.

        Args:
            candidate_resume: The candidate's resume text content
            required_qualifications: Comma-separated string of required qualifications
            preferred_qualifications: Comma-separated string of preferred qualifications (optional)
            job_title: Job title for context (optional)
            job_description: Job description for context (optional)

        Returns:
            JSON string containing detailed scoring results for each qualification
        """
        logger.info(f">>> Tool: 'score_candidate_qualifications' called")

        # Validate input parameters
        if not candidate_resume or not candidate_resume.strip():
            return json.dumps({"error": "Candidate resume cannot be empty"})

        # Parsed before validation so separator-only input is rejected instead
        # of being scored against zero qualifications.
        required_quals = self._split_csv(required_qualifications)
        if not required_quals:
            return json.dumps({"error": "Required qualifications cannot be empty"})

        try:
            preferred_quals = self._split_csv(preferred_qualifications)
            logger.info(f"Scoring candidate against {len(required_quals)} required and {len(preferred_quals)} preferred qualifications")

            # Imported here rather than at file level, as in the original code.
            from services.openai_service import OpenAIService

            # Use OpenAI service for scoring
            openai_service = OpenAIService()
            scoring_result = await openai_service.score_candidate_qualifications(
                candidate_resume=candidate_resume,
                required_qualifications=required_quals,
                preferred_qualifications=preferred_quals,
                job_title=job_title,
                job_description=job_description
            )
            logger.info(f"Successfully scored candidate with total score {scoring_result.get('totalScore', 0)}/{scoring_result.get('maxPossibleScore', 0)}")
            return json.dumps(scoring_result, indent=2)
        except Exception as e:
            error_msg = f"Failed to score candidate qualifications: {str(e)}"
            logger.error(f"Error in score_candidate_qualifications: {error_msg}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": error_msg})
+68
View File
@@ -0,0 +1,68 @@
"""Job description related MCP tools."""
import json
import logging
import traceback
from typing import Dict, Any
from services.openai_service import OpenAIService
logger = logging.getLogger(__name__)


class JobTools:
    """Container class for job description related MCP tools."""

    def __init__(self):
        """Initialize JobTools with OpenAI service."""
        self.openai_service = OpenAIService()

    async def extract_job_requirements(self, jd_text: str) -> str:
        """Extract structured job requirements from job description text.

        Args:
            jd_text: The job description text to analyze

        Returns:
            JSON string containing structured job requirements including title, company,
            location, required_qualifications, preferred_qualifications, description,
            experience_level, and employment_type. On invalid input or any
            failure, a JSON object with a single "error" key is returned
            instead; this method does not raise.
        """
        # Compute the length defensively: calling len() on None (or another
        # non-string) would raise before the validation below could answer.
        text_length = len(jd_text) if isinstance(jd_text, str) else 0
        logger.info(f">>> Tool: 'extract_job_requirements' called with JD text length: {text_length}")

        # Input validation
        if not isinstance(jd_text, str) or not jd_text.strip():
            return json.dumps({"error": "Job description text cannot be empty"})
        if len(jd_text.strip()) < 10:
            return json.dumps({"error": "Job description text is too short to be meaningful"})

        try:
            logger.info("Starting job description extraction process...")
            # Use the OpenAI service for extraction
            extraction_result = await self.openai_service.extract_job_description_from_text(jd_text)

            # Check that the extraction result is valid
            if not extraction_result:
                logger.error("JD extraction result is undefined")
                return json.dumps({"error": "Failed to extract data from job description text"})

            logger.info("JD extraction completed successfully")
            result_dict = extraction_result.to_dict()
            logger.info(f"Structured JD extraction result: {json.dumps(result_dict, indent=2)}")
            return json.dumps(result_dict)
        except json.JSONDecodeError as e:
            # json.JSONDecodeError subclasses ValueError, so it has to be
            # caught before the ValueError handler or this branch never runs.
            logger.error(f"JSON decode error in extract_job_requirements: {e}")
            return json.dumps({"error": f"JSON parsing error: {str(e)}"})
        except ValueError as e:
            logger.error(f"ValueError in extract_job_requirements: {e}")
            return json.dumps({"error": f"Configuration error: {str(e)}"})
        except Exception as e:
            # Tool boundary: report every remaining failure to the caller as JSON
            logger.error(f"Unexpected error extracting job requirements: {e}")
            logger.error(f"Error type: {type(e).__name__}")
            logger.error(f"Error args: {e.args}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            return json.dumps({"error": f"Failed to extract job requirements: {str(e)}"})
+52
View File
@@ -0,0 +1,52 @@
"""Mathematical operation MCP tools."""
import logging
from typing import Union
logger = logging.getLogger(__name__)


class MathTools:
    """Namespace for the arithmetic MCP tools.

    Each tool is a static method that logs its invocation and then returns
    the computed value.
    """

    @staticmethod
    def add(a: int, b: int) -> int:
        """Return the sum of ``a`` and ``b``.

        Args:
            a: Left-hand operand
            b: Right-hand operand

        Returns:
            The result of ``a + b``
        """
        call_message = f">>> Tool: 'add' called with numbers '{a}' and '{b}'"
        logger.info(call_message)
        total = a + b
        return total

    @staticmethod
    def subtract(a: int, b: int) -> int:
        """Return ``a`` minus ``b``.

        Args:
            a: The value to subtract from
            b: The value to subtract

        Returns:
            The result of ``a - b``
        """
        call_message = f">>> Tool: 'subtract' called with numbers '{a}' and '{b}'"
        logger.info(call_message)
        difference = a - b
        return difference

    @staticmethod
    def multiply(a: int, b: int) -> int:
        """Return the product of ``a`` and ``b``.

        Args:
            a: Left-hand factor
            b: Right-hand factor

        Returns:
            The result of ``a * b``
        """
        call_message = f">>> Tool: 'multiply' called with numbers '{a}' and '{b}'"
        logger.info(call_message)
        product = a * b
        return product
Generated
+2567
View File
File diff suppressed because it is too large Load Diff