mirror of
https://github.com/langchain-ai/agent-evals.git
synced 2026-07-01 20:35:18 -04:00
add create dataset for people
This commit is contained in:
@@ -17,4 +17,4 @@ Below is the list of currently available evals:
|
||||
| [Math](./math) | [Math Problems](https://smith.langchain.com/public/e0993f2f-c055-4446-afc2-e52da6a4dda0/d) | Solve math problems and return numerical answers | `{"Question": "Find the second derivative of f(x)=ln(x) and evaluate it at x=0.5."}` | `{"Answer": "-4"}` |
|
||||
| [Public Company Data Enrichment](./public_company_data_enrichment) | [Public Companies](https://smith.langchain.com/public/640df79c-1831-494e-8824-d7300205dc8e/d) | Extract structured company information like CEO, headquarters, employee count etc. | `{"company": "Nvidia", "extraction_schema": {...}}` | `{"info": {"ceo": "Jensen Huang", "name": "Nvidia Corporation", ...}}` |
|
||||
| [Startup Data Enrichment](./startup_data_enrichment) | [Startups](https://smith.langchain.com/public/afabd12a-62fa-4c09-b083-6b1742b4cc3a/d) | Extract structured company information like latest round, total funding, year founded etc. | `{"company": "LangChain", "extraction_schema": {...}}` | `{"info": {"latest_round": "Series A", ...}}` |
|
||||
| [People Data Enrichment](./people_data_enrichment) | [People Dataset](https://smith.langchain.com/public/2af89d2a-93f6-4c84-80ac-70defcfd14c8/d) | Extract structured information about people like work experience, role, company etc. | `{"person": {"name": "Erick Friis", "email": "erick@langchain.dev", ...}, "extraction_schema": {...}}` | `{"extracted_information": {"Years-Experience": 10, "Company": "LangChain", ...}}` |
|
||||
| [People Data Enrichment](./people_data_enrichment) | [People Dataset](https://smith.langchain.com/public/3384cc3a-722c-4eb1-8e41-dff56fea05b8/d) | Extract structured information about people like work experience, role, company etc. | `{"person": {"name": "Erick Friis", "email": "erick@langchain.dev", ...}, "extraction_schema": {...}}` | `{"extracted_information": {"years_experience": 5, "current_company": "LangChain", ...}}` |
|
||||
@@ -4,11 +4,11 @@ This directory contains evaluation script for the people data enrichment agents.
|
||||
|
||||
## Dataset
|
||||
|
||||
The dataset used can be found [here](https://smith.langchain.com/public/2af89d2a-93f6-4c84-80ac-70defcfd14c8/d). This dataset has a list of people to do research on and extract the following fields for:
|
||||
- `Years-Experience`
|
||||
- `Company`
|
||||
- `Role`
|
||||
- `Prior-Companies`
|
||||
The dataset used can be found [here](https://smith.langchain.com/public/3384cc3a-722c-4eb1-8e41-dff56fea05b8/d). It contains a list of people to research; for each person, the following fields are extracted:
|
||||
- `years_experience`
|
||||
- `current_company`
|
||||
- `role`
|
||||
- `prior_companies`
|
||||
|
||||
|
||||
<details>
|
||||
@@ -27,28 +27,28 @@ The dataset used can be found [here](https://smith.langchain.com/public/2af89d2a
|
||||
"type": "object",
|
||||
"title": "Person-Schema",
|
||||
"required": [
|
||||
"Years-Experience",
|
||||
"Company",
|
||||
"Role",
|
||||
"Prior-Companies"
|
||||
"years_experience",
|
||||
"current_company",
|
||||
"role",
|
||||
"prior_companies"
|
||||
],
|
||||
"properties": {
|
||||
"Role": {
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Current role of the person."
|
||||
},
|
||||
"Company": {
|
||||
"current_company": {
|
||||
"type": "string",
|
||||
"description": "The name of the current company the person works at."
|
||||
},
|
||||
"Prior-Companies": {
|
||||
"prior_companies": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "List of previous companies where the person has worked"
|
||||
},
|
||||
"Years-Experience": {
|
||||
"years_experience": {
|
||||
"type": "number",
|
||||
"description": "How many years of full time work experience (excluding internships) does this person have."
|
||||
}
|
||||
@@ -65,26 +65,48 @@ The dataset used can be found [here](https://smith.langchain.com/public/2af89d2a
|
||||
```json
|
||||
{
|
||||
"extracted_information": {
|
||||
"Role": "Exploring new ideas and building out next project",
|
||||
"Company": "South Park Commons",
|
||||
"Prior-Companies": [
|
||||
"Instabase",
|
||||
"Chestnut",
|
||||
"MIT"
|
||||
"role": "Exploring new ideas and building out next project",
|
||||
"current_company": "South Park Commons",
|
||||
"prior_companies": [
|
||||
"Instabase",
|
||||
"Chestnut",
|
||||
"MIT"
|
||||
],
|
||||
"Years-Experience": 5
|
||||
"years_experience": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
</details>
|
||||
|
||||
### Using the dataset
|
||||
|
||||
To use the data from this dataset in your own project, you can:
|
||||
|
||||
(1) clone the dataset using the LangSmith SDK:
|
||||
|
||||
```python
|
||||
from langsmith import Client
|
||||
client = Client()
|
||||
|
||||
cloned_dataset = client.clone_public_dataset(
|
||||
"https://smith.langchain.com/public/3384cc3a-722c-4eb1-8e41-dff56fea05b8/d",
|
||||
dataset_name="People Data Enrichment"
|
||||
)
|
||||
```
|
||||
|
||||
(2) create a new dataset with the same examples using the following command:
|
||||
|
||||
```shell
|
||||
python people_data_enrichment/create_dataset.py
|
||||
```
|
||||
|
||||
## Evaluation Metric
|
||||
|
||||
Currently there is a single evaluation metric: fraction of the fields that were correctly extracted (per person). Correctness is defined differently depending on the field type:
|
||||
|
||||
- fuzzy matching for list of string fields such as `Prior-Companies`
|
||||
- fuzzy matches for fields like `Role` / `Company`
|
||||
- checking within a certain tolerance (+/- 15%) for `Years-Experience` field
|
||||
- fuzzy matching for list of string fields such as `prior_companies`
|
||||
- fuzzy matches for fields like `role` / `current_company`
|
||||
- checking within a certain tolerance (+/- 15%) for `years_experience` field
|
||||
|
||||
## Invoking the agent
|
||||
|
||||
|
||||
@@ -0,0 +1,320 @@
|
||||
# Seed records for the "People Data Enrichment" dataset.
#
# Each record is a flat dict that mixes two groups of keys; the script at the
# bottom of this file splits them when uploading:
#   * inputs:            "name", "linkedin_profile", "work_email"
#   * reference outputs: "role", "current_company", "prior_companies",
#                        "years_experience"
#
# Conventions visible in the data:
#   * "work_email" holds the literal string "N/A" for some records.
#   * "prior_companies" is a single comma-separated string, not a list.
#     NOTE: at least one company name contains a comma itself
#     ("Loomis, Sayles & Company"), so splitting on ", " is lossy.
#   * "years_experience" is an integer.
EXAMPLES = [
    {
        "name": "Harrison Chase",
        "linkedin_profile": "https://www.linkedin.com/in/harrison-chase-961287118/",
        "work_email": "harrison@langchain.dev",
        "role": "CEO",
        "current_company": "LangChain",
        "prior_companies": "Kensho Technologies, Robust Intelligence",
        "years_experience": 7,
    },
    {
        "name": "Jake Rachleff",
        "linkedin_profile": "https://www.linkedin.com/in/jakerachleff/",
        "work_email": "jake@langchain.dev",
        "role": "Software Engineer",
        "current_company": "LangChain",
        "prior_companies": "Databricks",
        "years_experience": 6,
    },
    {
        "name": "Nuno Campos",
        "linkedin_profile": "https://www.linkedin.com/in/nuno-f-campos/",
        "work_email": "nuno@langchain.dev",
        "role": "Founding Engineer",
        "current_company": "LangChain",
        "prior_companies": "James, YLD, Boringbits",
        "years_experience": 9,
    },
    {
        "name": "Vince Signori",
        "linkedin_profile": "https://www.linkedin.com/in/vincesignori/",
        "work_email": "vince@langchain.dev",
        "role": "GTM",
        "current_company": "LangChain",
        "prior_companies": "iPass, Imperva, Zendesk, Trifacta, HashiCorp",
        "years_experience": 12,
    },
    {
        "name": "Nick Huang",
        "linkedin_profile": "https://www.linkedin.com/in/ncchuang/",
        "work_email": "nick@langchain.dev",
        "role": "Deployed Engineer",
        "current_company": "LangChain",
        "prior_companies": "Intros AI, Palantir",
        "years_experience": 2,
    },
    {
        "name": "Will Fu-Hinthorn",
        "linkedin_profile": "https://www.linkedin.com/in/williamfuhinthorn/",
        "work_email": "will@langchain.dev",
        # Spelling corrected from "Foudning Engineer" to match the other records.
        "role": "Founding Engineer",
        "current_company": "LangChain",
        "prior_companies": "Microsoft, Robust Intelligence",
        "years_experience": 7,
    },
    {
        "name": "Jonathan Hodges",
        "linkedin_profile": "https://www.linkedin.com/in/jonathanhodges/",
        "work_email": "N/A",
        "role": "Chief Architect, AI & ML",
        "current_company": "Bolt",
        "prior_companies": "NBC Entertainment Digital, Pearson North America, Ascend Learning, Workiva, GHX, Atypical AI, Userpilot",
        "years_experience": 16,
    },
    {
        "name": "Jessica Ou",
        "linkedin_profile": "https://www.linkedin.com/in/jessicaou/",
        "work_email": "jess@langchain.dev",
        "role": "Business Operations & Finance",
        "current_company": "LangChain",
        "prior_companies": "Morgan Stanley, NEA, Sapienne",
        "years_experience": 8,
    },
    {
        "name": "Vadym Barda",
        "linkedin_profile": "https://www.linkedin.com/in/vadymbarda/",
        "work_email": "vadym@langchain.dev",
        "role": "SWE/ML Engineer",
        "current_company": "LangChain",
        "prior_companies": "Kensho Technologies, Tunum",
        "years_experience": 9,
    },
    {
        "name": "Adam D'Abbracci",
        "linkedin_profile": "https://www.linkedin.com/in/adam-d-abbracci-25390a22/",
        "work_email": "N/A",
        "role": "Founder",
        "current_company": "Proteus",
        "prior_companies": "TwentyTwenty Productions, Redcircle, Bags Inc., The Walt Disney Company, The New York Times",
        "years_experience": 11,
    },
    {
        "name": "Charles Bernoskie",
        "linkedin_profile": "https://www.linkedin.com/in/charlesbernoskie/",
        "work_email": "charles@langchain.dev",
        "role": "GTM",
        "current_company": "LangChain",
        "prior_companies": "BuyWithMe, Stack Overflow, Elastic, materialize",
        "years_experience": 18,
    },
    {
        "name": "Bagatur Askaryan",
        "linkedin_profile": "https://www.linkedin.com/in/bagatur-askaryan/",
        "work_email": "bagatur@langchain.dev",
        "role": "Founding Engineer",
        "current_company": "LangChain",
        "prior_companies": "Robust Intelligence",
        "years_experience": 4,
    },
    {
        "name": "Cameron Vetter",
        "linkedin_profile": "https://www.linkedin.com/in/cameronvetter/",
        "work_email": "N/A",
        "role": "AI Practice Director",
        "current_company": "New Resources Consulting",
        "prior_companies": "Edstrom Industries, Centare Group, GE Healthcare, Direct Supply, Runzheimer International, Safenet Consulting, Octavion Technology Group, Zecil Software",
        "years_experience": 24,
    },
    {
        "name": "Alex Kira",
        "linkedin_profile": "https://www.linkedin.com/in/alexkira/",
        "work_email": "alex@langchain.dev",
        "role": "Engineer",
        "current_company": "LangChain",
        "prior_companies": "Air2Web, CheckFree, Optimus Solutions, PromoterBee, Oracle, LoLo, Apple, Telmate, Uber, Netflix, Ramp",
        "years_experience": 24,
    },
    {
        "name": "Chester Curme",
        "linkedin_profile": "https://www.linkedin.com/in/chestercurme/",
        "work_email": "chester@langchain.dev",
        "role": "Machine Learning Engineer",
        "current_company": "LangChain",
        "prior_companies": "Loomis, Sayles & Company, Kensho Technologies, Evisort, Microsoft",
        "years_experience": 9,
    },
    {
        "name": "Ankush Gola",
        "linkedin_profile": "https://www.linkedin.com/in/ankush-gola-77255866/",
        "work_email": "ankush@langchain.dev",
        "role": "Co-Founder",
        "current_company": "LangChain",
        "prior_companies": "Facebook, Robust Intelligence, Unfold",
        "years_experience": 9,
    },
    {
        "name": "Jacob Lee",
        "linkedin_profile": "https://www.linkedin.com/in/jacoblee93/",
        "work_email": "jacob@langchain.dev",
        "role": "Founding Engineer",
        "current_company": "LangChain",
        "prior_companies": "Google, Autocode, Remora Software",
        "years_experience": 9,
    },
    {
        "name": "Lance Martin",
        "linkedin_profile": "https://www.linkedin.com/in/lance-martin-64a33b5/",
        "work_email": "lance@langchain.dev",
        "role": "Software/ML",
        "current_company": "LangChain",
        "prior_companies": "Uber, Ike, Nuro",
        "years_experience": 9,
    },
    {
        "name": "David Duong",
        "linkedin_profile": "https://www.linkedin.com/in/duongtat",
        "work_email": "david@langchain.dev",
        "role": "Founding Engineer",
        "current_company": "LangChain",
        "prior_companies": "AstrumQ Interactive, Moravio, Spendee, Fitify, Cleevio",
        "years_experience": 9,
    },
    {
        "name": "Kevin Swiber",
        "linkedin_profile": "https://www.linkedin.com/in/kevinswiber",
        "work_email": "N/A",
        "role": "API Strategy Lead",
        "current_company": "Postman",
        "prior_companies": "Ford Motor Company, EHIM, Quicken Loans, Apigee, LunchBadger, NodeSource",
        "years_experience": 18,
    },
    {
        "name": "Andrew Nguonly",
        "linkedin_profile": "https://www.linkedin.com/in/andrewnguonly/",
        "work_email": "andrew@langchain.dev",
        "role": "Software Engineer",
        "current_company": "LangChain",
        "prior_companies": "j2 Global, CGI, Omaze, Honey, Netflix, Carta, South Park Commons",
        "years_experience": 13,
    },
    {
        "name": "Nazar Borovets",
        "linkedin_profile": "https://www.linkedin.com/in/nazareka/",
        "work_email": "N/A",
        "role": "Backend & LLM Python Developer",
        "current_company": "Veido",
        "prior_companies": "HexOcean, Wooden Borovets Products, Gart Technology",
        "years_experience": 1,
    },
    {
        "name": "Eric Han",
        "linkedin_profile": "https://www.linkedin.com/in/eric-han27/",
        "work_email": "eric@langchain.dev",
        "role": "Software Engineer",
        "current_company": "LangChain",
        "prior_companies": "Hotwire, Bottles Waiting Inc., Shopagon, Overlay Gaming Corporation, Instabase",
        "years_experience": 12,
    },
    {
        "name": "Greg Asquith",
        "linkedin_profile": "https://www.linkedin.com/in/gregasquith/",
        "work_email": "N/A",
        "role": "Technology Consultant",
        "current_company": "gregasquith.com",
        "prior_companies": "Renault, Essence, Adcessible, Smartly Video and Display",
        "years_experience": 9,
    },
    {
        "name": "Erick Friis",
        "linkedin_profile": "https://www.linkedin.com/in/efriis/",
        "work_email": "erick@langchain.dev",
        "role": "Founding Engineer",
        "current_company": "LangChain",
        "prior_companies": "Instabase, Chestnut, South Park Commons",
        "years_experience": 5,
    },
    {
        "name": "Oliver Dupuis",
        "linkedin_profile": "https://www.linkedin.com/in/olivierdupuis/",
        "work_email": "N/A",
        "role": "Data Product Builder",
        "current_company": "RepublicOfData.io",
        "prior_companies": "University of Ottawa, Lantrns Analytics, Rittman Analytics",
        "years_experience": 19,
    },
    {
        "name": "Eugene Yurtsev",
        "linkedin_profile": "https://www.linkedin.com/in/eugene-yurtsev-797a3b1b/",
        "work_email": "eugene@langchain.dev",
        "role": "SWE/ML Engineer",
        "current_company": "LangChain",
        "prior_companies": "Kensho Technologies, Yurtsev",
        "years_experience": 9,
    },
    {
        "name": "Brian Vander Schaaf",
        "linkedin_profile": "https://www.linkedin.com/in/brianvanderschaaf/",
        "work_email": "brian@langchain.dev",
        "role": "Software Engineer",
        "current_company": "LangChain",
        "prior_companies": "Yodle, Two Sigma IQ, Frontrunner",
        "years_experience": 8,
    },
    {
        "name": "Julia Schottenstein",
        "linkedin_profile": "https://www.linkedin.com/in/julia-schottenstein-25424318/",
        "work_email": "julia@langchain.dev",
        "role": "Building",
        "current_company": "LangChain",
        "prior_companies": "Qatalyst Partners, NEA, dbt Labs",
        "years_experience": 10,
    },
    {
        "name": "Wei Wong",
        "linkedin_profile": "https://www.linkedin.com/in/weijianwong/",
        "work_email": "wei@langchain.dev",
        "role": "Account Executive",
        "current_company": "LangChain",
        "prior_companies": "Deutsche Bank, SingleStore, Snowflake",
        "years_experience": 7,
    },
]
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: upload EXAMPLES to LangSmith as a new dataset named
    # "People Data Enrichment". If a dataset with that name already exists,
    # print its URL and exit with status 1 without modifying anything.
    #
    # The langsmith imports live under the main guard, so merely importing
    # this module does not import langsmith.
    from langsmith import Client
    from langsmith.utils import LangSmithNotFoundError

    # Keys of each EXAMPLES record that become the example's inputs and its
    # reference outputs, respectively.
    input_keys = ("name", "work_email", "linkedin_profile")
    output_keys = ("years_experience", "current_company", "role", "prior_companies")

    client = Client()
    dataset_name = "People Data Enrichment"

    # Storing inputs in a dataset lets us
    # run chains and LLMs over a shared set of examples.
    try:
        exists_dataset = client.read_dataset(dataset_name=dataset_name)
    except LangSmithNotFoundError:
        # No dataset with this name yet: fall through and create it.
        pass
    else:
        print(f"Dataset '{dataset_name}' already exists.")
        print("You can access the dataset via the URL: ", exists_dataset.url)
        # Raise SystemExit directly instead of calling the `exit()` helper,
        # which is injected by the `site` module and is not guaranteed to
        # exist (e.g. under `python -S`). The exit status stays 1.
        raise SystemExit(1)

    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Evaluate ability to research information about people (e.g., name, email, linkedin)",
    )

    # Prepare inputs and outputs for bulk creation. Filtering over
    # record.items() keeps each record's own key order and tolerates records
    # that lack one of the keys.
    inputs = [
        {key: value for key, value in record.items() if key in input_keys}
        for record in EXAMPLES
    ]
    outputs = [
        {key: value for key, value in record.items() if key in output_keys}
        for record in EXAMPLES
    ]

    client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )

    print(f"Dataset '{dataset_name}' created with {len(EXAMPLES)} examples.")
    print("You can access the dataset via the URL: ", dataset.url)
|
||||
@@ -8,9 +8,9 @@ import argparse
|
||||
# Defaults
|
||||
EXPERIMENT_PREFIX = "People mAIstro "
|
||||
TOLERANCE = 0.15 # should match within 15%
|
||||
NUMERIC_FIELDS = ("Years-Experience",)
|
||||
FUZZY_MATCH_FIELDS = ("Role", "Company")
|
||||
LIST_OF_STRING_FIELDS = ("Prior-Companies",)
|
||||
NUMERIC_FIELDS = ("years_experience",)
|
||||
FUZZY_MATCH_FIELDS = ("role", "current_company")
|
||||
LIST_OF_STRING_FIELDS = ("prior_companies",)
|
||||
DEFAULT_DATASET_NAME = "People Data Enrichment"
|
||||
DEFAULT_GRAPH_ID = "people_maistro"
|
||||
DEFAULT_AGENT_URL = "https://langr.ph/marketplace/62bf5890-28fa-4dd1-b469-4751fe7ecdf3"
|
||||
@@ -20,29 +20,29 @@ client = Client()
|
||||
extraction_schema = {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"Years-Experience",
|
||||
"Company",
|
||||
"Role",
|
||||
"Prior-Companies",
|
||||
"years_experience",
|
||||
"current_company",
|
||||
"role",
|
||||
"prior_companies",
|
||||
],
|
||||
"properties": {
|
||||
"Role": {"type": "string", "description": "Current role of the person."},
|
||||
"Years-Experience": {
|
||||
"role": {"type": "string", "description": "Current role of the person."},
|
||||
"years_experience": {
|
||||
"type": "number",
|
||||
"description": "How many years of full time work experience (excluding internships) does this person have.",
|
||||
},
|
||||
"Company": {
|
||||
"current_company": {
|
||||
"type": "string",
|
||||
"description": "The name of the current company the person works at.",
|
||||
},
|
||||
"Prior-Companies": {
|
||||
"prior_companies": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "List of previous companies where the person has worked",
|
||||
},
|
||||
},
|
||||
"description": "Person information",
|
||||
"title": "Person-Schema",
|
||||
"title": "Person",
|
||||
}
|
||||
|
||||
|
||||
@@ -132,9 +132,9 @@ def transform_dataset_inputs(inputs: dict) -> dict:
|
||||
# see the `Example input` in the README for reference on what `inputs` dict should look like
|
||||
return {
|
||||
"person": {
|
||||
"name": inputs["Person"],
|
||||
"email": inputs["Work-Email"],
|
||||
"linkedin": inputs["Linkedin"],
|
||||
"name": inputs["name"],
|
||||
"email": inputs["work_email"],
|
||||
"linkedin": inputs["linkedin_profile"],
|
||||
},
|
||||
"extraction_schema": extraction_schema,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user