Files
2024-12-02 18:34:13 -05:00

446 lines
19 KiB
Python

# Ground-truth records for the startup-enrichment dataset.
#
# Each record has two keys:
#   "company": the short company name that is given as the lookup input.
#   "info":    the expected output; its keys mirror the properties declared in
#              EXTRACTION_SCHEMA. Monetary amounts are in millions of USD and
#              dates are ISO formatted (YYYY-MM-DD).
#
# URLs are written as absolute URIs (scheme included) so that the expected
# outputs satisfy the "uri" format declared by the schema.
EXAMPLES = [
    {
        "company": "LangChain",
        "info": {
            "name": "LangChain, Inc.",
            "description": "LangChain helps developers to build applications powered by large language models (LLMs). It provides tools and frameworks to integrate LLMs with external data sources and APIs, facilitating the creation of advanced AI applications.",
            "website": "https://www.langchain.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/langchain",
            "year_founded": 2022,
            "ceo": "Harrison Chase",
            "total_funding_mm_usd": 35.0,
            "latest_round": "Series A",
            "latest_round_date": "2024-02-15",
            "latest_round_amount_mm_usd": 25.0,
        },
    },
    {
        "company": "Kensho",
        "info": {
            "name": "Kensho Technologies, LLC.",
            "description": "Kensho Technologies, a subsidiary of S&P Global, specializes in developing advanced analytics and machine learning solutions for the financial industry. Their products include tools for natural language processing, data extraction, and linking, enabling clients to derive actionable insights from complex data sets.",
            "website": "https://kensho.com/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/kensho",
            "year_founded": 2013,
            "ceo": "Bhavesh Dayalji",
            "total_funding_mm_usd": 81.1,
            "latest_round": "Series B",
            "latest_round_date": "2017-02-28",
            "latest_round_amount_mm_usd": 50.0,
        },
    },
    {
        "company": "Robust Intelligence",
        "info": {
            "name": "Robust Intelligence, Inc.",
            "description": "Robust Intelligence offers an AI application security platform designed to protect machine learning models from various threats, including data poisoning and adversarial attacks. Their solutions ensure the integrity and reliability of AI systems across diverse industries.",
            "website": "https://www.robustintelligence.com/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/robust-intelligence",
            "year_founded": 2019,
            "ceo": "Yaron Singer",
            "total_funding_mm_usd": 44.0,
            "latest_round": "Series B",
            "latest_round_date": "2021-12-09",
            "latest_round_amount_mm_usd": 30.0,
        },
    },
    {
        "company": "Perplexity.ai",
        "info": {
            "name": "Perplexity AI, Inc.",
            "description": "Perplexity.ai is an AI-powered search engine that delivers concise and accurate answers to user queries. It leverages advanced natural language processing to provide direct responses, enhancing the search experience.",
            "website": "https://www.perplexity.ai",
            "crunchbase_profile": "https://www.crunchbase.com/organization/perplexity-ai",
            "year_founded": 2022,
            "ceo": "Aravind Srinivas",
            "total_funding_mm_usd": 165.0,
            "latest_round": "Series B",
            "latest_round_date": "2024-04-23",
            "latest_round_amount_mm_usd": 62.7,
        },
    },
    {
        "company": "Physical Intelligence.ai",
        "info": {
            "name": "Physical Intelligence.ai",
            "description": "Physical Intelligence.ai specializes in developing AI solutions that enhance human physical capabilities. Their technologies focus on improving physical performance and health through intelligent systems.",
            "website": "https://www.physicalintelligence.company/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/physical-intelligence-834b",
            "year_founded": 2023,
            "ceo": "Karol Hausman",
            "total_funding_mm_usd": 470.0,
            "latest_round": "Series A",
            "latest_round_date": "2024-11-04",
            "latest_round_amount_mm_usd": 400.0,
        },
    },
    {
        "company": "Galileo.ai",
        "info": {
            "name": "Galileo AI, Inc.",
            "description": "Galileo.ai offers AI-driven design tools that assist in creating user interfaces and experiences. Their platform automates design processes, enabling rapid prototyping and iteration for designers and developers.",
            "website": "https://www.usegalileo.ai/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/galileo-ai",
            "year_founded": 2022,
            "ceo": "Arnaud Benard",
            "total_funding_mm_usd": 4.8,
            "latest_round": "Seed",
            "latest_round_date": "2024-02-06",
            "latest_round_amount_mm_usd": 4.4,
        },
    },
    {
        "company": "Sierra.ai",
        "info": {
            "name": "Sierra Technologies, Inc.",
            "description": "Sierra.ai develops AI-powered safety and compliance solutions for the trucking industry. Their technology aims to enhance driver safety, ensure regulatory compliance, and improve operational efficiency.",
            "website": "https://sierra.ai/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/sierra-1124",
            "year_founded": 2023,
            "ceo": "Clay Bavor",
            "total_funding_mm_usd": 285.0,
            "latest_round": "Series B",
            "latest_round_date": "2024-10-28",
            "latest_round_amount_mm_usd": 175.0,
        },
    },
    {
        "company": "Rad AI",
        "info": {
            "name": "Rad AI, Inc.",
            "description": "Rad AI provides artificial intelligence solutions for radiology, aiming to improve diagnostic accuracy and efficiency. Their platform assists radiologists by automating routine tasks and enhancing image analysis.",
            "website": "https://www.radai.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/radai",
            "year_founded": 2018,
            "ceo": "Doktor Gurson",
            "total_funding_mm_usd": 83.0,
            "latest_round": "Series B",
            "latest_round_date": "2024-05-07",
            "latest_round_amount_mm_usd": 50.0,
        },
    },
    {
        "company": "Together AI",
        "info": {
            "name": "Together, Inc.",
            "description": "Together AI focuses on building open-source models and tools for natural language processing. They aim to make advanced AI technologies accessible and collaborative for researchers and developers.",
            "website": "https://www.together.ai/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/together-ai",
            "year_founded": 2022,
            "ceo": "Vipul Ved Prakash",
            "total_funding_mm_usd": 228.5,
            "latest_round": "Series A",
            "latest_round_date": "2024-03-13",
            "latest_round_amount_mm_usd": 106.0,
        },
    },
    {
        "company": "Omneky",
        "info": {
            "name": "Omneky Inc.",
            "description": "Omneky utilizes AI to create personalized advertising content across digital platforms. Their platform analyzes data to generate targeted ads, optimizing marketing strategies for businesses.",
            "website": "https://www.omneky.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/omneky",
            "year_founded": 2018,
            "ceo": "Hikari Senju",
            "total_funding_mm_usd": 13.0,
            "latest_round": "Seed",
            "latest_round_date": "2022-11-15",
            "latest_round_amount_mm_usd": 10.0,
        },
    },
    {
        "company": "Curai Health",
        "info": {
            "name": "Curai, Inc.",
            "description": "Curai Health offers AI-assisted primary care services, combining artificial intelligence with medical expertise to provide accessible and affordable healthcare solutions.",
            "website": "https://www.curaihealth.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/curai",
            "year_founded": 2017,
            "ceo": "Neal Khosla",
            "total_funding_mm_usd": 38.2,
            "latest_round": "Series B",
            "latest_round_date": "2020-12-16",
            "latest_round_amount_mm_usd": 27.5,
        },
    },
    {
        "company": "Decagon.ai",
        "info": {
            "name": "Decagon AI, Inc.",
            "description": "Decagon.ai develops enterprise-grade generative AI agents for customer support, enabling businesses to provide efficient and personalized customer service experiences.",
            "website": "https://decagon.ai",
            "crunchbase_profile": "https://www.crunchbase.com/organization/decagon-485e",
            "year_founded": 2023,
            "ceo": "Jesse Zhang",
            "total_funding_mm_usd": 100.0,
            "latest_round": "Series B",
            "latest_round_date": "2024-10-15",
            "latest_round_amount_mm_usd": 65.0,
        },
    },
    {
        "company": "Xaira Therapeutics",
        "info": {
            "name": "Xaira Therapeutics",
            "description": "Xaira Therapeutics is a biotechnology company leveraging artificial intelligence for drug discovery and development, aiming to deliver transformative medicines.",
            "website": "https://xaira.com/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/xaira-therapeutics",
            "year_founded": 2023,
            "ceo": "Marc Tessier-Lavigne",
            "total_funding_mm_usd": 1000.0,
            "latest_round": "Series A",
            "latest_round_date": "2024-04-23",
            "latest_round_amount_mm_usd": 1000.0,
        },
    },
    {
        "company": "Regie.ai",
        "info": {
            "name": "Regie.ai",
            "description": "Regie.ai provides generative AI tools for sales teams, automating content creation and streamlining communication processes to enhance sales efficiency.",
            "website": "https://www.regie.ai/",
            "crunchbase_profile": "https://www.crunchbase.com/organization/regie-da23",
            "year_founded": 2020,
            "ceo": "Srinath Sridhar",
            "total_funding_mm_usd": 20.8,
            "latest_round": "Series A",
            "latest_round_date": "2023-02-09",
            "latest_round_amount_mm_usd": 6.0,
        },
    },
    {
        "company": "Bifrost AI",
        "info": {
            "name": "Bifrost AI, Inc.",
            "description": "Bifrost AI specializes in generating synthetic data for AI and robotics, enabling faster training and validation of models without the need for real-world data.",
            "website": "https://www.bifrost.ai",
            "crunchbase_profile": "https://www.crunchbase.com/organization/bifrost",
            "year_founded": 2020,
            "ceo": "Charles Wong",
            "total_funding_mm_usd": 13.1,
            "latest_round": "Series A",
            "latest_round_date": "2024-10-30",
            "latest_round_amount_mm_usd": 8.0,
        },
    },
    {
        "company": "Recraft",
        "info": {
            "name": "Recraft, Inc",
            "description": "Recraft offers an AI-powered design tool for creating and editing images, providing features like image generation, vectorization, and mockup creation for professional designers.",
            "website": "https://www.recraft.ai",
            "crunchbase_profile": "https://www.crunchbase.com/organization/recraft",
            "year_founded": 2022,
            "ceo": "Anna Veronika Dorogush",
            "total_funding_mm_usd": 12.0,
            "latest_round": "Series A",
            "latest_round_date": "2024-01-18",
            "latest_round_amount_mm_usd": 12.0,
        },
    },
    {
        "company": "Brightseed",
        "info": {
            "name": "Brightseed, Inc",
            "description": "Brightseed utilizes artificial intelligence to discover bioactive compounds in nature that can restore human health, focusing on the intersection of nature, science, and humanity.",
            "website": "https://www.brightseedbio.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/brightseed",
            "year_founded": 2017,
            "ceo": "Jim Flatt",
            "total_funding_mm_usd": 120.8,
            "latest_round": "Series B",
            "latest_round_date": "2022-05-09",
            "latest_round_amount_mm_usd": 68.0,
        },
    },
    {
        "company": "Etched.ai",
        "info": {
            "name": "Etched.ai, Inc.",
            "description": "Etched.ai is developing the world's first transformer ASIC, a specialized chip designed to run AI models faster and more efficiently than traditional GPUs.",
            "website": "https://www.etched.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/etched-ai",
            "year_founded": 2022,
            "ceo": "Gavin Uberti",
            "total_funding_mm_usd": 125.4,
            "latest_round": "Series A",
            "latest_round_date": "2024-06-25",
            "latest_round_amount_mm_usd": 120.0,
        },
    },
    {
        "company": "World Labs",
        "info": {
            "name": "World Labs Technologies",
            "description": "World Labs is an AI-focused company dedicated to advancing artificial intelligence technologies and applications across various sectors.",
            "website": "https://www.worldlabs.ai",
            "crunchbase_profile": "https://www.crunchbase.com/organization/world-labs",
            "year_founded": 2024,
            "ceo": "Fei-Fei Li",
            "total_funding_mm_usd": 230.0,
            "latest_round": "Series A",
            "latest_round_date": "2024-09-13",
            "latest_round_amount_mm_usd": 230.0,
        },
    },
    {
        "company": "Sight Machine",
        "info": {
            "name": "Sight Machine Inc.",
            "description": "Sight Machine provides manufacturing analytics powered by AI, offering real-time insights to improve production efficiency and quality.",
            "website": "https://sightmachine.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/sight-machine",
            "year_founded": 2011,
            "ceo": "Jon Sobel",
            "total_funding_mm_usd": 80.4,
            "latest_round": "Series C",
            "latest_round_date": "2019-04-23",
            "latest_round_amount_mm_usd": 29.4,
        },
    },
    {
        "company": "Ambience Healthcare",
        "info": {
            "name": "Ambience Healthcare, Inc.",
            "description": "Ambience Healthcare offers AI-powered scribe solutions for healthcare providers, automating clinical documentation to reduce clinician burnout and improve care quality.",
            "website": "https://www.ambiencehealthcare.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/ambience-healthcare",
            "year_founded": 2020,
            "ceo": "Mike Ng",
            "total_funding_mm_usd": 76.3,
            "latest_round": "Series B",
            "latest_round_date": "2024-02-06",
            "latest_round_amount_mm_usd": 70.0,
        },
    },
    {
        "company": "Safely You",
        "info": {
            "name": "SafelyYou, Inc.",
            "description": "Safely You utilizes AI technology to reduce falls and associated risks in senior living communities, enhancing resident safety and care.",
            "website": "https://www.safely-you.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/safely-you",
            "year_founded": 2016,
            "ceo": "George Netscher",
            "total_funding_mm_usd": 71.3,
            "latest_round": "Debt",
            "latest_round_date": "2023-05-25",
            "latest_round_amount_mm_usd": 10.0,
        },
    },
    {
        "company": "Kintsugi.AI",
        "info": {
            "name": "KintsugiAI, Inc.",
            "description": "Kintsugi.AI provides sales tax automation solutions for companies globally, streamlining compliance processes and reducing errors.",
            "website": "https://trykintsugi.com",
            "crunchbase_profile": "https://www.crunchbase.com/organization/kintsugi-0524",
            "year_founded": 2022,
            "ceo": "Pujun Bhatnagar",
            "total_funding_mm_usd": 12.2,
            "latest_round": "Series A",
            "latest_round_date": "2024-11-19",
            "latest_round_amount_mm_usd": 4.0,
        },
    },
]
# Per-field definitions for the "company_info" object, in the order they should
# appear in the schema. Every field listed here is mandatory.
_COMPANY_INFO_PROPERTIES = {
    "name": {
        "type": "string",
        "description": "Official company name",
    },
    "description": {
        "type": "string",
        "description": "Brief description of the company and its activities",
    },
    "website": {
        "type": "string",
        "format": "uri",
        "description": "Company's official website URL",
    },
    "crunchbase_profile": {
        "type": "string",
        "format": "uri",
        "description": "Company's Crunchbase profile URL",
    },
    "year_founded": {
        "type": "integer",
        "minimum": 1800,
        "description": "Year when the company was founded",
    },
    "ceo": {
        "type": "string",
        "description": "Name of the company's CEO",
    },
    "total_funding_mm_usd": {
        "type": "number",
        "minimum": 0,
        "description": "Total funding raised in millions of USD",
    },
    "latest_round": {
        "type": "string",
        "description": "Type of the most recent funding round (e.g., Series A, Seed, etc.)",
    },
    "latest_round_date": {
        "type": "string",
        "format": "date",
        "description": "Date of the most recent funding round (YYYY-MM-DD)",
    },
    "latest_round_amount_mm_usd": {
        "type": "number",
        "minimum": 0,
        "description": "Amount raised in the most recent funding round in millions of USD",
    },
}

# JSON-Schema-style description of the company record to extract. The
# "required" list names every declared property, in declaration order.
EXTRACTION_SCHEMA = {
    "type": "object",
    "title": "company_info",
    "properties": _COMPANY_INFO_PROPERTIES,
    "required": list(_COMPANY_INFO_PROPERTIES),
    "description": "Company information",
}
def main():
    """Create the "Startup Data Enrichment" LangSmith dataset from EXAMPLES.

    Looks the dataset up by name first. If it already exists, its URL is
    printed and the process exits with status 1 without modifying anything.
    Otherwise the dataset is created and one example is uploaded per record
    in EXAMPLES: the input is the company name plus EXTRACTION_SCHEMA, and
    the output is the record's expected ``info`` payload.

    Raises:
        SystemExit: with code 1 when a dataset with this name already exists.
    """
    # Imported here so that importing this module for its constants does not
    # require the langsmith package.
    from langsmith import Client
    from langsmith.utils import LangSmithNotFoundError

    client = Client()
    dataset_name = "Startup Data Enrichment"

    # Storing inputs in a dataset lets us
    # run chains and LLMs over a shared set of examples.
    try:
        existing_dataset = client.read_dataset(dataset_name=dataset_name)
    except LangSmithNotFoundError:
        # Expected on a first run: no such dataset yet, so create it below.
        pass
    else:
        print(f"Dataset '{dataset_name}' already exists.")
        print("You can access the dataset via the URL: ", existing_dataset.url)
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module for interactive use and is not always available.
        raise SystemExit(1)

    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Evaluate ability to research information about startups (e.g., latest round, total funding, year founded etc.)",
    )

    # Prepare inputs and outputs for bulk creation; the two lists are
    # index-aligned, one entry per record in EXAMPLES.
    inputs = [
        {"company": record["company"], "extraction_schema": EXTRACTION_SCHEMA}
        for record in EXAMPLES
    ]
    outputs = [{"info": record["info"]} for record in EXAMPLES]

    client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )

    print(f"Dataset '{dataset_name}' created with {len(EXAMPLES)} examples.")
    print("You can access the dataset via the URL: ", dataset.url)


if __name__ == "__main__":
    main()