mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-01 22:14:03 -04:00
127 lines
3.6 KiB
TypeScript
127 lines
3.6 KiB
TypeScript
import { Document } from "llamaindex/Node";
|
|
import type { ServiceContext } from "llamaindex/ServiceContext";
|
|
import { serviceContextFromDefaults } from "llamaindex/ServiceContext";
|
|
import { OpenAIEmbedding } from "llamaindex/embeddings/index";
|
|
import {
|
|
KeywordExtractor,
|
|
QuestionsAnsweredExtractor,
|
|
SummaryExtractor,
|
|
TitleExtractor,
|
|
} from "llamaindex/extractors/index";
|
|
import { OpenAI } from "llamaindex/llm/LLM";
|
|
import { SimpleNodeParser } from "llamaindex/nodeParsers/index";
|
|
import { afterAll, beforeAll, describe, expect, test, vi } from "vitest";
|
|
import {
|
|
DEFAULT_LLM_TEXT_OUTPUT,
|
|
mockEmbeddingModel,
|
|
mockLlmGeneration,
|
|
} from "./utility/mockOpenAI.js";
|
|
|
|
// Replace the "llamaindex/llm/open_ai" module for this test file: its
// getOpenAISession export becomes a mock function that always returns null.
vi.mock("llamaindex/llm/open_ai", () => ({
  getOpenAISession: vi.fn().mockImplementation(() => null),
}));
|
|
|
|
/**
 * Suite for the metadata extractors (keyword, title, questions-answered and
 * summary). Each test runs one extractor over nodes parsed from a single
 * document and checks that the first node's metadata contains the expected
 * key with DEFAULT_LLM_TEXT_OUTPUT as its value.
 */
describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
  // Shared across tests; assigned once in beforeAll.
  let serviceContext: ServiceContext;

  // Parses one document whose text is DEFAULT_LLM_TEXT_OUTPUT into nodes.
  // Every test uses the same input, so it is built in one place.
  const buildNodes = () =>
    new SimpleNodeParser().getNodesFromDocuments([
      new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
    ]);

  beforeAll(() => {
    const languageModel = new OpenAI({
      model: "gpt-3.5-turbo",
    });
    // NOTE(review): presumably stubs generation so the LLM replies with
    // DEFAULT_LLM_TEXT_OUTPUT (the assertions below depend on that) —
    // confirm against ./utility/mockOpenAI.js.
    mockLlmGeneration({ languageModel });

    const embedModel = new OpenAIEmbedding();
    mockEmbeddingModel(embedModel);

    serviceContext = serviceContextFromDefaults({
      llm: languageModel,
      embedModel,
    });
  });

  afterAll(() => {
    vi.clearAllMocks();
  });

  test("[MetadataExtractor] KeywordExtractor returns excerptKeywords metadata", async () => {
    const nodes = buildNodes();

    const keywordExtractor = new KeywordExtractor({
      llm: serviceContext.llm,
      keywords: 5,
    });

    const nodesWithKeywordMetadata = await keywordExtractor.processNodes(nodes);

    expect(nodesWithKeywordMetadata[0].metadata).toMatchObject({
      excerptKeywords: DEFAULT_LLM_TEXT_OUTPUT,
    });
  });

  test("[MetadataExtractor] TitleExtractor returns documentTitle metadata", async () => {
    const nodes = buildNodes();

    const titleExtractor = new TitleExtractor({
      llm: serviceContext.llm,
      nodes: 5,
    });

    const nodesWithTitleMetadata = await titleExtractor.processNodes(nodes);

    expect(nodesWithTitleMetadata[0].metadata).toMatchObject({
      documentTitle: DEFAULT_LLM_TEXT_OUTPUT,
    });
  });

  test("[MetadataExtractor] QuestionsAnsweredExtractor returns questionsThisExcerptCanAnswer metadata", async () => {
    const nodes = buildNodes();

    const questionsAnsweredExtractor = new QuestionsAnsweredExtractor({
      llm: serviceContext.llm,
      questions: 5,
    });

    const nodesWithQuestionsMetadata =
      await questionsAnsweredExtractor.processNodes(nodes);

    expect(nodesWithQuestionsMetadata[0].metadata).toMatchObject({
      questionsThisExcerptCanAnswer: DEFAULT_LLM_TEXT_OUTPUT,
    });
  });

  // Title previously read "SumamryExtractor"; corrected so the test can be
  // found by name filters and reads correctly in reports.
  test("[MetadataExtractor] SummaryExtractor returns sectionSummary metadata", async () => {
    const nodes = buildNodes();

    const summaryExtractor = new SummaryExtractor({
      llm: serviceContext.llm,
    });

    const nodesWithSummaryMetadata = await summaryExtractor.processNodes(nodes);

    expect(nodesWithSummaryMetadata[0].metadata).toMatchObject({
      sectionSummary: DEFAULT_LLM_TEXT_OUTPUT,
    });
  });
});
|