mirror of
https://github.com/Mintplex-Labs/langchainjs.git
synced 2026-07-01 12:17:38 -04:00
feat: Add support for the Solidity language (#1616)
This commit is contained in:
@@ -6,7 +6,7 @@ hide_table_of_contents: true
|
||||
|
||||
LangChain supports a variety of different markup and programming language-specific text splitters to split your text based on language-specific syntax.
|
||||
This results in more semantically self-contained chunks that are more useful to a vector store or other retriever.
|
||||
Popular languages like JavaScript, Python, and Rust are supported as well as LaTeX, HTML, and Markdown.
|
||||
Popular languages like JavaScript, Python, Solidity, and Rust are supported as well as LaTeX, HTML, and Markdown.
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
@@ -287,3 +287,32 @@ test("Rust code splitter", async () => {
|
||||
"}",
|
||||
]);
|
||||
});
|
||||
|
||||
test("Solidity code splitter", async () => {
|
||||
const splitter = RecursiveCharacterTextSplitter.fromLanguage("sol", {
|
||||
chunkSize: 16,
|
||||
chunkOverlap: 0,
|
||||
});
|
||||
const code = `pragma solidity ^0.8.20;
|
||||
contract HelloWorld {
|
||||
function add(uint a, uint b) pure public returns(uint) {
|
||||
return a + b;
|
||||
}
|
||||
}
|
||||
`;
|
||||
const chunks = await splitter.splitText(code);
|
||||
expect(chunks).toStrictEqual([
|
||||
"pragma solidity",
|
||||
"^0.8.20;",
|
||||
"contract",
|
||||
"HelloWorld {",
|
||||
"function",
|
||||
"add(uint a,",
|
||||
"uint b) pure",
|
||||
"public",
|
||||
"returns(uint) {",
|
||||
"return a",
|
||||
"+ b;",
|
||||
"}\n }",
|
||||
]);
|
||||
});
|
||||
|
||||
@@ -222,6 +222,7 @@ export const SupportedTextSplitterLanguages = [
|
||||
"markdown",
|
||||
"latex",
|
||||
"html",
|
||||
"sol",
|
||||
] as const;
|
||||
|
||||
export type SupportedTextSplitterLanguage =
|
||||
@@ -617,6 +618,36 @@ export class RecursiveCharacterTextSplitter
|
||||
" ",
|
||||
"",
|
||||
];
|
||||
} else if (language === "sol") {
|
||||
return [
|
||||
// Split along compiler information definitions
|
||||
"\npragma ",
|
||||
"\nusing ",
|
||||
// Split along contract definitions
|
||||
"\ncontract ",
|
||||
"\ninterface ",
|
||||
"\nlibrary ",
|
||||
// Split along method definitions
|
||||
"\nconstructor ",
|
||||
"\ntype ",
|
||||
"\nfunction ",
|
||||
"\nevent ",
|
||||
"\nmodifier ",
|
||||
"\nerror ",
|
||||
"\nstruct ",
|
||||
"\nenum ",
|
||||
// Split along control flow statements
|
||||
"\nif ",
|
||||
"\nfor ",
|
||||
"\nwhile ",
|
||||
"\ndo while ",
|
||||
"\nassembly ",
|
||||
// Split by the normal type of lines
|
||||
"\n\n",
|
||||
"\n",
|
||||
" ",
|
||||
"",
|
||||
];
|
||||
} else {
|
||||
throw new Error(`Language ${language} is not supported.`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user