mirror of
https://github.com/langchain-ai/langsmith-model-server.git
synced 2026-07-01 11:38:21 -04:00
101 lines
3.9 KiB
Python
101 lines
3.9 KiB
Python
from typing import Any, Dict, Iterator, List, Optional
|
|
|
|
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
|
|
from langchain_core.language_models.llms import LLM
|
|
from langchain_core.outputs import GenerationChunk
|
|
from langchain_core.runnables import ConfigurableField, Runnable
|
|
|
|
|
|
class CustomLLM(LLM):
    """A custom instruct style model.

    This model returns the first ``n`` characters of the input prompt. It is
    intended to serve as a template for your own instruct style model that you
    may want to expose in the playground.
    """

    # Number of leading prompt characters echoed back as the completion.
    n: int = 5

    def _completion(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Build the completion text shared by ``_call`` and ``_stream``.

        Args:
            prompt: The prompt to generate from.
            stop: Optional stop substrings. The output is cut off just before
                the earliest occurrence of any of them.

        Returns:
            The first ``n`` characters of ``prompt``, truncated at the first
            stop substring found (the stop substring itself is not included).
        """
        # Clamp ``n`` so a negative value yields an empty completion instead of
        # "everything except the last |n| characters" (plain slice semantics).
        text = prompt[: max(self.n, 0)]
        if not stop:
            return text

        # Empty stop strings are skipped: ``str.find("")`` is always 0 and
        # would otherwise erase the whole output.
        hits = [text.find(token) for token in stop if token]
        cut_points = [index for index in hits if index != -1]
        if cut_points:
            text = text[: min(cut_points)]
        return text

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run the LLM on the given input.

        Override this method to implement the LLM logic.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at
                the first occurrence of any of the stop substrings. If your own
                model cannot support stop tokens, consider raising
                NotImplementedError instead of ignoring them.
            run_manager: Callback manager for the run. Not used by this
                non-streaming implementation.
            **kwargs: Arbitrary additional keyword arguments. These are usually
                passed to the model provider API call; they are ignored here.

        Returns:
            The model output as a string. Actual completions SHOULD NOT include
            the prompt.
        """
        return self._completion(prompt, stop)

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Stream the LLM on the given prompt.

        This method should be overridden by subclasses that support streaming.

        If not implemented, the default behavior of calls to stream will be to
        fall back to the non-streaming version of the model and return the
        output as a single chunk. Do not implement this method if the model
        does not support streaming.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at
                the first occurrence of any of these substrings.
            run_manager: Callback manager for the run. When provided, it is
                notified of every emitted chunk.
            **kwargs: Arbitrary additional keyword arguments. These are usually
                passed to the model provider API call; they are ignored here.

        Returns:
            An iterator of GenerationChunks, one per output character.
        """
        # The full text is resolved up front so that multi-character stop
        # substrings are honored and streaming matches ``_call`` exactly.
        for char in self._completion(prompt, stop):
            chunk = GenerationChunk(text=char)
            if run_manager:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)

            yield chunk

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            # The model name allows users to specify custom token counting
            # rules in LLM monitoring applications (e.g., in LangSmith users
            # can provide per token pricing for their model and monitor
            # costs for the given LLM.)
            # NOTE(review): the value says "Chat" although this class is an
            # instruct-style LLM; left unchanged because monitoring rules may
            # be keyed on it -- confirm before renaming.
            "model_name": "CustomChatModel",
        }

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this LLM. Used for logging purposes only."""
        return "custom"

    def with_configurable_fields(self) -> Runnable:
        """Expose fields you want to be configurable in the playground.

        We will automatically expose these to the playground. If you don't want
        to expose any fields, you can remove this method.

        Returns:
            A runnable wrapping this model in which ``n`` can be overridden
            through the configurable field with id ``"n"``.
        """
        return self.configurable_fields(
            n=ConfigurableField(
                id="n",
                name="Num Characters",
                description="Number of characters to return from the input prompt.",
            )
        )
|