Commit 73dd1dd

perf: ⚡️ Remove langchain for model calling
1 parent e36ee9f commit 73dd1dd

14 files changed · +249 additions · −127 deletions

apps/dashboard/pages/api/agents/[id]/query.tsx

Lines changed: 1 addition & 1 deletion
@@ -128,7 +128,7 @@ export const chatAgentRequest = async (
    agent.modelName = data.modelName;
  }

- const manager = new AgentManager({ agent, topK: 5 });
+ const manager = new AgentManager({ agent, topK: 50 });
  const ctrl = new AbortController();

  if (data.streaming) {

packages/lib/agent.ts

Lines changed: 23 additions & 11 deletions
@@ -1,6 +1,7 @@
  import axios from 'axios';
  import { ChatOpenAI } from 'langchain/chat_models/openai';
  import { AIMessage, HumanMessage, SystemMessage } from 'langchain/schema';
+ import { ChatCompletionMessageParam } from 'openai/resources';

  import {
    Agent,
@@ -284,19 +285,30 @@ export default class AgentManager {
    const _promptType = promptType || this.agent.promptType;
    const _promptTemplate = promptTemplate || (this.agent.prompt as string);

-   let initialMessages: any = [];
+   let initialMessages: ChatCompletionMessageParam[] = [];
    if (_promptType === PromptType.customer_support) {
      initialMessages = [
-       new SystemMessage(
-         `${_promptTemplate}
-         Answer the query in the same language in which the query is asked.
-         Give answer in the markdown rich format with proper bolds, italics etc as per heirarchy and readability requirements.
-         You will be provided by a context retrieved by the knowledge_base_retrieval function.
-         If the context does not contain the information needed to answer this query then politely say that you don't know without mentioning the existence of a context.
-         Remember do not answer any query that is outside of the provided context nor mention its existence.
-         You are allowed to use the following conversation history to answer the query.
-         `
-       ),
+       {
+         role: 'system',
+         content: `${_promptTemplate}
+         Answer the query in the same language in which the query is asked.
+         Give answer in the markdown rich format with proper bolds, italics etc as per heirarchy and readability requirements.
+         You will be provided by a context retrieved by the knowledge_base_retrieval function.
+         If the context does not contain the information needed to answer this query then politely say that you don't know without mentioning the existence of a context.
+         Remember do not answer any query that is outside of the provided context nor mention its existence.
+         You are allowed to use the following conversation history to answer the query.
+         `,
+       },
+       // new SystemMessage(
+       //   `${_promptTemplate}
+       //   Answer the query in the same language in which the query is asked.
+       //   Give answer in the markdown rich format with proper bolds, italics etc as per heirarchy and readability requirements.
+       //   You will be provided by a context retrieved by the knowledge_base_retrieval function.
+       //   If the context does not contain the information needed to answer this query then politely say that you don't know without mentioning the existence of a context.
+       //   Remember do not answer any query that is outside of the provided context nor mention its existence.
+       //   You are allowed to use the following conversation history to answer the query.
+       //   `
+       // ),
        // new HumanMessage(`${_promptTemplate}
        // Answer the message in the same language in which the message is asked.
        // If you don't find an answer from the chunks, politely say that you don't know without mentioning the existence of a context. Don't try to make up an answer.

packages/lib/chains/chat-retrieval.ts

Lines changed: 15 additions & 1 deletion
@@ -7,8 +7,10 @@ import { ChatRequest } from '@chaindesk/lib/types/dtos';
  import { Datastore, MessageFrom } from '@chaindesk/prisma';

  import chat, { ChatProps } from '../chatv2';
+ import { ModelConfig } from '../config';
  import createPromptContext from '../create-prompt-context';
  import retrieval from '../retrieval';
+ import truncateArray from '../truncateArray';

  export type ChatRetrievalChainProps = Omit<ChatProps, 'prompt'> & {
    datastore?: Datastore;
@@ -39,7 +41,7 @@ const chatRetrieval = async ({
    abortController,
    ...otherProps
  }: ChatRetrievalChainProps) => {
-   const results = retrievalSearch
+   const _results = retrievalSearch
      ? await retrieval({
          datastore,
          filters,
@@ -48,6 +50,18 @@ const chatRetrieval = async ({
        })
      : [];

+   const results = await truncateArray<AppDocument<ChunkMetadataRetrieved>>({
+     items: _results,
+     getText: (item) => item.pageContent,
+     setText: (item, text) => {
+       return {
+         ...item,
+         pageContent: text,
+       };
+     },
+     maxTokens: ModelConfig?.[modelName!]?.maxTokens * 0.2,
+   });
+
    const prompt = getPrompt(results);

    // Generate answer

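A rough worked example of the new retrieval budget (illustrative only, not part of the commit; it assumes ModelConfig is keyed by the AgentModelName enum and that the config module is importable under the package name shown):

// Hypothetical sketch: with gpt_3_5_turbo's maxTokens raised to 16385 (see config.ts below),
// retrieved chunks are truncated to roughly 20% of the context window before prompting.
import { AgentModelName } from '@chaindesk/prisma';
import { ModelConfig } from '@chaindesk/lib/config';

const retrievalBudget = ModelConfig[AgentModelName.gpt_3_5_turbo].maxTokens * 0.2;
// 16385 * 0.2 ≈ 3277 tokens reserved for retrieved context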
packages/lib/chains/index.ts

Lines changed: 7 additions & 3 deletions
@@ -100,9 +100,13 @@ export default class ChainManager {

    return chat({
      initialMessages: [
-       new SystemMessage(
-         `You are a productivity assistant. Please provide a helpful and professional response to the user's question or issue.`
-       ),
+       {
+         role: 'system',
+         content: `You are a productivity assistant. Please provide a helpful and professional response to the user's question or issue.`,
+       },
+       // new SystemMessage(
+       //   `You are a productivity assistant. Please provide a helpful and professional response to the user's question or issue.`
+       // ),
      ],
      prompt: input,
      temperature: temperature || 0.5,

packages/lib/chains/qa.ts

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ const qa = async ({
    abortController,
  }: QAChainProps) => {
    return chatRetrieval({
-     modelName: 'gpt_3_5_turbo_16k',
+     modelName: 'gpt_3_5_turbo',
      retrievalSearch: query,
      getPrompt(chunks) {
        return promptInject({

packages/lib/chat-model.ts

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
+ import OpenAI, { ClientOptions } from 'openai';
+ import { ChatCompletionMessageParam, CompletionUsage } from 'openai/resources';
+ import pRetry from 'p-retry';
+
+ import failedAttemptHandler from './lc-failed-attempt-hanlder';
+
+ export default class ChatModel {
+   public openai: OpenAI;
+
+   constructor(options: ClientOptions) {
+     this.openai = new OpenAI({
+       ...options,
+     });
+   }
+
+   static countTokensMessages(messages: ChatCompletionMessageParam[]) {
+     let counter = 0;
+
+     for (const each of messages) {
+       counter += each?.content?.length || 0;
+     }
+
+     return counter / 4;
+   }
+
+   async call({
+     handleStream,
+     signal,
+     ...otherProps
+   }: Parameters<typeof this.openai.chat.completions.create>[0] & {
+     handleStream?: (text: string) => any;
+     signal?: AbortSignal;
+   }) {
+     return pRetry(
+       async () => {
+         if (!!handleStream) {
+           let usage: CompletionUsage = {
+             completion_tokens: 0,
+             prompt_tokens: ChatModel.countTokensMessages(otherProps?.messages),
+             total_tokens: 0,
+           };
+
+           const streaming = await this.openai.chat.completions.create({
+             ...otherProps,
+             stream: true,
+           });
+
+           let buffer = '';
+           for await (const chunk of streaming) {
+             const content = chunk.choices[0]?.delta?.content || '';
+
+             handleStream?.(content);
+             buffer += content;
+             usage.completion_tokens += 1;
+           }
+
+           usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
+
+           return {
+             answer: buffer?.trim?.(),
+             usage,
+           };
+         } else {
+           const response = await this.openai.chat.completions.create({
+             ...otherProps,
+             stream: false,
+           });
+
+           return {
+             answer: response?.choices?.[0]?.message?.content?.trim?.(),
+             usage: response?.usage,
+           };
+         }
+       },
+       {
+         signal,
+         retries: 6,
+         onFailedAttempt: failedAttemptHandler,
+       }
+     );
+   }
+ }

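A minimal usage sketch of the new wrapper (illustrative only, not part of the commit; the import path and API-key handling are assumptions):

// Hypothetical usage of ChatModel, which wraps the official openai client with p-retry.
import ChatModel from '@chaindesk/lib/chat-model';

const model = new ChatModel({ apiKey: process.env.OPENAI_API_KEY });

const { answer, usage } = await model.call({
  model: 'gpt-3.5-turbo-1106',
  messages: [{ role: 'user', content: 'Hello!' }],
  // Providing handleStream switches call() to streaming mode and
  // forwards each token delta as it arrives.
  handleStream: (token) => process.stdout.write(token),
});

console.log(answer, usage?.total_tokens);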
packages/lib/chatv2.ts

Lines changed: 32 additions & 78 deletions
@@ -1,17 +1,11 @@
- import { ChatOpenAI } from 'langchain/chat_models/openai';
- import {
-   AIMessage,
-   BaseMessage,
-   FunctionMessage,
-   HumanMessage,
-   MessageContent,
- } from 'langchain/schema';
+ import { ChatCompletionMessageParam } from 'openai/resources';

  import { AgentModelName, Message, MessageFrom } from '@chaindesk/prisma';

  import { ChatModelConfigSchema, ChatResponse } from './types/dtos';
+ import ChatModel from './chat-model';
  import { ModelConfig } from './config';
- import failedAttemptHandler from './lc-failed-attempt-hanlder';
+ import formatMessagesOpenAI from './format-messages-openai';
  import truncateChatMessages from './truncateChatMessages';

  export type ChatProps = ChatModelConfigSchema & {
@@ -20,7 +14,7 @@ export type ChatProps = ChatModelConfigSchema & {
    modelName?: AgentModelName;
    history?: Message[];
    abortController?: any;
-   initialMessages?: BaseMessage[] | undefined;
+   initialMessages?: ChatCompletionMessageParam[] | undefined;
    context?: string;
    useXpContext?: boolean;
  };
@@ -37,95 +31,55 @@ const chat = async ({
    useXpContext,
    ...otherProps
  }: ChatProps) => {
-   let totalCompletionTokens = 0;
-   let totalPromptTokens = 0;
-   let totalExecutionTokens = 0;
-
-   const model = new ChatOpenAI({
-     streaming: Boolean(stream),
-     modelName: ModelConfig[modelName]?.name,
-
-     temperature: temperature || 0,
-     topP: otherProps.topP,
-     frequencyPenalty: otherProps.frequencyPenalty,
-     presencePenalty: otherProps.presencePenalty,
-     maxTokens: otherProps.maxTokens,
-     onFailedAttempt: failedAttemptHandler,
-     callbacks: [
-       {
-         handleLLMNewToken: stream,
-         handleLLMEnd: (output, runId, parentRunId?, tags?) => {
-           const { completionTokens, promptTokens, totalTokens } =
-             output.llmOutput?.tokenUsage ||
-             output.llmOutput?.estimatedTokenUsage;
-           totalCompletionTokens += completionTokens ?? 0;
-           totalPromptTokens += promptTokens ?? 0;
-           totalExecutionTokens += totalTokens ?? 0;
-         },
-         handleLLMError: async (err: Error) => {
-           console.error('handleLLMError', err);
-         },
-       },
-     ],
-   });
-
-   if (process.env.APP_ENV === 'test') {
-     model.call = async (props: any) => {
-       const res = {
-         text: 'Hello world',
-       } as any;
-
-       if (stream) {
-         stream(res.text);
-       }
-
-       return res;
-     };
-   }
-
    const truncatedHistory = (
      await truncateChatMessages({
-       messages: (history || [])
-         ?.map((each) => {
-           if (each.from === MessageFrom.human) {
-             return new HumanMessage(each.text);
-           }
-           return new AIMessage(each.text);
-         })
-         .reverse(),
+       messages: formatMessagesOpenAI(history || []).reverse(),
        maxTokens: ModelConfig[modelName]?.maxTokens * 0.3, // 30% tokens limit for history
      })
    ).reverse();

-   const messages = [
+   const messages: ChatCompletionMessageParam[] = [
      ...initialMessages,
      ...truncatedHistory,
-     ...(useXpContext && context
+     ...((useXpContext && context
        ? [
-           new FunctionMessage({
-             content: context,
+           {
+             role: 'function',
+             content: context!,
              name: 'knowledge_base_retrieval',
-           }),
+           },
          ]
-       : []),
-     new HumanMessage(prompt),
+       : []) as ChatCompletionMessageParam[]),
+     { role: 'user', content: prompt },
    ];

    // console.log('messages ===--------c_>', messages);

-   const output = await model.call(messages, {
+   const model = new ChatModel({});
+
+   const output = await model.call({
+     handleStream: stream,
+     model: ModelConfig[modelName]?.name,
+     messages,
+
+     temperature: temperature || 0,
+     top_p: otherProps.topP,
+     frequency_penalty: otherProps.frequencyPenalty,
+     presence_penalty: otherProps.presencePenalty,
+     max_tokens: otherProps.maxTokens,
      signal: abortController?.signal,
    });

-   const answer = (output?.content as string)?.trim?.();
+   const answer = output?.answer;

    const usage = {
-     completionTokens: totalCompletionTokens,
-     promptTokens: totalPromptTokens,
-     totalTokens: totalExecutionTokens,
+     completionTokens: output?.usage?.completion_tokens,
+     promptTokens: output?.usage?.prompt_tokens,
+     totalTokens: output?.usage?.total_tokens,
      cost:
-       totalPromptTokens * ModelConfig[modelName]?.providerPriceByInputToken +
-       totalCompletionTokens *
+       (output?.usage?.prompt_tokens || 0) *
+         ModelConfig[modelName]?.providerPriceByInputToken +
+       (output?.usage?.completion_tokens || 0) *
          ModelConfig[modelName]?.providerPricePriceByOutputToken,
    };

packages/lib/config.ts

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ export const XPBNPLabels = {
  export const ModelConfig = {
    [AgentModelName.gpt_3_5_turbo]: {
      name: 'gpt-3.5-turbo-1106',
-     maxTokens: 4096,
+     maxTokens: 16385,
      cost: 1,
      providerPriceByInputToken: 0.000001,
      providerPricePriceByOutputToken: 0.000002,

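A worked cost example (illustrative only, not part of the diff), matching how chatv2.ts above now derives usage.cost from the OpenAI usage object and the prices listed here:

// 1,000 prompt tokens and 500 completion tokens on gpt_3_5_turbo:
const cost =
  1000 * 0.000001 + // providerPriceByInputToken
  500 * 0.000002; // providerPricePriceByOutputToken
// => $0.002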
packages/lib/count-tokens.ts

Lines changed: 4 additions & 0 deletions
@@ -14,4 +14,8 @@ const countTokens = ({
    return nbTokens;
  };

+ export const countTokensEstimation = ({ text }: { text: string }) => {
+   return text?.length / 4;
+ };
+
  export default countTokens;
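The new helper is a character-count heuristic rather than a real tokenizer; ChatModel.countTokensMessages above relies on the same ~4 characters-per-token estimate. A quick illustration (not part of the commit):

// countTokensEstimation assumes roughly 4 characters per token.
countTokensEstimation({ text: 'a'.repeat(400) }); // ≈ 100 tokens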
packages/lib/format-messages-openai.ts

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+ import { ChatCompletionMessageParam } from 'openai/resources';
+
+ import { Message } from '@chaindesk/prisma';
+
+ const formatMessagesOpenAI = (messages: Message[]) => {
+   return messages.map((each) => {
+     let role = 'user' as ChatCompletionMessageParam['role'];
+
+     if (each.from === 'agent') {
+       role = 'assistant';
+     }
+
+     return {
+       role,
+       content: each.text,
+     } as ChatCompletionMessageParam;
+   });
+ };
+
+ export default formatMessagesOpenAI;

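A small usage sketch (illustrative only, not part of the commit; the literal objects stand in for Prisma Message records and the import path is assumed):

import formatMessagesOpenAI from '@chaindesk/lib/format-messages-openai';

// Messages stored with from: 'human' map to role: 'user';
// messages stored with from: 'agent' map to role: 'assistant'.
const openAiMessages = formatMessagesOpenAI([
  { from: 'human', text: 'Where do I change my plan?' },
  { from: 'agent', text: 'Head to Settings > Billing.' },
] as any);
// => [
//   { role: 'user', content: 'Where do I change my plan?' },
//   { role: 'assistant', content: 'Head to Settings > Billing.' },
// ]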