From 2a38ae4a955bdfd1eaa7be9571f42711b8866ed3 Mon Sep 17 00:00:00 2001 From: Wroclaw Date: Sun, 19 Mar 2023 04:15:08 +0100 Subject: [PATCH] Limit sent chat to 2048 tokens. This also solves the issue where we would request more tokens than the model is capable of (over 4096) --- package-lock.json | 6 ++++++ package.json | 1 + src/toOpenAIMessages.ts | 30 ++++++++++++++++++------------ src/tokenCounter.ts | 5 +++++ 4 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 src/tokenCounter.ts diff --git a/package-lock.json b/package-lock.json index 55aeb90..f5a0775 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "discord.js": "^14.8.0", "fold-to-ascii": "^5.0.1", + "gpt-3-encoder": "^1.1.4", "openai": "^3.2.1" }, "devDependencies": { @@ -1161,6 +1162,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/gpt-3-encoder": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/gpt-3-encoder/-/gpt-3-encoder-1.1.4.tgz", + "integrity": "sha512-fSQRePV+HUAhCn7+7HL7lNIXNm6eaFWFbNLOOGtmSJ0qJycyQvj60OvRlH7mee8xAMjBDNRdMXlMwjAbMTDjkg==" + }, "node_modules/grapheme-splitter": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz", diff --git a/package.json b/package.json index 0e43c41..2c52020 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "dependencies": { "discord.js": "^14.8.0", "fold-to-ascii": "^5.0.1", + "gpt-3-encoder": "^1.1.4", "openai": "^3.2.1" }, "devDependencies": { diff --git a/src/toOpenAIMessages.ts b/src/toOpenAIMessages.ts index 5f97681..38c7cd1 100644 --- a/src/toOpenAIMessages.ts +++ b/src/toOpenAIMessages.ts @@ -3,6 +3,7 @@ import { Collection, Message as DiscordMessage } from "discord.js"; import FoldToAscii from "fold-to-ascii"; import config from "./config.json"; +import countTokens from "./tokenCounter"; /** * Formats the message to use as a message content in OpenAI api */ @@ -74,24 +75,29 @@ 
function getAuthorUsername(message: DiscordMessage): string { } /** - * Converts the Collection of Discord Messages to array of OpenAI Messages + * Converts the Collection of Discord Messages to an array of OpenAI Messages to send * @param messages the collection to convert * @returns the converted messages */ export default function toOpenAIMessages(messages: Collection): OpenAIMessage[] { const rvalue: OpenAIMessage[] = []; + let tokenCount = 0; + + messages.sort((a, b) => b.createdTimestamp - a.createdTimestamp); + + for (const message of messages.values()) { + const content = formatMessage(message); + // FIXME: tokens are not being counted properly (the count comes out lower than the real usage) but it's enough for me for now. + tokenCount += countTokens(content); + if (tokenCount > 2048) break; + rvalue.push({ + role: message.author.id == message.client.user.id ? "assistant" : "user", + content: content, + name: getAuthorUsername(message), + }); + } rvalue.push({ role: "system", content: config.systemPrompt}); - messages - .sort((a, b) => a.createdTimestamp - b.createdTimestamp) - .each(message => { - rvalue.push({ - role: message.author.id == message.client.user.id ? "assistant" : "user", - content: formatMessage(message), - name: getAuthorUsername(message), - }); - }); - - return rvalue; + return rvalue.reverse(); } diff --git a/src/tokenCounter.ts b/src/tokenCounter.ts new file mode 100644 index 0000000..7141d0d --- /dev/null +++ b/src/tokenCounter.ts @@ -0,0 +1,5 @@ +import { encode } from "gpt-3-encoder"; + +export default function countTokens(text: string): number { + return encode(text).length; +}