Limit sent chat to 2048 tokens.

This also solves the issue where we would request more tokens
than the model is capable of handling (over 4096).
This commit is contained in:
Wroclaw 2023-03-19 04:15:08 +01:00
parent aafefc3ad0
commit 2a38ae4a95
4 changed files with 30 additions and 12 deletions

6
package-lock.json generated
View file

@ -11,6 +11,7 @@
"dependencies": { "dependencies": {
"discord.js": "^14.8.0", "discord.js": "^14.8.0",
"fold-to-ascii": "^5.0.1", "fold-to-ascii": "^5.0.1",
"gpt-3-encoder": "^1.1.4",
"openai": "^3.2.1" "openai": "^3.2.1"
}, },
"devDependencies": { "devDependencies": {
@ -1161,6 +1162,11 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/gpt-3-encoder": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/gpt-3-encoder/-/gpt-3-encoder-1.1.4.tgz",
"integrity": "sha512-fSQRePV+HUAhCn7+7HL7lNIXNm6eaFWFbNLOOGtmSJ0qJycyQvj60OvRlH7mee8xAMjBDNRdMXlMwjAbMTDjkg=="
},
"node_modules/grapheme-splitter": { "node_modules/grapheme-splitter": {
"version": "1.0.4", "version": "1.0.4",
"resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz", "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz",

View file

@ -12,6 +12,7 @@
"dependencies": { "dependencies": {
"discord.js": "^14.8.0", "discord.js": "^14.8.0",
"fold-to-ascii": "^5.0.1", "fold-to-ascii": "^5.0.1",
"gpt-3-encoder": "^1.1.4",
"openai": "^3.2.1" "openai": "^3.2.1"
}, },
"devDependencies": { "devDependencies": {

View file

@ -3,6 +3,7 @@ import { Collection, Message as DiscordMessage } from "discord.js";
import FoldToAscii from "fold-to-ascii"; import FoldToAscii from "fold-to-ascii";
import config from "./config.json"; import config from "./config.json";
import countTokens from "./tokenCounter";
/** /**
* Formats the message to use as a message content in OpenAI api * Formats the message to use as a message content in OpenAI api
@ -74,24 +75,29 @@ function getAuthorUsername(message: DiscordMessage): string {
} }
/** /**
* Converts the Collection of Discord Messages to array of OpenAI Messages * Converts the Collection of Discord Messages to array of OpenAI Messages to send
* @param messages the collection to convert * @param messages the collection to convert
* @returns the converted messages * @returns the converted messages
*/ */
export default function toOpenAIMessages(messages: Collection<string, DiscordMessage>): OpenAIMessage[] { export default function toOpenAIMessages(messages: Collection<string, DiscordMessage>): OpenAIMessage[] {
const rvalue: OpenAIMessage[] = []; const rvalue: OpenAIMessage[] = [];
let tokenCount = 0;
messages.sort((a, b) => b.createdTimestamp - a.createdTimestamp);
for (const message of messages.values()) {
const content = formatMessage(message);
// FIXME: tokens are not being counted properly (it's lower than it is) but it's enough for me for now.
tokenCount += countTokens(content);
if (tokenCount > 2048) break;
rvalue.push({
role: message.author.id == message.client.user.id ? "assistant" : "user",
content: content,
name: getAuthorUsername(message),
});
}
rvalue.push({ role: "system", content: config.systemPrompt}); rvalue.push({ role: "system", content: config.systemPrompt});
messages return rvalue.reverse();
.sort((a, b) => a.createdTimestamp - b.createdTimestamp)
.each(message => {
rvalue.push({
role: message.author.id == message.client.user.id ? "assistant" : "user",
content: formatMessage(message),
name: getAuthorUsername(message),
});
});
return rvalue;
} }

5
src/tokenCounter.ts Normal file
View file

@ -0,0 +1,5 @@
import { encode } from "gpt-3-encoder";
/**
 * Counts how many GPT tokens the given text encodes to.
 * @param text the string to tokenize
 * @returns the number of tokens produced by the encoder
 */
export default function countTokens(text: string): number {
    const tokens = encode(text);
    return tokens.length;
}