Limit sent chat to 2048 tokens.
This also solves the issue where we would request more tokens than the model is capable of handling (over 4096).
This commit is contained in:
parent
aafefc3ad0
commit
2a38ae4a95
4 changed files with 30 additions and 12 deletions
6
package-lock.json
generated
6
package-lock.json
generated
|
@ -11,6 +11,7 @@
|
|||
"dependencies": {
|
||||
"discord.js": "^14.8.0",
|
||||
"fold-to-ascii": "^5.0.1",
|
||||
"gpt-3-encoder": "^1.1.4",
|
||||
"openai": "^3.2.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
@ -1161,6 +1162,11 @@
|
|||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/gpt-3-encoder": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/gpt-3-encoder/-/gpt-3-encoder-1.1.4.tgz",
|
||||
"integrity": "sha512-fSQRePV+HUAhCn7+7HL7lNIXNm6eaFWFbNLOOGtmSJ0qJycyQvj60OvRlH7mee8xAMjBDNRdMXlMwjAbMTDjkg=="
|
||||
},
|
||||
"node_modules/grapheme-splitter": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz",
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
"dependencies": {
|
||||
"discord.js": "^14.8.0",
|
||||
"fold-to-ascii": "^5.0.1",
|
||||
"gpt-3-encoder": "^1.1.4",
|
||||
"openai": "^3.2.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -3,6 +3,7 @@ import { Collection, Message as DiscordMessage } from "discord.js";
|
|||
import FoldToAscii from "fold-to-ascii";
|
||||
|
||||
import config from "./config.json";
|
||||
import countTokens from "./tokenCounter";
|
||||
|
||||
/**
|
||||
* Formats the message to use as a message content in OpenAI api
|
||||
|
@ -74,24 +75,29 @@ function getAuthorUsername(message: DiscordMessage): string {
|
|||
}
|
||||
|
||||
/**
|
||||
* Converts the Collection of Discord Messages to array of OpenAI Messages
|
||||
* Converts the Collection of Discord Messages to array of OpenAI Messages to send
|
||||
* @param messages the collection to convert
|
||||
* @returns the converted messages
|
||||
*/
|
||||
export default function toOpenAIMessages(messages: Collection<string, DiscordMessage>): OpenAIMessage[] {
|
||||
const rvalue: OpenAIMessage[] = [];
|
||||
let tokenCount = 0;
|
||||
|
||||
messages.sort((a, b) => b.createdTimestamp - a.createdTimestamp);
|
||||
|
||||
for (const message of messages.values()) {
|
||||
const content = formatMessage(message);
|
||||
// FIXME: tokens are not being counted properly (it's lower than it is) but it's enough for me for now.
|
||||
tokenCount += countTokens(content);
|
||||
if (tokenCount > 2048) break;
|
||||
rvalue.push({
|
||||
role: message.author.id == message.client.user.id ? "assistant" : "user",
|
||||
content: content,
|
||||
name: getAuthorUsername(message),
|
||||
});
|
||||
}
|
||||
|
||||
rvalue.push({ role: "system", content: config.systemPrompt});
|
||||
|
||||
messages
|
||||
.sort((a, b) => a.createdTimestamp - b.createdTimestamp)
|
||||
.each(message => {
|
||||
rvalue.push({
|
||||
role: message.author.id == message.client.user.id ? "assistant" : "user",
|
||||
content: formatMessage(message),
|
||||
name: getAuthorUsername(message),
|
||||
});
|
||||
});
|
||||
|
||||
return rvalue;
|
||||
return rvalue.reverse();
|
||||
}
|
||||
|
|
5
src/tokenCounter.ts
Normal file
5
src/tokenCounter.ts
Normal file
|
@ -0,0 +1,5 @@
|
|||
import { encode } from "gpt-3-encoder";
|
||||
|
||||
export default function countTokens(text: string): number {
|
||||
return encode(text).length;
|
||||
}
|
Loading…
Reference in a new issue