make the chat work
main.go (91 lines changed)

@@ -1,22 +1,13 @@
 package main
 
 import (
-	"bytes"
-	"context"
+	"ctxGPT/LLMMapper"
 	"ctxGPT/database"
+	"ctxGPT/promptBuilder"
 	"fmt"
-	"os"
-	"strings"
-	"text/template"
-	"time"
-
-	"github.com/pkoukk/tiktoken-go"
-	"github.com/sashabaranov/go-openai"
 )
 
-const encodingName = "gpt-4"
-const model = openai.GPT4TurboPreview
 
 func main() {
 
 	db, err := database.NewDB()
@@ -38,88 +29,14 @@ func main() {
 	// to get text out of PDF, DOC, DOCX, XML, HTML, RTF, ODT pages documents and images to plain text
 	// use https://github.com/sajari/docconv
 
-	summarizeConvoPrompt, err := BuildPrompt("summarize.tmpl", struct{ WordLimit int }{WordLimit: 100})
+	summarizeConvoPrompt, err := promptBuilder.BuildPrompt("summarize.tmpl", struct{ WordLimit int }{WordLimit: 100})
 	if err != nil {
 		panic(err)
 	}
 	fmt.Println(summarizeConvoPrompt)
-	tokenCount, err := GetTokenCount(summarizeConvoPrompt)
+	tokenCount, err := LLMMapper.GetTokenCount(summarizeConvoPrompt)
 	if err != nil {
 		panic(err)
 	}
 	fmt.Println(tokenCount)
 }
-
-func BuildPrompt(name string, in interface{}) (string, error) {
-	fileLocation := "./prompts/" + name
-	tmpl, err := template.New(name).ParseFiles(fileLocation)
-	if err != nil {
-		return "", fmt.Errorf("error parsing template: %w", err)
-	}
-	b := bytes.Buffer{}
-	err = tmpl.Execute(&b, in)
-	if err != nil {
-		return "", fmt.Errorf("error executing template: %w", err)
-	}
-	return b.String(), nil
-}
-
-func GetTokenCount(input string) (int, error) {
-	tke, err := tiktoken.EncodingForModel(encodingName) // cached in "TIKTOKEN_CACHE_DIR"
-	if err != nil {
-		return 0, fmt.Errorf("error getting encoding: %w", err)
-	}
-	token := tke.Encode(input, nil, nil)
-	return len(token), nil
-}
-
-// SinglePromptInteraction calls openai chat endpoint with just a system prompt and a user prompt and returns the response
-func SinglePromptInteraction(systemPrompt, prompt string) (openai.ChatCompletionResponse, error) {
-	return singlePromptInteraction(systemPrompt, prompt, 5)
-}
-
-// singlePromptInteraction calls openai chat endpoint with just a system prompt and a user prompt and returns the response
-// it also attempts 5 retries spaced 5 seconds apart in the case of rate limiting errors
-func singlePromptInteraction(systemPrompt, prompt string, retries int) (openai.ChatCompletionResponse, error) {
-
-	client := openai.NewClient(os.Getenv("OPENAI_API_KEY"))
-	messages := []openai.ChatCompletionMessage{
-		{
-			Role:    openai.ChatMessageRoleSystem,
-			Content: systemPrompt,
-		},
-		{
-			Role:    openai.ChatMessageRoleUser,
-			Content: prompt,
-		},
-	}
-
-	resp, err := client.CreateChatCompletion(
-		context.Background(),
-		openai.ChatCompletionRequest{
-			Model:       model, // switch to the configured Model
-			Messages:    messages,
-			MaxTokens:   256,
-			Temperature: 0,
-		},
-	)
-	if err != nil {
-		// if 429, wait and try again
-		if strings.Contains(err.Error(), "429") && retries > 0 {
-			seconds := (1 / retries) * 60 // back off for each retry e.g. 12, 15, 20, 30, 60
-			fmt.Printf("429 error, waiting %v seconds...\n", seconds)
-			time.Sleep(time.Duration(seconds) * time.Second)
-			return singlePromptInteraction(systemPrompt, prompt, retries-1) // TODO: establish base case to prevent forever retrying
-		}
-		return openai.ChatCompletionResponse{}, fmt.Errorf("ChatCompletion request error: %w", err)
-	}
-
-	return resp, nil
-}
-
-// TODO: anything to be stored in the database should be chunked to sizes between 512 and 1024 tokens
-// it should also overlap with the previous chunk by 100-200 tokens
-// When the LLM asks for more context, it should be able to use the database to find the most relevant chunks here is how:
-// We will get the embeddings for each prompt and use those embeddings to search for the closest 6 chunks
-// we will use a separate LLM prompt to make an attempt to select and sort the chunks based on the user's input
-// then we will add the best matched chunks to the main prompt as further context for the given prompt
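
The removed TODO block sketches the retrieval plan: store text in chunks of 512 to 1024 tokens that overlap the previous chunk by 100 to 200 tokens, embed each prompt, pull the closest 6 chunks, let a separate LLM prompt select and sort them, and append the winners to the main prompt. Below is a minimal sketch of just the chunking step, reusing the same pkoukk/tiktoken-go tokenizer the old GetTokenCount relied on; ChunkText and its parameters are illustrative names, not code from this repository.

package main

import (
	"fmt"

	"github.com/pkoukk/tiktoken-go"
)

// ChunkText splits input into chunks of at most maxTokens tokens, where each
// chunk starts `overlap` tokens before the previous one ended, matching the
// 512-1024 token / 100-200 token overlap idea in the TODO. Hypothetical helper.
func ChunkText(input string, maxTokens, overlap int) ([]string, error) {
	if overlap >= maxTokens {
		return nil, fmt.Errorf("overlap (%d) must be smaller than maxTokens (%d)", overlap, maxTokens)
	}
	tke, err := tiktoken.EncodingForModel("gpt-4") // cached in "TIKTOKEN_CACHE_DIR"
	if err != nil {
		return nil, fmt.Errorf("error getting encoding: %w", err)
	}
	tokens := tke.Encode(input, nil, nil)

	var chunks []string
	step := maxTokens - overlap // advancing by this keeps `overlap` tokens shared between neighbours
	for start := 0; start < len(tokens); start += step {
		end := start + maxTokens
		if end > len(tokens) {
			end = len(tokens)
		}
		chunks = append(chunks, tke.Decode(tokens[start:end]))
		if end == len(tokens) {
			break
		}
	}
	return chunks, nil
}

func main() {
	chunks, err := ChunkText("a long document pulled from the database ...", 1024, 150)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(chunks), "chunks")
}

Embedding each chunk and searching for the closest 6 would sit on top of this, in whatever store database.NewDB wraps.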
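
One detail in the removed singlePromptInteraction is worth noting: `seconds := (1 / retries) * 60` is integer division in Go, so it evaluates to 0 for every attempt until retries reaches 1, when it jumps to 60. The 12, 15, 20, 30, 60 second schedule described in the comment corresponds to dividing 60 by the remaining retries. A small sketch of that schedule, in case the version moved into LLMMapper still wants it; backoffSeconds is an illustrative name, not the repository's code.

package main

import "fmt"

// backoffSeconds returns the wait before the next attempt so that five
// retries wait 12, 15, 20, 30 and 60 seconds, as the original comment
// describes. Illustrative only.
func backoffSeconds(retriesLeft int) int {
	if retriesLeft <= 0 {
		return 60
	}
	return 60 / retriesLeft
}

func main() {
	for retries := 5; retries >= 1; retries-- {
		fmt.Printf("retries left: %d, wait: %ds\n", retries, backoffSeconds(retries))
	}
}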
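
The comment at the top of main() points at github.com/sajari/docconv for turning PDF, DOC, DOCX, XML, HTML, RTF and ODT documents and images into plain text. Here is a minimal sketch of how that library is typically called, assuming its current module path is code.sajari.com/docconv; the file path is a placeholder and none of this exists in the repository yet.

package main

import (
	"fmt"
	"log"

	"code.sajari.com/docconv"
)

func main() {
	// ConvertPath detects the file type and returns the extracted plain
	// text in res.Body (plus metadata in res.Meta).
	res, err := docconv.ConvertPath("docs/example.pdf") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Body)
}

The extracted text would then be chunked and embedded as described in the TODO block above.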