make the chat work
main.go (91 lines changed)

@@ -1,22 +1,13 @@
 package main
 
 import (
-	"bytes"
-	"context"
+	"ctxGPT/LLMMapper"
 	"ctxGPT/database"
+	"ctxGPT/promptBuilder"
 	"fmt"
-	"os"
-	"strings"
-	"text/template"
-	"time"
-
-	"github.com/pkoukk/tiktoken-go"
-	"github.com/sashabaranov/go-openai"
 )
 
-const encodingName = "gpt-4"
-const model = openai.GPT4TurboPreview
 
 func main() {
 
 	db, err := database.NewDB()
@@ -38,88 +29,14 @@ func main() {
 	// to get text out of PDF, DOC, DOCX, XML, HTML, RTF, ODT pages documents and images to plain text
 	// use https://github.com/sajari/docconv
 
-	summarizeConvoPrompt, err := BuildPrompt("summarize.tmpl", struct{ WordLimit int }{WordLimit: 100})
+	summarizeConvoPrompt, err := promptBuilder.BuildPrompt("summarize.tmpl", struct{ WordLimit int }{WordLimit: 100})
 	if err != nil {
 		panic(err)
 	}
 	fmt.Println(summarizeConvoPrompt)
-	tokenCount, err := GetTokenCount(summarizeConvoPrompt)
+	tokenCount, err := LLMMapper.GetTokenCount(summarizeConvoPrompt)
 	if err != nil {
 		panic(err)
 	}
 	fmt.Println(tokenCount)
 }
-
-func BuildPrompt(name string, in interface{}) (string, error) {
-	fileLocation := "./prompts/" + name
-	tmpl, err := template.New(name).ParseFiles(fileLocation)
-	if err != nil {
-		return "", fmt.Errorf("error parsing template: %w", err)
-	}
-	b := bytes.Buffer{}
-	err = tmpl.Execute(&b, in)
-	if err != nil {
-		return "", fmt.Errorf("error executing template: %w", err)
-	}
-	return b.String(), nil
-}
-
-func GetTokenCount(input string) (int, error) {
-	tke, err := tiktoken.EncodingForModel(encodingName) // cached in "TIKTOKEN_CACHE_DIR"
-	if err != nil {
-		return 0, fmt.Errorf("error getting encoding: %w", err)
-	}
-	token := tke.Encode(input, nil, nil)
-	return len(token), nil
-}
-
-// SinglePromptInteraction calls openai chat endpoint with just a system prompt and a user prompt and returns the response
-func SinglePromptInteraction(systemPrompt, prompt string) (openai.ChatCompletionResponse, error) {
-	return singlePromptInteraction(systemPrompt, prompt, 5)
-}
-
-// singlePromptInteraction calls openai chat endpoint with just a system prompt and a user prompt and returns the response
-// it also attempts 5 retries spaced 5 seconds apart in the case of rate limiting errors
-func singlePromptInteraction(systemPrompt, prompt string, retries int) (openai.ChatCompletionResponse, error) {
-
-	client := openai.NewClient(os.Getenv("OPENAI_API_KEY"))
-	messages := []openai.ChatCompletionMessage{
-		{
-			Role:    openai.ChatMessageRoleSystem,
-			Content: systemPrompt,
-		},
-		{
-			Role:    openai.ChatMessageRoleUser,
-			Content: prompt,
-		},
-	}
-
-	resp, err := client.CreateChatCompletion(
-		context.Background(),
-		openai.ChatCompletionRequest{
-			Model:       model, // switch to the configured Model
-			Messages:    messages,
-			MaxTokens:   256,
-			Temperature: 0,
-		},
-	)
-	if err != nil {
-		// if 429, wait and try again
-		if strings.Contains(err.Error(), "429") && retries > 0 {
-			seconds := (1 / retries) * 60 // back off for each retry e.g. 12, 15, 20, 30, 60
-			fmt.Printf("429 error, waiting %v seconds...\n", seconds)
-			time.Sleep(time.Duration(seconds) * time.Second)
-			return singlePromptInteraction(systemPrompt, prompt, retries-1) // TODO: establish base case to prevent forever retrying
-		}
-		return openai.ChatCompletionResponse{}, fmt.Errorf("ChatCompletion request error: %w", err)
-	}
-
-	return resp, nil
-}
-
-// TODO: anything to be stored in the database should be chunked to sizes between 512 and 1024 tokens
-// it should also overlap with the previous chunk by 100-200 tokens
-// When the LLM asks for more context, it should be able to use the database to find the most relevant chunks here is how:
-// We will get the embeddings for each prompt and use those embeddings to search for the closest 6 chunks
-// we will use a separate LLM prompt to make an attempt to select and sort the chunks based on the user's input
-// then we will add the best matched chunks to the main prompt as further context for the given prompt
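
The removed TODO block sketches the retrieval plan: store text in chunks of 512 to 1024 tokens that overlap the previous chunk by 100 to 200 tokens, embed each prompt, pull the closest 6 chunks, let a separate LLM prompt select and sort them, and append the winners to the main prompt. Below is a minimal sketch of just the chunking step, reusing the same pkoukk/tiktoken-go tokenizer the old GetTokenCount relied on; ChunkText and its parameters are illustrative names, not code from this repository.

package main

import (
	"fmt"

	"github.com/pkoukk/tiktoken-go"
)

// ChunkText splits input into chunks of at most maxTokens tokens, where each
// chunk starts `overlap` tokens before the previous one ended, matching the
// 512-1024 token / 100-200 token overlap idea in the TODO. Hypothetical helper.
func ChunkText(input string, maxTokens, overlap int) ([]string, error) {
	if overlap >= maxTokens {
		return nil, fmt.Errorf("overlap (%d) must be smaller than maxTokens (%d)", overlap, maxTokens)
	}
	tke, err := tiktoken.EncodingForModel("gpt-4") // cached in "TIKTOKEN_CACHE_DIR"
	if err != nil {
		return nil, fmt.Errorf("error getting encoding: %w", err)
	}
	tokens := tke.Encode(input, nil, nil)

	var chunks []string
	step := maxTokens - overlap // advancing by this keeps `overlap` tokens shared between neighbours
	for start := 0; start < len(tokens); start += step {
		end := start + maxTokens
		if end > len(tokens) {
			end = len(tokens)
		}
		chunks = append(chunks, tke.Decode(tokens[start:end]))
		if end == len(tokens) {
			break
		}
	}
	return chunks, nil
}

func main() {
	chunks, err := ChunkText("a long document pulled from the database ...", 1024, 150)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(chunks), "chunks")
}

Embedding each chunk and searching for the closest 6 would sit on top of this, in whatever store database.NewDB wraps.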
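
One detail in the removed singlePromptInteraction is worth noting: `seconds := (1 / retries) * 60` is integer division in Go, so it evaluates to 0 for every attempt until retries reaches 1, when it jumps to 60. The 12, 15, 20, 30, 60 second schedule described in the comment corresponds to dividing 60 by the remaining retries. A small sketch of that schedule, in case the version moved into LLMMapper still wants it; backoffSeconds is an illustrative name, not the repository's code.

package main

import "fmt"

// backoffSeconds returns the wait before the next attempt so that five
// retries wait 12, 15, 20, 30 and 60 seconds, as the original comment
// describes. Illustrative only.
func backoffSeconds(retriesLeft int) int {
	if retriesLeft <= 0 {
		return 60
	}
	return 60 / retriesLeft
}

func main() {
	for retries := 5; retries >= 1; retries-- {
		fmt.Printf("retries left: %d, wait: %ds\n", retries, backoffSeconds(retries))
	}
}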
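
The comment at the top of main() points at github.com/sajari/docconv for turning PDF, DOC, DOCX, XML, HTML, RTF and ODT documents and images into plain text. Here is a minimal sketch of how that library is typically called, assuming its current module path is code.sajari.com/docconv; the file path is a placeholder and none of this exists in the repository yet.

package main

import (
	"fmt"
	"log"

	"code.sajari.com/docconv"
)

func main() {
	// ConvertPath detects the file type and returns the extracted plain
	// text in res.Body (plus metadata in res.Meta).
	res, err := docconv.ConvertPath("docs/example.pdf") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Body)
}

The extracted text would then be chunked and embedded as described in the TODO block above.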