package main

import (
	"bytes"
	"context"
	"ctxGPT/database"
	"fmt"
	"os"
	"strings"
	"text/template"
	"time"

	"github.com/pkoukk/tiktoken-go"
	"github.com/sashabaranov/go-openai"
)

// encodingName is the model name used to look up the tiktoken encoding.
const encodingName = "gpt-4"

// model is the chat model used for chat completions.
const model = openai.GPT4TurboPreview

func main() {
	db, err := database.NewDB()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	value, err := db.Get(context.Background(), "context1")
	if err != nil {
		panic(err)
	}
	fmt.Println(value)

	err = db.Save(context.Background(), "context2", "value2")
	if err != nil {
		panic(err)
	}

	// To extract plain text from PDF, DOC, DOCX, XML, HTML, RTF, and ODT documents
	// (and from images via OCR), use https://github.com/sajari/docconv.
	// See the extractText sketch below main for one possible shape.

	summarizeConvoPrompt, err := BuildPrompt("summarize.tmpl", struct{ WordLimit int }{WordLimit: 100})
	if err != nil {
		panic(err)
	}
	fmt.Println(summarizeConvoPrompt)

	tokenCount, err := GetTokenCount(summarizeConvoPrompt)
	if err != nil {
		panic(err)
	}
	fmt.Println(tokenCount)
}
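
// extractText is an illustrative sketch (not used above) of pulling plain text out of a
// local document with github.com/sajari/docconv. It assumes docconv's ConvertPath helper,
// and it would need the docconv import plus that library's external tool dependencies
// (pdftotext, tidy, etc.) for some formats, so it is left commented out here.
//
//	func extractText(path string) (string, error) {
//		res, err := docconv.ConvertPath(path)
//		if err != nil {
//			return "", fmt.Errorf("error converting %s: %w", path, err)
//		}
//		return res.Body, nil
//	}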

// BuildPrompt parses the named template from ./prompts and executes it with the given data.
func BuildPrompt(name string, in interface{}) (string, error) {
	fileLocation := "./prompts/" + name
	tmpl, err := template.New(name).ParseFiles(fileLocation)
	if err != nil {
		return "", fmt.Errorf("error parsing template: %w", err)
	}
	b := bytes.Buffer{}
	err = tmpl.Execute(&b, in)
	if err != nil {
		return "", fmt.Errorf("error executing template: %w", err)
	}
	return b.String(), nil
}

// GetTokenCount returns the number of tokens that the configured model's tiktoken
// encoding produces for the given input.
func GetTokenCount(input string) (int, error) {
	tke, err := tiktoken.EncodingForModel(encodingName) // cached in "TIKTOKEN_CACHE_DIR"
	if err != nil {
		return 0, fmt.Errorf("error getting encoding: %w", err)
	}
	tokens := tke.Encode(input, nil, nil)
	return len(tokens), nil
}

// SinglePromptInteraction calls the OpenAI chat endpoint with just a system prompt and a user prompt and returns the response.
func SinglePromptInteraction(systemPrompt, prompt string) (openai.ChatCompletionResponse, error) {
	return singlePromptInteraction(systemPrompt, prompt, 5)
}

// singlePromptInteraction calls the OpenAI chat endpoint with just a system prompt and a user prompt and returns the response.
// In the case of rate-limiting (429) errors, it retries up to `retries` times, spaced 5 seconds apart.
func singlePromptInteraction(systemPrompt, prompt string, retries int) (openai.ChatCompletionResponse, error) {
	client := openai.NewClient(os.Getenv("OPENAI_API_KEY"))
	messages := []openai.ChatCompletionMessage{
		{
			Role:    openai.ChatMessageRoleSystem,
			Content: systemPrompt,
		},
		{
			Role:    openai.ChatMessageRoleUser,
			Content: prompt,
		},
	}

	resp, err := client.CreateChatCompletion(
		context.Background(),
		openai.ChatCompletionRequest{
			Model:       model,
			Messages:    messages,
			MaxTokens:   256,
			Temperature: 0,
		},
	)
	if err != nil {
		// On a 429 (rate limit) error, wait 5 seconds and try again; retries-1 counts down
		// toward the retries > 0 base case, so this cannot recurse forever.
		if strings.Contains(err.Error(), "429") && retries > 0 {
			fmt.Println("429 error, waiting 5 seconds...")
			time.Sleep(5 * time.Second)
			return singlePromptInteraction(systemPrompt, prompt, retries-1)
		}
		return openai.ChatCompletionResponse{}, fmt.Errorf("ChatCompletion request error: %w", err)
	}

	return resp, nil
}
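
// Usage sketch (illustrative, not called in this file): summarizing a conversation with
// the prompt built in main. conversationText is a hypothetical placeholder.
//
//	resp, err := SinglePromptInteraction(summarizeConvoPrompt, conversationText)
//	if err != nil {
//		// handle the error
//	}
//	summary := resp.Choices[0].Message.Content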

// TODO: anything stored in the database should be chunked to sizes between 512 and 1024 tokens,
// with each chunk overlapping the previous one by 100-200 tokens.
// When the LLM asks for more context, it should be able to use the database to find the
// most relevant chunks. Here is how:
// we will get the embedding for each prompt and use it to search for the closest 6 chunks,
// use a separate LLM prompt to attempt to select and sort those chunks based on the user's input,
// and then add the best-matched chunks to the main prompt as further context for the given prompt.
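
// The helpers below are one possible shape for the chunking and retrieval flow described
// in the TODO above; the names, sizes, and embedding model are assumptions, not settled
// decisions in this repo.

// chunkText splits input into chunks of at most maxTokens tokens, with each chunk sharing
// `overlap` tokens with the previous one (e.g. maxTokens=1024, overlap=150).
func chunkText(input string, maxTokens, overlap int) ([]string, error) {
	if overlap >= maxTokens {
		return nil, fmt.Errorf("overlap (%d) must be smaller than maxTokens (%d)", overlap, maxTokens)
	}
	tke, err := tiktoken.EncodingForModel(encodingName)
	if err != nil {
		return nil, fmt.Errorf("error getting encoding: %w", err)
	}
	tokens := tke.Encode(input, nil, nil)

	var chunks []string
	step := maxTokens - overlap
	for start := 0; start < len(tokens); start += step {
		end := start + maxTokens
		if end > len(tokens) {
			end = len(tokens)
		}
		chunks = append(chunks, tke.Decode(tokens[start:end]))
		if end == len(tokens) {
			break
		}
	}
	return chunks, nil
}

// getEmbedding fetches the embedding for one piece of text so chunks and prompts can be
// compared; AdaEmbeddingV2 is an assumed model choice.
func getEmbedding(text string) ([]float32, error) {
	client := openai.NewClient(os.Getenv("OPENAI_API_KEY"))
	resp, err := client.CreateEmbeddings(context.Background(), openai.EmbeddingRequest{
		Input: []string{text},
		Model: openai.AdaEmbeddingV2,
	})
	if err != nil {
		return nil, fmt.Errorf("embedding request error: %w", err)
	}
	return resp.Data[0].Embedding, nil
}

// dotProduct scores similarity between two embeddings of equal length. OpenAI embeddings
// are normalized to unit length, so the dot product is equivalent to cosine similarity
// and can be used to rank chunks and pick the closest 6 for a prompt.
func dotProduct(a, b []float32) float32 {
	var sum float32
	for i := range a {
		sum += a[i] * b[i]
	}
	return sum
}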