From e0430c62bd911869cbfd0695eb0cf0bade1b575e Mon Sep 17 00:00:00 2001 From: Mason Payne Date: Sun, 5 Nov 2023 00:33:38 -0600 Subject: [PATCH] initial commit set up the database with full text search and a simple prompt template engine --- .gitignore | 4 ++ .idea/.gitignore | 8 +++ .idea/ctxGPT.iml | 9 +++ .idea/dataSources.xml | 17 +++++ .idea/modules.xml | 8 +++ .idea/vcs.xml | 6 ++ database/db.go | 143 +++++++++++++++++++++++++++++++++++++++++ go.mod | 21 ++++++ go.sum | 31 +++++++++ main.go | 69 ++++++++++++++++++++ main_test.go | 38 +++++++++++ prompts/summarize.tmpl | 11 ++++ prompts/test.tmpl | 1 + 13 files changed, 366 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/ctxGPT.iml create mode 100644 .idea/dataSources.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 database/db.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 main_test.go create mode 100644 prompts/summarize.tmpl create mode 100644 prompts/test.tmpl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba57b28 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +cache/ +context.db +context.db-shm +context.db-wal diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/ctxGPT.iml b/.idea/ctxGPT.iml new file mode 100644 index 0000000..5e764c4 --- /dev/null +++ b/.idea/ctxGPT.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 0000000..077f2fb --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,17 @@ + + + + + sqlite.xerial + true + org.sqlite.JDBC 
+ jdbc:sqlite:C:\Users\gomas\src\ctxGPT\context.db + $ProjectFileDir$ + + + file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.40.1/org/xerial/sqlite-jdbc/3.40.1.0/sqlite-jdbc-3.40.1.0.jar + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..d215d43 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/database/db.go b/database/db.go new file mode 100644 index 0000000..53c4200 --- /dev/null +++ b/database/db.go @@ -0,0 +1,143 @@ +package database + +import ( + "context" + "fmt" + "log" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitemigration" + "zombiezen.com/go/sqlite/sqlitex" +) + +const dbLocation = "./context.db" + +var schema = sqlitemigration.Schema{ + // Each element of the Migrations slice is applied in sequence. When you + // want to change the schema, add a new SQL script to this list. + // + // Existing databases will pick up at the same position in the Migrations + // slice as they last left off. 
+ Migrations: []string{ + + // sqlite create a table called context_store with an id, key, and content column + "CREATE TABLE IF NOT EXISTS context_store ( id INTEGER PRIMARY KEY, key_name TEXT NOT NULL, value TEXT );", + + // sqlite create a virtual table called context using fts5 with a key and value column + "CREATE VIRTUAL TABLE context_search USING fts5 ( key_name, value );", + + `CREATE TRIGGER context_ai AFTER INSERT ON context_store BEGIN + INSERT INTO context_search(rowid, key_name, value) VALUES (new.id, new.key_name, new.value); + END;`, + `CREATE TRIGGER context_ad AFTER DELETE ON context_store BEGIN + DELETE FROM context_search WHERE key_name = old.key_name AND value = old.value; + END;`, + `CREATE TRIGGER context_au AFTER UPDATE ON context_store BEGIN + UPDATE context_search SET key_name = new.key_name, value = new.value WHERE id = old.id; + END;`, + }, +} + +type DB struct { + db *sqlitex.Pool +} + +func NewDB() (*DB, error) { + + err := runMigrations() + if err != nil { + return nil, fmt.Errorf("error running migrations | %w", err) + } + + dbpool, err := sqlitex.Open(dbLocation, 0, 10) + if err != nil { + return nil, fmt.Errorf("error opening db | %w", err) + } + return &DB{ + db: dbpool, + }, nil +} + +func (d *DB) Close() error { + return d.db.Close() +} + +func runMigrations() error { + // Open a pool. This does not block, and will start running any migrations + // asynchronously. + pool := sqlitemigration.NewPool(dbLocation, schema, sqlitemigration.Options{ + Flags: sqlite.OpenReadWrite | sqlite.OpenCreate, + PrepareConn: func(conn *sqlite.Conn) error { + // Enable foreign keys. See https://sqlite.org/foreignkeys.html + return sqlitex.ExecuteTransient(conn, "PRAGMA foreign_keys = ON;", nil) + }, + OnError: func(e error) { + log.Println(e) + }, + }) + defer pool.Close() + + // Get a connection. This blocks until the migration completes. 
+ conn, err := pool.Get(context.Background()) + if err != nil { + return fmt.Errorf("error getting db connection | %w", err) + } + defer pool.Put(conn) + + // Print the list of schema objects created. + const listSchemaQuery = `SELECT "type", "name" FROM sqlite_master ORDER BY 1, 2;` + err = sqlitex.ExecuteTransient(conn, listSchemaQuery, &sqlitex.ExecOptions{ + ResultFunc: func(stmt *sqlite.Stmt) error { + //fmt.Printf("%-5s %s\n", stmt.ColumnText(0), stmt.ColumnText(1)) + return nil + }, + }) + if err != nil { + return fmt.Errorf("error executing query | %w", err) + } + return nil +} + +func (d *DB) Get(ctx context.Context, search string) ([]string, error) { + conn := d.db.Get(ctx) + if conn == nil { + return []string{}, fmt.Errorf("error getting db connection") + } + defer d.db.Put(conn) + + query := "SELECT * FROM context_search WHERE context_search MATCH $search;" + + stmt := conn.Prep(query) + defer stmt.Finalize() + stmt.SetText("$search", search) + for { + if hasRow, err := stmt.Step(); err != nil { + return []string{}, fmt.Errorf("error getting value from db | %w", err) + } else if !hasRow { + break + } + keyName := stmt.GetText("key_name") + value := stmt.GetText("value") + return []string{keyName, value}, nil + } + return []string{}, nil +} + +func (d *DB) Save(ctx context.Context, key, value string) error { + conn := d.db.Get(ctx) + if conn == nil { + return fmt.Errorf("error getting db connection to save info") + } + defer d.db.Put(conn) + + query := "INSERT INTO context_store (key_name, value) VALUES ($key, $value);" + stmt := conn.Prep(query) + defer stmt.Finalize() + stmt.SetText("$key", key) + stmt.SetText("$value", value) + + _, err := stmt.Step() + if err != nil { + return fmt.Errorf("error inserting new context | %w", err) + } + return nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..02b53a6 --- /dev/null +++ b/go.mod @@ -0,0 +1,21 @@ +module ctxGPT + +go 1.19 + +require ( + github.com/pkoukk/tiktoken-go v0.1.6 + 
zombiezen.com/go/sqlite v0.13.1 +) + +require ( + github.com/dlclark/regexp2 v1.10.0 // indirect + github.com/dustin/go-humanize v1.0.0 // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/mattn/go-isatty v0.0.16 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + golang.org/x/sys v0.5.0 // indirect + modernc.org/libc v1.22.3 // indirect + modernc.org/mathutil v1.5.0 // indirect + modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.21.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6efb74d --- /dev/null +++ b/go.sum @@ -0,0 +1,31 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= +github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAcUsw= +github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec 
h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +modernc.org/libc v1.22.3 h1:D/g6O5ftAfavceqlLOFwaZuA5KYafKwmr30A6iSqoyY= +modernc.org/libc v1.22.3/go.mod h1:MQrloYP209xa2zHome2a8HLiLm6k0UT8CoHpV74tOFw= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.21.1 h1:GyDFqNnESLOhwwDRaHGdp2jKLDzpyT/rNLglX3ZkMSU= +modernc.org/sqlite v1.21.1/go.mod h1:XwQ0wZPIh1iKb5mkvCJ3szzbhk+tykC8ZWqTRTgYRwI= +zombiezen.com/go/sqlite v0.13.1 h1:qDzxyWWmMtSSEH5qxamqBFmqA2BLSSbtODi3ojaE02o= +zombiezen.com/go/sqlite v0.13.1/go.mod h1:Ht/5Rg3Ae2hoyh1I7gbWtWAl89CNocfqeb/aAMTkJr4= diff --git a/main.go b/main.go new file mode 100644 index 0000000..bc05717 --- /dev/null +++ b/main.go @@ -0,0 +1,69 @@ +package main + +import ( + "bytes" + "context" + "ctxGPT/database" + "fmt" + "text/template" + + "github.com/pkoukk/tiktoken-go" +) + +const encodingName = "gpt-4" + +func main() { + + db, err := database.NewDB() + if err != nil { + panic(err) + } + defer db.Close() + value, err := db.Get(context.Background(), "context1") + if err != nil { + panic(err) + } + fmt.Println(value) + + err = db.Save(context.Background(), "context2", "value2") + if err != nil { + panic(err) + } + + // to 
// BuildPrompt renders the template file ./prompts/<name> using in as the
// template data and returns the rendered text.
//
// name is a path relative to the prompts directory and may contain
// sub-directories (for example "chat/summarize.tmpl"). An error is returned
// if the file cannot be read or parsed, or if executing it against in fails;
// the returned string is empty in that case.
func BuildPrompt(name string, in interface{}) (string, error) {
	fileLocation := "./prompts/" + name

	// ParseFiles names the parsed template after the file's base name and
	// returns that template. Executing it directly avoids the mismatch that
	// template.New(name).ParseFiles causes when name contains a directory:
	// the "name" template stays empty and Execute fails.
	tmpl, err := template.ParseFiles(fileLocation)
	if err != nil {
		return "", fmt.Errorf("error parsing template: %w", err)
	}

	var b bytes.Buffer
	if err := tmpl.Execute(&b, in); err != nil {
		return "", fmt.Errorf("error executing template: %w", err)
	}
	return b.String(), nil
}
a/prompts/summarize.tmpl b/prompts/summarize.tmpl new file mode 100644 index 0000000..ebc279c --- /dev/null +++ b/prompts/summarize.tmpl @@ -0,0 +1,11 @@ +Your job is to summarize a history of previous messages in a conversation between an AI persona and a human. +The conversation you are given is from a fixed context window and may not be complete. +Messages sent by the AI are marked with the 'assistant' role. +The AI 'assistant' can also make calls to functions, whose outputs can be seen in messages with the 'function' role. +Things the AI says in the message content are considered inner monologue and are not seen by the user. +The only AI messages seen by the user are from when the AI uses 'send_message'. +Messages the user sends are in the 'user' role. +The 'user' role is also used for important system events, such as login events and heartbeat events (heartbeats run the AI's program without user action, allowing the AI to act without prompting from the user sending them a message). +Summarize what happened in the conversation from the perspective of the AI (use the first person). +Keep your summary less than {{.WordLimit}} words, do NOT exceed this word limit. +Only output the summary, do NOT include anything else in your output. \ No newline at end of file diff --git a/prompts/test.tmpl b/prompts/test.tmpl new file mode 100644 index 0000000..5115ccb --- /dev/null +++ b/prompts/test.tmpl @@ -0,0 +1 @@ +This is a test {{.Name}} \ No newline at end of file