build backend to collect and search using embeddings
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
node_modules/
|
||||
.idea/
|
||||
/db.sqlite
|
||||
|
99
backend/AI/ai.go
Normal file
99
backend/AI/ai.go
Normal file
@ -0,0 +1,99 @@
|
||||
package AI
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/pkoukk/tiktoken-go"
|
||||
"github.com/sashabaranov/go-openai"
|
||||
"os"
|
||||
)
|
||||
|
||||
// This package should use the OpenAI API to provide AI services.
|
||||
|
||||
type AI interface {
|
||||
// Get Embedding
|
||||
GetEmbeddings(ctx context.Context, text string) (openai.EmbeddingResponse, error)
|
||||
GetTokenCount(input string) (int, error)
|
||||
}
|
||||
|
||||
type ai struct {
|
||||
apiKey string
|
||||
baseURL string
|
||||
encodingName string
|
||||
model string
|
||||
client *openai.Client
|
||||
}
|
||||
|
||||
// AIOption adjusts an ai value while NewAI is constructing it.
type AIOption func(*ai)
|
||||
|
||||
func NewAI(otps ...AIOption) (AI, error) {
|
||||
a := ai{
|
||||
//baseURL: "https://api.openai.com",
|
||||
encodingName: "gpt-4o",
|
||||
model: openai.GPT4oMini,
|
||||
}
|
||||
|
||||
for _, opt := range otps {
|
||||
opt(&a)
|
||||
}
|
||||
|
||||
if a.apiKey == "" && os.Getenv("OPENAI_API_KEY") != "" {
|
||||
a.apiKey = os.Getenv("OPENAI_API_KEY")
|
||||
}
|
||||
if a.apiKey == "" {
|
||||
return nil, fmt.Errorf("api key is required")
|
||||
}
|
||||
|
||||
config := openai.DefaultConfig(a.apiKey)
|
||||
if a.baseURL == "" && os.Getenv("OPENAI_BASE_URL") != "" {
|
||||
a.baseURL = os.Getenv("OPENAI_BASE_URL")
|
||||
}
|
||||
|
||||
if a.baseURL != "" {
|
||||
config.BaseURL = a.baseURL
|
||||
}
|
||||
|
||||
a.client = openai.NewClientWithConfig(config)
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func (a ai) GetEmbeddings(ctx context.Context, text string) (openai.EmbeddingResponse, error) {
|
||||
embeddingRequest := openai.EmbeddingRequest{
|
||||
Input: text,
|
||||
Model: "text-embedding-3-small",
|
||||
}
|
||||
|
||||
embeddings, err := a.client.CreateEmbeddings(ctx, embeddingRequest)
|
||||
if err != nil {
|
||||
return openai.EmbeddingResponse{}, fmt.Errorf("error creating embeddings: %w", err)
|
||||
}
|
||||
return embeddings, nil
|
||||
}
|
||||
|
||||
func WithAPIKey(apiKey string) AIOption {
|
||||
return func(a *ai) {
|
||||
a.apiKey = apiKey
|
||||
}
|
||||
}
|
||||
|
||||
func WithBaseURL(baseURL string) AIOption {
|
||||
return func(a *ai) {
|
||||
a.baseURL = baseURL
|
||||
}
|
||||
}
|
||||
|
||||
func WithEncodingName(encodingName string) AIOption {
|
||||
return func(a *ai) {
|
||||
a.encodingName = encodingName
|
||||
}
|
||||
}
|
||||
|
||||
func (a ai) GetTokenCount(input string) (int, error) {
|
||||
tke, err := tiktoken.EncodingForModel(a.encodingName) // cached in "TIKTOKEN_CACHE_DIR"
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error getting encoding: %w", err)
|
||||
}
|
||||
token := tke.Encode(input, nil, nil)
|
||||
return len(token), nil
|
||||
}
|
73
backend/Leg/utah.go
Normal file
73
backend/Leg/utah.go
Normal file
@ -0,0 +1,73 @@
|
||||
package Leg
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.sa.vin/legislature-tracker/backend/cachedAPI"
|
||||
"git.sa.vin/legislature-tracker/backend/types"
|
||||
)
|
||||
|
||||
type UtahLeg interface {
|
||||
GetBillList(year, session string) (types.UtahBillList, error)
|
||||
GetBillDetails(year, session, billID string) (types.UtahBill, error)
|
||||
}
|
||||
|
||||
type utahLeg struct {
|
||||
cache cachedAPI.CachedAPI
|
||||
}
|
||||
|
||||
// developerToken is appended to the request URLs built in this package.
// NewUtahLeg sets it from the UTAH_DEV_TOKEN environment variable.
var developerToken string
|
||||
|
||||
func NewUtahLeg(cache cachedAPI.CachedAPI) UtahLeg {
|
||||
developerToken = os.Getenv("UTAH_DEV_TOKEN")
|
||||
return &utahLeg{
|
||||
cache: cache,
|
||||
}
|
||||
}
|
||||
|
||||
// GetBillList gets the list of bills for a given year and session,
|
||||
// session should be one of "GS", "S#" where # is the session number
|
||||
func (u utahLeg) GetBillList(year, session string) (types.UtahBillList, error) {
|
||||
// if session is not GS it must start with S and end with a number
|
||||
if session != "GS" && (session[0] != 'S' || session[1] < '0' || session[1] > '9') {
|
||||
return types.UtahBillList{}, fmt.Errorf("session must be one of GS or S with some number")
|
||||
}
|
||||
respString, err := u.cache.Get(fmt.Sprintf("https://glen.le.utah.gov/bills/%v%v/billlist/%v", year, session, developerToken), time.Hour)
|
||||
if err != nil {
|
||||
return types.UtahBillList{}, fmt.Errorf("error getting bill list: %w", err)
|
||||
}
|
||||
if respString == "Invalid request" {
|
||||
return types.UtahBillList{}, fmt.Errorf("invalid request")
|
||||
}
|
||||
var billList types.UtahBillList
|
||||
err = json.Unmarshal([]byte(respString), &billList)
|
||||
if err != nil {
|
||||
return types.UtahBillList{}, fmt.Errorf("error unmarshalling bill list: %w", err)
|
||||
}
|
||||
return billList, nil
|
||||
}
|
||||
|
||||
// GetBillDetails gets the details of a bill for a given year, session, and billID
|
||||
// session should be one of "GS", "S2"
|
||||
func (u utahLeg) GetBillDetails(year, session, billID string) (types.UtahBill, error) {
|
||||
// if session is not GS it must start with S and end with a number
|
||||
if session != "GS" && (session[0] != 'S' || session[1] < '0' || session[1] > '9') {
|
||||
return types.UtahBill{}, fmt.Errorf("session must be one of GS or S with some number")
|
||||
}
|
||||
respString, err := u.cache.Get(fmt.Sprintf("https://glen.le.utah.gov/bills/%v%v/%v/%v", year, session, billID, developerToken), time.Hour)
|
||||
if err != nil {
|
||||
return types.UtahBill{}, fmt.Errorf("error getting bill details: %w", err)
|
||||
}
|
||||
if respString == "Invalid request" {
|
||||
return types.UtahBill{}, fmt.Errorf("invalid request")
|
||||
}
|
||||
var bill types.UtahBill
|
||||
err = json.Unmarshal([]byte(respString), &bill)
|
||||
if err != nil {
|
||||
return types.UtahBill{}, fmt.Errorf("error unmarshalling bill details: %w", err)
|
||||
}
|
||||
return bill, nil
|
||||
}
|
52
backend/cachedAPI/cachedAPI.go
Normal file
52
backend/cachedAPI/cachedAPI.go
Normal file
@ -0,0 +1,52 @@
|
||||
package cachedAPI
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"git.sa.vin/legislature-tracker/backend/datastore"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CachedAPI behaves like an API client but checks a libSQL-backed cache
// before the actual API is called.
type CachedAPI interface {
	// Get returns the response body for url. cacheTTL is stored with a
	// freshly fetched response.
	Get(url string, cacheTTL time.Duration) (string, error)
}
|
||||
|
||||
type cachedAPI struct {
|
||||
mapper datastore.CacheStore
|
||||
}
|
||||
|
||||
func NewCachedAPI(mapper datastore.CacheStore) CachedAPI {
|
||||
return &cachedAPI{
|
||||
mapper: mapper,
|
||||
}
|
||||
}
|
||||
|
||||
func (c cachedAPI) Get(url string, cacheTTL time.Duration) (string, error) {
|
||||
response, found, err := c.mapper.CachedAPI(url)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error getting cached API response: %w", err)
|
||||
}
|
||||
if found {
|
||||
return response, nil
|
||||
}
|
||||
// Call the actual API
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error calling API: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
// Read the response
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error reading API response: %w", err)
|
||||
}
|
||||
// Save the response to the cache
|
||||
err = c.mapper.SaveAPIResponse(url, string(bodyBytes), cacheTTL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error saving API response: %w", err)
|
||||
}
|
||||
|
||||
return string(bodyBytes), nil
|
||||
}
|
125
backend/datastore/mapper.go
Normal file
125
backend/datastore/mapper.go
Normal file
@ -0,0 +1,125 @@
|
||||
package datastore
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"git.sa.vin/legislature-tracker/backend/types"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CacheStore persists API responses keyed by URL.
type CacheStore interface {
	// CachedAPI returns the stored response for url and whether a usable
	// entry was found.
	CachedAPI(url string) (string, bool, error)
	// SaveAPIResponse stores response for url together with cacheTTL.
	SaveAPIResponse(url, response string, cacheTTL time.Duration) error
}
|
||||
|
||||
type SearchStore interface {
|
||||
SaveEmbeddings(id, content string, embeddings []float32) error
|
||||
FindRelevantContent(queryEmbeddings []float32) ([]types.SearchResponse, error)
|
||||
}
|
||||
|
||||
// Mapper runs the package's cache and search queries against a SQL
// database.
type Mapper struct {
	db *sql.DB
}
|
||||
|
||||
func NewMapper(db *sql.DB) *Mapper {
|
||||
return &Mapper{
|
||||
db: db,
|
||||
}
|
||||
}
|
||||
|
||||
// CachedAPI returns the cached API response for the given URL
|
||||
// If the URL is not in the cache it returns an empty string and false
|
||||
func (m *Mapper) CachedAPI(url string) (string, bool, error) {
|
||||
// Check the cache for the URL
|
||||
// If the URL is in the cache, return the cached response
|
||||
// Otherwise, call the API and cache the response
|
||||
|
||||
query := `SELECT response, created_at, ttl FROM cache WHERE url = ?`
|
||||
rows, err := m.db.Query(query, url)
|
||||
if err != nil {
|
||||
// norows error is not an error
|
||||
if err == sql.ErrNoRows {
|
||||
return "", false, nil
|
||||
}
|
||||
return "", false, fmt.Errorf("error reading from cache url: %v | %w", url, err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var response struct {
|
||||
Response string
|
||||
CreatedAt time.Time
|
||||
TTL time.Duration
|
||||
}
|
||||
for rows.Next() {
|
||||
err = rows.Scan(&response.Response, &response.CreatedAt, &response.TTL)
|
||||
if err != nil {
|
||||
return "", false, fmt.Errorf("error scanning cache response: %w", err)
|
||||
}
|
||||
// Check if the cache is expired
|
||||
if time.Since(response.CreatedAt) > response.TTL {
|
||||
return "", false, nil
|
||||
}
|
||||
return response.Response, true, nil
|
||||
}
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
// SaveAPIResponse saves the API response to the cache
|
||||
func (m *Mapper) SaveAPIResponse(url, response string, cacheTTL time.Duration) error {
|
||||
// Insert the response into the cache
|
||||
query := `INSERT INTO cache (url, response, ttl) VALUES (?, ?, ?)`
|
||||
_, err := m.db.Exec(query, url, response, cacheTTL)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "UNIQUE constraint failed: cache.url") {
|
||||
// Update the existing row if there is a UNIQUE constraint error
|
||||
updateQuery := `UPDATE cache SET response = ?, ttl = ? WHERE url = ?`
|
||||
_, updateErr := m.db.Exec(updateQuery, response, cacheTTL, url)
|
||||
if updateErr != nil {
|
||||
return fmt.Errorf("error updating cache response: %w", updateErr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("error inserting cache response: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Mapper) SaveEmbeddings(id, content string, embeddings []float32) error {
|
||||
// Insert the embeddings into the database
|
||||
query := `INSERT INTO searchable_content (trackingid, content, full_emb) VALUES (?, ?, vector32(?))`
|
||||
_, err := m.db.Exec(query, id, content, serializeEmbeddings(embeddings))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error inserting embeddings: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// serializeEmbeddings renders embeddings as a bracketed, comma-separated
// list, e.g. []float32{0.1, 0.2} becomes "[0.1, 0.2]". A nil or empty
// slice becomes "[]".
func serializeEmbeddings(embeddings []float32) string {
	// fmt prints a slice as "[a b c]"; turn each separating space into ", ".
	return strings.ReplaceAll(fmt.Sprint(embeddings), " ", ", ")
}
|
||||
|
||||
func (m *Mapper) FindRelevantContent(queryEmbeddings []float32) ([]types.SearchResponse, error) {
|
||||
// Find the relevant content in the database
|
||||
query := `SELECT searchable_content.trackingid, searchable_content.content FROM vector_top_k('emb_idx', vector32(?), 10) JOIN searchable_content ON id = searchable_content.rowid`
|
||||
rows, err := m.db.Query(query, serializeEmbeddings(queryEmbeddings))
|
||||
if err != nil {
|
||||
// norows error is not an error
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, fmt.Errorf("error querying embeddings: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var results []types.SearchResponse
|
||||
for rows.Next() {
|
||||
var result types.SearchResponse
|
||||
err = rows.Scan(&result.TrackingID, &result.Content)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error scanning embeddings: %w", err)
|
||||
}
|
||||
results = append(results, result)
|
||||
}
|
||||
return results, nil
|
||||
}
|
36
backend/datastore/mapper_test.go
Normal file
36
backend/datastore/mapper_test.go
Normal file
@ -0,0 +1,36 @@
|
||||
package datastore
|
||||
|
||||
import "testing"
|
||||
|
||||
func Benchmark_mySerializedEmbeddings(b *testing.B) {
|
||||
type args struct {
|
||||
embeddings []float32
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
embeddings: []float32{0.1, 0.2, 0.3},
|
||||
},
|
||||
want: "[0.1, 0.2, 0.3]",
|
||||
},
|
||||
{
|
||||
name: "Crazy long test",
|
||||
args: args{
|
||||
embeddings: []float32{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
|
||||
},
|
||||
want: "[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
b.Run(tt.name, func(t *testing.B) {
|
||||
if got := serializeEmbeddings(tt.args.embeddings); got != tt.want {
|
||||
t.Errorf("mySerializedEmbeddings() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
79
backend/main.go
Normal file
79
backend/main.go
Normal file
@ -0,0 +1,79 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"git.sa.vin/legislature-tracker/backend/AI"
|
||||
"git.sa.vin/legislature-tracker/backend/search"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"git.sa.vin/legislature-tracker/backend/Leg"
|
||||
"git.sa.vin/legislature-tracker/backend/cachedAPI"
|
||||
"git.sa.vin/legislature-tracker/backend/datastore"
|
||||
"github.com/payne8/go-libsql-dual-driver"
|
||||
)
|
||||
|
||||
//go:embed migrations/*.sql
|
||||
var migrationFiles embed.FS
|
||||
|
||||
func main() {
|
||||
|
||||
logger := log.New(os.Stdout, "any-remark", log.LstdFlags)
|
||||
primaryUrl := os.Getenv("LIBSQL_DATABASE_URL")
|
||||
authToken := os.Getenv("LIBSQL_AUTH_TOKEN")
|
||||
|
||||
tdb, err := libsqldb.NewLibSqlDB(
|
||||
primaryUrl,
|
||||
libsqldb.WithMigrationFiles(migrationFiles),
|
||||
libsqldb.WithAuthToken(authToken),
|
||||
libsqldb.WithLocalDBName("local.db"), // will not be used for remote-only
|
||||
)
|
||||
if err != nil {
|
||||
logger.Printf("failed to open db %s: %s", primaryUrl, err)
|
||||
log.Fatalln(err)
|
||||
return
|
||||
}
|
||||
err = tdb.Migrate()
|
||||
if err != nil {
|
||||
logger.Printf("failed to migrate db %s: %s", primaryUrl, err)
|
||||
log.Fatalln(err)
|
||||
return
|
||||
}
|
||||
|
||||
mapper := datastore.NewMapper(tdb.DB)
|
||||
api := cachedAPI.NewCachedAPI(mapper)
|
||||
utah := Leg.NewUtahLeg(api)
|
||||
ai, err := AI.NewAI()
|
||||
if err != nil {
|
||||
log.Fatalf("error creating AI: %v", err)
|
||||
}
|
||||
searchService, err := search.NewSearch(search.WithAI(ai), search.WithMapper(mapper))
|
||||
if err != nil {
|
||||
log.Fatalf("error creating search: %v", err)
|
||||
}
|
||||
|
||||
test, err := utah.GetBillList("2024", "GS")
|
||||
if err != nil {
|
||||
log.Fatalf("error getting bill list: %v", err)
|
||||
}
|
||||
log.Printf("bill list: %+v", test)
|
||||
|
||||
test2, err := utah.GetBillDetails("2024", "GS", "HB0001")
|
||||
if err != nil {
|
||||
log.Fatalf("error getting bill details: %v", err)
|
||||
}
|
||||
log.Printf("bill details: %+v", test2)
|
||||
|
||||
//err = searchService.InsertContent(context.Background(), test2.TrackingID, test2.GeneralProvisions+" "+test2.HilightedProvisions)
|
||||
//if err != nil {
|
||||
// log.Fatalf("error inserting content: %v", err)
|
||||
//}
|
||||
|
||||
results, err := searchService.Search("I'm looking for a bill that affects public education")
|
||||
if err != nil {
|
||||
log.Fatalf("error searching: %v", err)
|
||||
}
|
||||
|
||||
log.Printf("search results: %+v", results)
|
||||
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
-- searchable_content holds one row per piece of indexed text together
-- with its embedding vector.
CREATE TABLE searchable_content (
    trackingid TEXT NOT NULL,
    content    TEXT NOT NULL,
    full_emb   F32_BLOB(1536) NOT NULL, -- presumably a 1536-element float32 vector; confirm against the embedding model
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Vector index over full_emb.
CREATE INDEX emb_idx ON searchable_content (libsql_vector_idx(full_emb));
|
11
backend/migrations/2025-01-03-init.sql
Normal file
11
backend/migrations/2025-01-03-init.sql
Normal file
@ -0,0 +1,11 @@
|
||||
-- cache stores one API response per URL, with the time it was stored and
-- the TTL supplied by the writer.
CREATE TABLE IF NOT EXISTS cache (
    id         INTEGER PRIMARY KEY,
    url        TEXT NOT NULL UNIQUE,
    response   TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    ttl        INTEGER DEFAULT 0
);

-- IF NOT EXISTS keeps the index statements as re-runnable as the table
-- statement above; without it a second run fails on the existing index.
CREATE INDEX IF NOT EXISTS idx_url ON cache (url);
CREATE INDEX IF NOT EXISTS idx_created_at ON cache (created_at);
|
||||
|
77
backend/search/search.go
Normal file
77
backend/search/search.go
Normal file
@ -0,0 +1,77 @@
|
||||
package search
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"git.sa.vin/legislature-tracker/backend/AI"
|
||||
"git.sa.vin/legislature-tracker/backend/datastore"
|
||||
"git.sa.vin/legislature-tracker/backend/types"
|
||||
)
|
||||
|
||||
type Search interface {
|
||||
Search(query string) ([]types.SearchResponse, error)
|
||||
InsertContent(ctx context.Context, id string, content string) error
|
||||
}
|
||||
|
||||
// SearchOption configures a search value while NewSearch is building it.
type SearchOption func(s *search)
|
||||
|
||||
func NewSearch(opts ...SearchOption) (Search, error) {
|
||||
s := &search{}
|
||||
for _, opt := range opts {
|
||||
opt(s)
|
||||
}
|
||||
if s.ai == nil {
|
||||
return nil, fmt.Errorf("AI is required")
|
||||
}
|
||||
if s.mapper == nil {
|
||||
return nil, fmt.Errorf("mapper is required")
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func WithMapper(mapper datastore.SearchStore) func(s *search) {
|
||||
return func(s *search) {
|
||||
s.mapper = mapper
|
||||
}
|
||||
}
|
||||
|
||||
func WithAI(ai AI.AI) func(s *search) {
|
||||
return func(s *search) {
|
||||
s.ai = ai
|
||||
}
|
||||
}
|
||||
|
||||
type search struct {
|
||||
ai AI.AI
|
||||
mapper datastore.SearchStore
|
||||
}
|
||||
|
||||
func (s search) Search(query string) ([]types.SearchResponse, error) {
|
||||
// get embeddings for the query
|
||||
embeddings, err := s.ai.GetEmbeddings(context.Background(), query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting embeddings: %w", err)
|
||||
}
|
||||
if len(embeddings.Data) == 0 {
|
||||
return nil, fmt.Errorf("no embeddings returned")
|
||||
}
|
||||
// find relevant content in the database
|
||||
return s.mapper.FindRelevantContent(embeddings.Data[0].Embedding)
|
||||
}
|
||||
|
||||
func (s search) InsertContent(ctx context.Context, id string, content string) error {
|
||||
// get embeddings for the content
|
||||
embeddings, err := s.ai.GetEmbeddings(ctx, content)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting embeddings: %w", err)
|
||||
}
|
||||
if len(embeddings.Data) == 0 {
|
||||
return fmt.Errorf("no embeddings returned")
|
||||
}
|
||||
// save the embeddings to the database
|
||||
err = s.mapper.SaveEmbeddings(id, content, embeddings.Data[0].Embedding)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error saving embeddings: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
6
backend/types/search.go
Normal file
6
backend/types/search.go
Normal file
@ -0,0 +1,6 @@
|
||||
package types
|
||||
|
||||
// SearchResponse is one search hit: the tracking ID of the stored item
// and its content.
type SearchResponse struct {
	TrackingID string
	Content    string
}
|
33
backend/types/utah.go
Normal file
33
backend/types/utah.go
Normal file
@ -0,0 +1,33 @@
|
||||
package types
|
||||
|
||||
// UtahBill represents a single bill in the Utah legislature. Each field
// is decoded from the lower-case JSON key named in its tag.
type UtahBill struct {
	Bill                string   `json:"bill"`
	Version             string   `json:"version"`
	ShortTitle          string   `json:"shorttitle"`
	Sponsor             string   `json:"sponsor"`
	FloorSponsor        string   `json:"floorsponsor"`
	GeneralProvisions   string   `json:"generalprovisions"`
	HilightedProvisions string   `json:"hilightedprovisions"`
	Monies              string   `json:"monies"`
	Attorney            string   `json:"attorney"`
	FiscalAnalyst       string   `json:"fiscalanalyst"`
	LastAction          string   `json:"lastaction"`
	LastActionOwner     string   `json:"lastactionowner"`
	LastActionTime      string   `json:"lastactiontime"`
	TrackingID          string   `json:"trackingid"`
	Subjects            []string `json:"subjects"`
	CodeSections        []string `json:"codesections"`
	Agendas             []string `json:"agendas"`
}
|
||||
|
||||
// UtahBillListItem is one entry in a bill list: the bill's number and its
// update time, both kept as the strings found in the JSON.
type UtahBillListItem struct {
	Number     string `json:"number"`
	UpdateTime string `json:"updatetime"`
}
|
||||
|
||||
// UtahBillList is a struct that represents a list of bills in the Utah legislature
|
||||
type UtahBillList struct {
|
||||
Bills []UtahBillListItem `json:"bills"`
|
||||
}
|
19
go.mod
19
go.mod
@ -2,4 +2,21 @@ module git.sa.vin/legislature-tracker
|
||||
|
||||
go 1.23
|
||||
|
||||
require golang.org/x/net v0.33.0
|
||||
require (
|
||||
github.com/payne8/go-libsql-dual-driver v0.2.3
|
||||
github.com/pkoukk/tiktoken-go v0.1.7
|
||||
github.com/sashabaranov/go-openai v1.36.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
|
||||
github.com/dlclark/regexp2 v1.10.0 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/libsql/sqlite-antlr4-parser v0.0.0-20240327125255-dbf53b6cbf06 // indirect
|
||||
github.com/tursodatabase/go-libsql v0.0.0-20240429120401-651096bbee0b // indirect
|
||||
github.com/tursodatabase/libsql-client-go v0.0.0-20240628122535-1c47b26184e8 // indirect
|
||||
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect
|
||||
nhooyr.io/websocket v1.8.10 // indirect
|
||||
)
|
||||
|
44
go.sum
44
go.sum
@ -1,2 +1,42 @@
|
||||
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
|
||||
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
|
||||
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/libsql/sqlite-antlr4-parser v0.0.0-20240327125255-dbf53b6cbf06 h1:JLvn7D+wXjH9g4Jsjo+VqmzTUpl/LX7vfr6VOfSWTdM=
|
||||
github.com/libsql/sqlite-antlr4-parser v0.0.0-20240327125255-dbf53b6cbf06/go.mod h1:FUkZ5OHjlGPjnM2UyGJz9TypXQFgYqw6AFNO1UiROTM=
|
||||
github.com/payne8/go-libsql-dual-driver v0.2.3 h1:ea19rrdn3QQqvDrHNZ5gqqj2Nn7DbhGDVvDL4UDYZ68=
|
||||
github.com/payne8/go-libsql-dual-driver v0.2.3/go.mod h1:fhe8WdGtBLvGZ5drN9We0uWEedXeCCTvWaTLExrGW9M=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkoukk/tiktoken-go v0.1.7 h1:qOBHXX4PHtvIvmOtyg1EeKlwFRiMKAcoMp4Q+bLQDmw=
|
||||
github.com/pkoukk/tiktoken-go v0.1.7/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/sashabaranov/go-openai v1.36.1 h1:EVfRXwIlW2rUzpx6vR+aeIKCK/xylSrVYAx1TMTSX3g=
|
||||
github.com/sashabaranov/go-openai v1.36.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tursodatabase/go-libsql v0.0.0-20240429120401-651096bbee0b h1:R7hev4b96zgXjKbS2ZNbHBnDvyFZhH+LlMqtKH6hIkU=
|
||||
github.com/tursodatabase/go-libsql v0.0.0-20240429120401-651096bbee0b/go.mod h1:TjsB2miB8RW2Sse8sdxzVTdeGlx74GloD5zJYUC38d8=
|
||||
github.com/tursodatabase/libsql-client-go v0.0.0-20240628122535-1c47b26184e8 h1:XM3aeBrpNrkvi48EiKCtMNAgsiaAaAOCHAW9SaIWouo=
|
||||
github.com/tursodatabase/libsql-client-go v0.0.0-20240628122535-1c47b26184e8/go.mod h1:fblU7nZYWAROzJzkpln8teKFDtdRvAOmZHeIpahY4jk=
|
||||
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw=
|
||||
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ=
|
||||
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
|
||||
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
|
||||
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
|
||||
nhooyr.io/websocket v1.8.10 h1:mv4p+MnGrLDcPlBoWsvPP7XCzTYMXP9F9eIGoKbgx7Q=
|
||||
nhooyr.io/websocket v1.8.10/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c=
|
||||
|
@ -1,14 +1,26 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
const startingLing = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
|
||||
// const startingLink = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
|
||||
// const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0011.html';
|
||||
const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0012.html';
|
||||
|
||||
(async () => {
|
||||
// Launch the browser and open a new blank page
|
||||
const browser = await puppeteer.launch({
|
||||
headless: false,
|
||||
});
|
||||
let text = await getPageText(browser, startingLing);
|
||||
let text = await getPageText(browser, startingLink);
|
||||
|
||||
console.log(text);
|
||||
const lines = text.join(' ').split('. ');
|
||||
|
||||
console.log(lines.join('.\n'));
|
||||
|
||||
let totalChars = 0;
|
||||
for (let line of lines) {
|
||||
totalChars += line.length;
|
||||
}
|
||||
console.log('Total chars:', totalChars);
|
||||
console.log('Total lines:', lines.length);
|
||||
console.log('Average chars per line:', totalChars / lines.length);
|
||||
|
||||
})();
|
||||
|
||||
@ -17,6 +29,7 @@ async function getPageText(browser, url) {
|
||||
await page.goto(url);
|
||||
|
||||
const test = await page.evaluate(() => {
|
||||
// ------------------- in the browser context -------------------
|
||||
// Use the querySelector to target the leg element
|
||||
const legElement = document.querySelector('leg');
|
||||
if (legElement) {
|
||||
@ -42,6 +55,8 @@ async function getPageText(browser, url) {
|
||||
})
|
||||
.filter((text) => text.length > 0); // Filter out any leftover empty strings
|
||||
}
|
||||
|
||||
// ------------------- in the browser context -------------------
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
|
Reference in New Issue
Block a user