diff --git a/.gitignore b/.gitignore
index d35bbf7..fbc85c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
node_modules/
-.idea/
\ No newline at end of file
+.idea/
+/db.sqlite
diff --git a/backend/AI/ai.go b/backend/AI/ai.go
new file mode 100644
index 0000000..411562f
--- /dev/null
+++ b/backend/AI/ai.go
@@ -0,0 +1,99 @@
+package AI
+
+import (
+ "context"
+ "fmt"
+ "github.com/pkoukk/tiktoken-go"
+ "github.com/sashabaranov/go-openai"
+ "os"
+)
+
+// This package uses the OpenAI API for embeddings and tiktoken for token counting.
+
+type AI interface {
+ // GetEmbeddings returns the embedding response produced for text.
+ GetEmbeddings(ctx context.Context, text string) (openai.EmbeddingResponse, error)
+ GetTokenCount(input string) (int, error) // number of tokens input encodes to
+}
+
+type ai struct { // unexported implementation of AI, configured through AIOption values
+ apiKey string // from WithAPIKey, otherwise the OPENAI_API_KEY environment variable
+ baseURL string // from WithBaseURL, otherwise OPENAI_BASE_URL; empty keeps the client default
+ encodingName string // model name passed to tiktoken.EncodingForModel
+ model string // set in NewAI; not read by any method in this file
+ client *openai.Client
+}
+
+type AIOption func(*ai) // functional option; NewAI applies options before falling back to environment variables
+
+func NewAI(otps ...AIOption) (AI, error) { // builds an AI from defaults, then options, then environment fallbacks
+ a := ai{
+ //baseURL: "https://api.openai.com", (left commented out: config.BaseURL is only replaced when a base URL is supplied)
+ encodingName: "gpt-4o",
+ model: openai.GPT4oMini,
+ }
+
+ for _, opt := range otps {
+ opt(&a)
+ }
+
+ if a.apiKey == "" && os.Getenv("OPENAI_API_KEY") != "" { // an explicit option wins over the environment
+ a.apiKey = os.Getenv("OPENAI_API_KEY")
+ }
+ if a.apiKey == "" {
+ return nil, fmt.Errorf("api key is required")
+ }
+
+ config := openai.DefaultConfig(a.apiKey)
+ if a.baseURL == "" && os.Getenv("OPENAI_BASE_URL") != "" { // same precedence as the API key
+ a.baseURL = os.Getenv("OPENAI_BASE_URL")
+ }
+
+ if a.baseURL != "" {
+ config.BaseURL = a.baseURL
+ }
+
+ a.client = openai.NewClientWithConfig(config)
+
+ return a, nil // returned by value; every AI method is declared on the value receiver
+}
+
+func (a ai) GetEmbeddings(ctx context.Context, text string) (openai.EmbeddingResponse, error) { // requests embeddings for text
+ embeddingRequest := openai.EmbeddingRequest{
+ Input: text,
+ Model: "text-embedding-3-small", // fixed here; a.model is not consulted
+ }
+
+ embeddings, err := a.client.CreateEmbeddings(ctx, embeddingRequest)
+ if err != nil {
+ return openai.EmbeddingResponse{}, fmt.Errorf("error creating embeddings: %w", err)
+ }
+ return embeddings, nil
+}
+
+func WithAPIKey(apiKey string) AIOption { // option: use apiKey instead of OPENAI_API_KEY
+ return func(a *ai) {
+ a.apiKey = apiKey
+ }
+}
+
+func WithBaseURL(baseURL string) AIOption { // option: use baseURL instead of OPENAI_BASE_URL
+ return func(a *ai) {
+ a.baseURL = baseURL
+ }
+}
+
+func WithEncodingName(encodingName string) AIOption { // option: model name GetTokenCount resolves an encoding for
+ return func(a *ai) {
+ a.encodingName = encodingName
+ }
+}
+
+func (a ai) GetTokenCount(input string) (int, error) { // counts the tokens input encodes to for the configured model
+ tke, err := tiktoken.EncodingForModel(a.encodingName) // looked up on every call; encoding data is cached in "TIKTOKEN_CACHE_DIR"
+ if err != nil {
+ return 0, fmt.Errorf("error getting encoding: %w", err)
+ }
+ token := tke.Encode(input, nil, nil)
+ return len(token), nil
+}
diff --git a/backend/Leg/utah.go b/backend/Leg/utah.go
new file mode 100644
index 0000000..b9ab0e8
--- /dev/null
+++ b/backend/Leg/utah.go
@@ -0,0 +1,73 @@
+package Leg
+
+import (
+ "encoding/json"
+ "fmt"
+ "os"
+ "time"
+
+ "git.sa.vin/legislature-tracker/backend/cachedAPI"
+ "git.sa.vin/legislature-tracker/backend/types"
+)
+
+type UtahLeg interface { // read access to the Utah legislature bill API
+ GetBillList(year, session string) (types.UtahBillList, error) // all bills of one year/session
+ GetBillDetails(year, session, billID string) (types.UtahBill, error) // one bill of that year/session
+}
+
+type utahLeg struct { // UtahLeg implementation
+ cache cachedAPI.CachedAPI // every request in this file goes through cache.Get
+}
+
+var developerToken string // UTAH_DEV_TOKEN value; package-level, reassigned on every NewUtahLeg call
+
+func NewUtahLeg(cache cachedAPI.CachedAPI) UtahLeg { // wires the client to cache and reads the developer token
+ developerToken = os.Getenv("UTAH_DEV_TOKEN") // not validated; an unset variable yields URLs with an empty token segment
+ return &utahLeg{
+ cache: cache,
+ }
+}
+
+// GetBillList gets the list of bills for a given year and session through the cache (one-hour TTL).
+// session must be "GS" or start with "S" followed by a digit (e.g. "S2"); shorter values are rejected instead of panicking.
+func (u utahLeg) GetBillList(year, session string) (types.UtahBillList, error) {
+ // validate before indexing: the length check keeps session[0]/session[1] in range for "" and one-character input
+ if session != "GS" && (len(session) < 2 || session[0] != 'S' || session[1] < '0' || session[1] > '9') {
+ return types.UtahBillList{}, fmt.Errorf("session must be one of GS or S with some number")
+ }
+ respString, err := u.cache.Get(fmt.Sprintf("https://glen.le.utah.gov/bills/%v%v/billlist/%v", year, session, developerToken), time.Hour)
+ if err != nil {
+ return types.UtahBillList{}, fmt.Errorf("error getting bill list: %w", err)
+ }
+ if respString == "Invalid request" {
+ return types.UtahBillList{}, fmt.Errorf("invalid request")
+ }
+ var billList types.UtahBillList
+ err = json.Unmarshal([]byte(respString), &billList)
+ if err != nil {
+ return types.UtahBillList{}, fmt.Errorf("error unmarshalling bill list: %w", err)
+ }
+ return billList, nil
+}
+
+// GetBillDetails gets the details of a bill for a given year, session, and billID through the cache (one-hour TTL).
+// session must be "GS" or start with "S" followed by a digit (e.g. "S2"); shorter values are rejected instead of panicking.
+func (u utahLeg) GetBillDetails(year, session, billID string) (types.UtahBill, error) {
+ // validate before indexing: the length check keeps session[0]/session[1] in range for "" and one-character input
+ if session != "GS" && (len(session) < 2 || session[0] != 'S' || session[1] < '0' || session[1] > '9') {
+ return types.UtahBill{}, fmt.Errorf("session must be one of GS or S with some number")
+ }
+ respString, err := u.cache.Get(fmt.Sprintf("https://glen.le.utah.gov/bills/%v%v/%v/%v", year, session, billID, developerToken), time.Hour)
+ if err != nil {
+ return types.UtahBill{}, fmt.Errorf("error getting bill details: %w", err)
+ }
+ if respString == "Invalid request" {
+ return types.UtahBill{}, fmt.Errorf("invalid request")
+ }
+ var bill types.UtahBill
+ err = json.Unmarshal([]byte(respString), &bill)
+ if err != nil {
+ return types.UtahBill{}, fmt.Errorf("error unmarshalling bill details: %w", err)
+ }
+ return bill, nil
+}
diff --git a/backend/cachedAPI/cachedAPI.go b/backend/cachedAPI/cachedAPI.go
new file mode 100644
index 0000000..e603cda
--- /dev/null
+++ b/backend/cachedAPI/cachedAPI.go
@@ -0,0 +1,52 @@
+package cachedAPI
+
+import (
+ "fmt"
+ "git.sa.vin/legislature-tracker/backend/datastore"
+ "io"
+ "net/http"
+ "time"
+)
+
+// CachedAPI behaves like an HTTP API client, but a datastore-backed cache (libSQL in this project) is checked before the actual API is called.
+type CachedAPI interface {
+ Get(url string, cacheTTL time.Duration) (string, error) // response body for url; cacheTTL is stored with a freshly fetched body
+}
+
+type cachedAPI struct { // CachedAPI implementation
+ mapper datastore.CacheStore // cache lookups and writes
+}
+
+func NewCachedAPI(mapper datastore.CacheStore) CachedAPI { // returns a CachedAPI that stores responses in mapper
+ return &cachedAPI{
+ mapper: mapper,
+ }
+}
+
+func (c cachedAPI) Get(url string, cacheTTL time.Duration) (string, error) { // returns the body for url, from the cache when possible
+ response, found, err := c.mapper.CachedAPI(url)
+ if err != nil {
+ return "", fmt.Errorf("error getting cached API response: %w", err)
+ }
+ if found {
+ return response, nil
+ }
+ // Cache miss or expired entry: call the actual API
+ resp, err := http.Get(url)
+ if err != nil {
+ return "", fmt.Errorf("error calling API: %w", err)
+ }
+ defer resp.Body.Close()
+ // Reject non-2xx replies before reading, so an error page is never stored and replayed for cacheTTL
+ if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+ return "", fmt.Errorf("error calling API: unexpected status %s", resp.Status)
+ }
+ bodyBytes, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return "", fmt.Errorf("error reading API response: %w", err)
+ }
+ if err = c.mapper.SaveAPIResponse(url, string(bodyBytes), cacheTTL); err != nil { // store the body for later calls
+ return "", fmt.Errorf("error saving API response: %w", err)
+ }
+ return string(bodyBytes), nil
+}
diff --git a/backend/datastore/mapper.go b/backend/datastore/mapper.go
new file mode 100644
index 0000000..c12037e
--- /dev/null
+++ b/backend/datastore/mapper.go
@@ -0,0 +1,125 @@
+package datastore
+
+import (
+ "database/sql"
+ "fmt"
+ "git.sa.vin/legislature-tracker/backend/types"
+ "strings"
+ "time"
+)
+
+type CacheStore interface { // persistence used by the cachedAPI package
+ CachedAPI(url string) (string, bool, error) // stored response and whether a usable entry was found
+ SaveAPIResponse(url, response string, cacheTTL time.Duration) error
+}
+
+type SearchStore interface { // persistence used by the search package
+ SaveEmbeddings(id, content string, embeddings []float32) error
+ FindRelevantContent(queryEmbeddings []float32) ([]types.SearchResponse, error) // content nearest to the query vector
+}
+
+type Mapper struct { // its methods implement both CacheStore and SearchStore on top of one database handle
+ db *sql.DB
+}
+
+func NewMapper(db *sql.DB) *Mapper { // wraps db; the Mapper neither opens nor closes it
+ return &Mapper{
+ db: db,
+ }
+}
+
+// CachedAPI returns the cached API response for the given URL.
+// If the URL is not in the cache, or its entry is older than its TTL, it returns an empty string and false.
+func (m *Mapper) CachedAPI(url string) (string, bool, error) {
+ // This method only reads the cache table; calling the API
+ // and storing the result are left to the caller.
+
+ query := `SELECT response, created_at, ttl FROM cache WHERE url = ?`
+ rows, err := m.db.Query(query, url)
+ if err != nil {
+ // Query never reports sql.ErrNoRows (only Row.Scan does), so every error here is a real failure
+ return "", false, fmt.Errorf("error reading from cache url: %v | %w", url, err)
+ }
+ defer rows.Close()
+
+ var response struct {
+ Response string
+ CreatedAt time.Time
+ TTL time.Duration
+ }
+ if rows.Next() {
+ err = rows.Scan(&response.Response, &response.CreatedAt, &response.TTL)
+ if err != nil {
+ return "", false, fmt.Errorf("error scanning cache response: %w", err)
+ }
+ // Check if the cache is expired
+ if time.Since(response.CreatedAt) > response.TTL {
+ return "", false, nil
+ }
+ return response.Response, true, nil
+ }
+ // Next returned false: either no row matched or iteration failed; Err tells the two apart
+ if err = rows.Err(); err != nil {
+ return "", false, fmt.Errorf("error iterating cache rows: %w", err)
+ }
+ return "", false, nil
+}
+
+// SaveAPIResponse saves the API response to the cache, replacing any entry already stored for url.
+func (m *Mapper) SaveAPIResponse(url, response string, cacheTTL time.Duration) error {
+ // Insert the response into the cache; created_at takes its column default
+ query := `INSERT INTO cache (url, response, ttl) VALUES (?, ?, ?)`
+ _, err := m.db.Exec(query, url, response, cacheTTL)
+ if err != nil {
+ if strings.Contains(err.Error(), "UNIQUE constraint failed: cache.url") {
+ // Update the existing row and reset created_at, otherwise the refreshed entry is still judged expired on the next read
+ updateQuery := `UPDATE cache SET response = ?, ttl = ?, created_at = CURRENT_TIMESTAMP WHERE url = ?`
+ _, updateErr := m.db.Exec(updateQuery, response, cacheTTL, url)
+ if updateErr != nil {
+ return fmt.Errorf("error updating cache response: %w", updateErr)
+ }
+ return nil
+ }
+ return fmt.Errorf("error inserting cache response: %w", err)
+ }
+ return nil
+}
+
+func (m *Mapper) SaveEmbeddings(id, content string, embeddings []float32) error { // stores content with its embedding under id
+ // Insert one row; the vector is bound in its bracketed text form and wrapped in vector32(?) for the full_emb column
+ query := `INSERT INTO searchable_content (trackingid, content, full_emb) VALUES (?, ?, vector32(?))`
+ _, err := m.db.Exec(query, id, content, serializeEmbeddings(embeddings))
+ if err != nil {
+ return fmt.Errorf("error inserting embeddings: %w", err)
+ }
+ return nil
+}
+
+func serializeEmbeddings(embeddings []float32) string { // %v prints "[0.1 0.2 0.3]"; the spaces become ", " giving "[0.1, 0.2, 0.3]"
+ return strings.Join(strings.Split(fmt.Sprintf("%v", embeddings), " "), ", ")
+}
+
+func (m *Mapper) FindRelevantContent(queryEmbeddings []float32) ([]types.SearchResponse, error) {
+ // Ask the emb_idx vector index for the 10 nearest rows and join back to fetch their tracking id and content
+ query := `SELECT searchable_content.trackingid, searchable_content.content FROM vector_top_k('emb_idx', vector32(?), 10) JOIN searchable_content ON id = searchable_content.rowid`
+ rows, err := m.db.Query(query, serializeEmbeddings(queryEmbeddings))
+ if err != nil {
+ return nil, fmt.Errorf("error querying embeddings: %w", err)
+ }
+ defer rows.Close()
+
+ var results []types.SearchResponse
+ for rows.Next() {
+ var result types.SearchResponse
+ err = rows.Scan(&result.TrackingID, &result.Content)
+ if err != nil {
+ return nil, fmt.Errorf("error scanning embeddings: %w", err)
+ }
+ results = append(results, result)
+ }
+ // Next also stops on a failed iteration; surface that instead of returning a silently truncated result
+ if err = rows.Err(); err != nil {
+ return nil, fmt.Errorf("error iterating embeddings: %w", err)
+ }
+ return results, nil
+}
diff --git a/backend/datastore/mapper_test.go b/backend/datastore/mapper_test.go
new file mode 100644
index 0000000..00c542f
--- /dev/null
+++ b/backend/datastore/mapper_test.go
@@ -0,0 +1,36 @@
+package datastore
+
+import "testing"
+
+func Benchmark_mySerializedEmbeddings(b *testing.B) {
+ type args struct {
+ embeddings []float32
+ }
+ tests := []struct {
+ name string
+ args args
+ want string
+ }{
+ {
+ name: "Test 1",
+ args: args{
+ embeddings: []float32{0.1, 0.2, 0.3},
+ },
+ want: "[0.1, 0.2, 0.3]",
+ },
+ {
+ name: "Crazy long test",
+ args: args{
+ embeddings: []float32{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
+ },
+ want: "[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]",
+ },
+ }
+ for _, tt := range tests {
+ b.Run(tt.name, func(t *testing.B) {
+ if got := serializeEmbeddings(tt.args.embeddings); got != tt.want {
+ t.Errorf("mySerializedEmbeddings() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
diff --git a/backend/main.go b/backend/main.go
new file mode 100644
index 0000000..d8513c2
--- /dev/null
+++ b/backend/main.go
@@ -0,0 +1,79 @@
+package main
+
+import (
+ "embed"
+ "git.sa.vin/legislature-tracker/backend/AI"
+ "git.sa.vin/legislature-tracker/backend/search"
+ "log"
+ "os"
+
+ "git.sa.vin/legislature-tracker/backend/Leg"
+ "git.sa.vin/legislature-tracker/backend/cachedAPI"
+ "git.sa.vin/legislature-tracker/backend/datastore"
+ "github.com/payne8/go-libsql-dual-driver"
+)
+
+//go:embed migrations/*.sql
+var migrationFiles embed.FS
+
+func main() {
+ // Wires database, cache, Utah API client, AI client and search together, then runs a few sample calls and logs the results.
+ logger := log.New(os.Stdout, "any-remark", log.LstdFlags) // NOTE(review): prefix has no trailing space, so it runs into the timestamp
+ primaryUrl := os.Getenv("LIBSQL_DATABASE_URL")
+ authToken := os.Getenv("LIBSQL_AUTH_TOKEN")
+
+ tdb, err := libsqldb.NewLibSqlDB(
+ primaryUrl,
+ libsqldb.WithMigrationFiles(migrationFiles),
+ libsqldb.WithAuthToken(authToken),
+ libsqldb.WithLocalDBName("local.db"), // will not be used for remote-only
+ )
+ if err != nil {
+ logger.Printf("failed to open db %s: %s", primaryUrl, err)
+ log.Fatalln(err) // exits the process; the return below is never reached
+ return
+ }
+ err = tdb.Migrate()
+ if err != nil {
+ logger.Printf("failed to migrate db %s: %s", primaryUrl, err)
+ log.Fatalln(err) // exits the process; the return below is never reached
+ return
+ }
+
+ mapper := datastore.NewMapper(tdb.DB) // one Mapper serves as both the cache store and the search store
+ api := cachedAPI.NewCachedAPI(mapper)
+ utah := Leg.NewUtahLeg(api)
+ ai, err := AI.NewAI() // no options: the API key must come from the environment
+ if err != nil {
+ log.Fatalf("error creating AI: %v", err)
+ }
+ searchService, err := search.NewSearch(search.WithAI(ai), search.WithMapper(mapper))
+ if err != nil {
+ log.Fatalf("error creating search: %v", err)
+ }
+
+ test, err := utah.GetBillList("2024", "GS")
+ if err != nil {
+ log.Fatalf("error getting bill list: %v", err)
+ }
+ log.Printf("bill list: %+v", test)
+
+ test2, err := utah.GetBillDetails("2024", "GS", "HB0001")
+ if err != nil {
+ log.Fatalf("error getting bill details: %v", err)
+ }
+ log.Printf("bill details: %+v", test2)
+
+ //err = searchService.InsertContent(context.Background(), test2.TrackingID, test2.GeneralProvisions+" "+test2.HilightedProvisions)
+ //if err != nil {
+ // log.Fatalf("error inserting content: %v", err)
+ //}
+
+ results, err := searchService.Search("I'm looking for a bill that affects public education") // with the insert above disabled, only previously stored content can match
+ if err != nil {
+ log.Fatalf("error searching: %v", err)
+ }
+
+ log.Printf("search results: %+v", results)
+
+}
diff --git a/backend/migrations/2025-01-03-0001-add-searchable-content.sql b/backend/migrations/2025-01-03-0001-add-searchable-content.sql
new file mode 100644
index 0000000..9030944
--- /dev/null
+++ b/backend/migrations/2025-01-03-0001-add-searchable-content.sql
@@ -0,0 +1,8 @@
+CREATE TABLE searchable_content (
+ trackingid TEXT NOT NULL, -- id supplied by the caller, no uniqueness is declared
+ content TEXT NOT NULL,
+ full_emb F32_BLOB(1536) NOT NULL, -- 1536 float32 values per row, must match the embedding size that is stored
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX emb_idx ON searchable_content (libsql_vector_idx(full_emb));
\ No newline at end of file
diff --git a/backend/migrations/2025-01-03-init.sql b/backend/migrations/2025-01-03-init.sql
new file mode 100644
index 0000000..2abbd36
--- /dev/null
+++ b/backend/migrations/2025-01-03-init.sql
@@ -0,0 +1,11 @@
+CREATE TABLE IF NOT EXISTS cache (
+ id INTEGER PRIMARY KEY,
+ url TEXT NOT NULL UNIQUE, -- cache key, one row per requested URL
+ response TEXT NOT NULL,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ ttl INTEGER DEFAULT 0 -- entry lifetime as written by the application
+);
+
+CREATE INDEX IF NOT EXISTS idx_url ON cache (url);
+CREATE INDEX IF NOT EXISTS idx_created_at ON cache (created_at);
+
diff --git a/backend/search/search.go b/backend/search/search.go
new file mode 100644
index 0000000..7bdb2d3
--- /dev/null
+++ b/backend/search/search.go
@@ -0,0 +1,77 @@
+package search
+
+import (
+ "context"
+ "fmt"
+ "git.sa.vin/legislature-tracker/backend/AI"
+ "git.sa.vin/legislature-tracker/backend/datastore"
+ "git.sa.vin/legislature-tracker/backend/types"
+)
+
+type Search interface { // embedding-based search over stored content
+ Search(query string) ([]types.SearchResponse, error) // stored content most relevant to query
+ InsertContent(ctx context.Context, id string, content string) error // embeds content and stores it under id
+}
+
+type SearchOption func(s *search) // functional option applied by NewSearch
+
+func NewSearch(opts ...SearchOption) (Search, error) { // applies opts, then requires both an AI and a mapper
+ s := &search{}
+ for _, opt := range opts {
+ opt(s)
+ }
+ if s.ai == nil {
+ return nil, fmt.Errorf("AI is required")
+ }
+ if s.mapper == nil {
+ return nil, fmt.Errorf("mapper is required")
+ }
+ return s, nil
+}
+
+func WithMapper(mapper datastore.SearchStore) func(s *search) { // option: store used to save and query embeddings
+ return func(s *search) {
+ s.mapper = mapper
+ }
+}
+
+func WithAI(ai AI.AI) func(s *search) { // option: AI client used to compute embeddings
+ return func(s *search) {
+ s.ai = ai
+ }
+}
+
+type search struct { // Search implementation; both fields are required by NewSearch
+ ai AI.AI
+ mapper datastore.SearchStore
+}
+
+func (s search) Search(query string) ([]types.SearchResponse, error) { // embeds query and returns the nearest stored content
+ // get embeddings for the query; the interface carries no context, so context.Background() is used
+ embeddings, err := s.ai.GetEmbeddings(context.Background(), query)
+ if err != nil {
+ return nil, fmt.Errorf("error getting embeddings: %w", err)
+ }
+ if len(embeddings.Data) == 0 {
+ return nil, fmt.Errorf("no embeddings returned")
+ }
+ // find relevant content in the database using the first returned embedding
+ return s.mapper.FindRelevantContent(embeddings.Data[0].Embedding)
+}
+
+func (s search) InsertContent(ctx context.Context, id string, content string) error { // embeds content and stores it under id
+ // get embeddings for the content
+ embeddings, err := s.ai.GetEmbeddings(ctx, content)
+ if err != nil {
+ return fmt.Errorf("error getting embeddings: %w", err)
+ }
+ if len(embeddings.Data) == 0 {
+ return fmt.Errorf("no embeddings returned")
+ }
+ // save the first returned embedding to the database together with the raw content
+ err = s.mapper.SaveEmbeddings(id, content, embeddings.Data[0].Embedding)
+ if err != nil {
+ return fmt.Errorf("error saving embeddings: %w", err)
+ }
+ return nil
+}
diff --git a/backend/types/search.go b/backend/types/search.go
new file mode 100644
index 0000000..fe3a0fd
--- /dev/null
+++ b/backend/types/search.go
@@ -0,0 +1,6 @@
+package types
+
+type SearchResponse struct { // one search hit
+ TrackingID string // id the content was stored under
+ Content string // the stored text
+}
diff --git a/backend/types/utah.go b/backend/types/utah.go
new file mode 100644
index 0000000..0b6bfba
--- /dev/null
+++ b/backend/types/utah.go
@@ -0,0 +1,33 @@
+package types
+
+// UtahBill represents a single bill in the Utah legislature, decoded from the bill-details JSON.
+type UtahBill struct {
+ Bill string `json:"bill"`
+ Version string `json:"version"`
+ ShortTitle string `json:"shorttitle"`
+ Sponsor string `json:"sponsor"`
+ FloorSponsor string `json:"floorsponsor"`
+ GeneralProvisions string `json:"generalprovisions"`
+ HilightedProvisions string `json:"hilightedprovisions"` // spelling follows the JSON key
+ Monies string `json:"monies"`
+ Attorney string `json:"attorney"`
+ FiscalAnalyst string `json:"fiscalanalyst"`
+ LastAction string `json:"lastaction"`
+ LastActionOwner string `json:"lastactionowner"`
+ LastActionTime string `json:"lastactiontime"` // kept as the raw string; not parsed into a time value
+ TrackingID string `json:"trackingid"`
+ Subjects []string `json:"subjects"`
+ CodeSections []string `json:"codesections"`
+ Agendas []string `json:"agendas"`
+}
+
+// UtahBillListItem represents one entry of a bill list: the bill number and its update time.
+type UtahBillListItem struct {
+ Number string `json:"number"`
+ UpdateTime string `json:"updatetime"` // kept as the raw string; not parsed into a time value
+}
+
+// UtahBillList represents the list of bills returned for one year and session of the Utah legislature.
+type UtahBillList struct {
+ Bills []UtahBillListItem `json:"bills"`
+}
diff --git a/go.mod b/go.mod
index f430008..b8f6828 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,21 @@ module git.sa.vin/legislature-tracker
go 1.23
-require golang.org/x/net v0.33.0
+require (
+ github.com/payne8/go-libsql-dual-driver v0.2.3
+ github.com/pkoukk/tiktoken-go v0.1.7
+ github.com/sashabaranov/go-openai v1.36.1
+)
+
+require (
+ github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
+ github.com/dlclark/regexp2 v1.10.0 // indirect
+ github.com/google/uuid v1.3.0 // indirect
+ github.com/hashicorp/errwrap v1.0.0 // indirect
+ github.com/hashicorp/go-multierror v1.1.1 // indirect
+ github.com/libsql/sqlite-antlr4-parser v0.0.0-20240327125255-dbf53b6cbf06 // indirect
+ github.com/tursodatabase/go-libsql v0.0.0-20240429120401-651096bbee0b // indirect
+ github.com/tursodatabase/libsql-client-go v0.0.0-20240628122535-1c47b26184e8 // indirect
+ golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect
+ nhooyr.io/websocket v1.8.10 // indirect
+)
diff --git a/go.sum b/go.sum
index 16660ab..6a1a583 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,42 @@
-golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
-golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
+github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
+github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/libsql/sqlite-antlr4-parser v0.0.0-20240327125255-dbf53b6cbf06 h1:JLvn7D+wXjH9g4Jsjo+VqmzTUpl/LX7vfr6VOfSWTdM=
+github.com/libsql/sqlite-antlr4-parser v0.0.0-20240327125255-dbf53b6cbf06/go.mod h1:FUkZ5OHjlGPjnM2UyGJz9TypXQFgYqw6AFNO1UiROTM=
+github.com/payne8/go-libsql-dual-driver v0.2.3 h1:ea19rrdn3QQqvDrHNZ5gqqj2Nn7DbhGDVvDL4UDYZ68=
+github.com/payne8/go-libsql-dual-driver v0.2.3/go.mod h1:fhe8WdGtBLvGZ5drN9We0uWEedXeCCTvWaTLExrGW9M=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkoukk/tiktoken-go v0.1.7 h1:qOBHXX4PHtvIvmOtyg1EeKlwFRiMKAcoMp4Q+bLQDmw=
+github.com/pkoukk/tiktoken-go v0.1.7/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/sashabaranov/go-openai v1.36.1 h1:EVfRXwIlW2rUzpx6vR+aeIKCK/xylSrVYAx1TMTSX3g=
+github.com/sashabaranov/go-openai v1.36.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/tursodatabase/go-libsql v0.0.0-20240429120401-651096bbee0b h1:R7hev4b96zgXjKbS2ZNbHBnDvyFZhH+LlMqtKH6hIkU=
+github.com/tursodatabase/go-libsql v0.0.0-20240429120401-651096bbee0b/go.mod h1:TjsB2miB8RW2Sse8sdxzVTdeGlx74GloD5zJYUC38d8=
+github.com/tursodatabase/libsql-client-go v0.0.0-20240628122535-1c47b26184e8 h1:XM3aeBrpNrkvi48EiKCtMNAgsiaAaAOCHAW9SaIWouo=
+github.com/tursodatabase/libsql-client-go v0.0.0-20240628122535-1c47b26184e8/go.mod h1:fblU7nZYWAROzJzkpln8teKFDtdRvAOmZHeIpahY4jk=
+golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw=
+golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ=
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
+gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
+nhooyr.io/websocket v1.8.10 h1:mv4p+MnGrLDcPlBoWsvPP7XCzTYMXP9F9eIGoKbgx7Q=
+nhooyr.io/websocket v1.8.10/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c=
diff --git a/main.go b/main.go
deleted file mode 100644
index 3968b12..0000000
--- a/main.go
+++ /dev/null
@@ -1,482 +0,0 @@
-package main
-
-import (
- "fmt"
- "log"
- "strings"
-
- "golang.org/x/net/html"
-)
-
-func main() {
- htmlStr := `
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-HB0030
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Additional Accessibility Settings
-
-
-
-
-
-
- Font Size:
-
-
-
-
-
-
-
-
-
-
-
-
-
-H.B. 30 Road Rage Amendments
-
-
-
-
- CoSponsor(s):
Acton, C.K. | Pierucci, C. |
Stoddard, A. |
- Drafting Attorney: Jacqueline Carlton
- Fiscal Analyst: Gary R. Syphus
-
-
-
-
-
- Information
- Last Action: 18 Mar 2024, Governor Signed
- Last Location: Lieutenant Governor's office for filing
-
-
-
-
-
-
-
-
Bill Status / Votes
-
• Senate Actions • House Actions • Fiscal Actions • Other Actions
Date | Action | Location | Vote |
12/19/2023 | Bill Numbered but not Distributed | Legislative Research and General Counsel | |
12/19/2023 | Numbered Bill Publicly Distributed | Legislative Research and General Counsel | |
1/10/2024 | House/ received bill from Legislative Research | Clerk of the House | |
1/16/2024 | House/ 1st reading (Introduced) | House Rules Committee | |
1/25/2024 | House/ received fiscal note from Fiscal Analyst | House Rules Committee | |
1/31/2024 | House/ to standing committee | House Law Enforcement and Criminal Justice Committee | |
2/9/2024 | House Comm - Substitute Recommendation from # 0 to # 3 | House Law Enforcement and Criminal Justice Committee | 7 0 5 |
2/9/2024 | House Comm - Favorable Recommendation | House Law Enforcement and Criminal Justice Committee | 7 0 5 |
2/12/2024 (10:18:50 AM) | House/ comm rpt/ substituted | House Law Enforcement and Criminal Justice Committee | |
2/12/2024 (10:18:51 AM) | House/ 2nd reading | House 3rd Reading Calendar for House bills | |
2/12/2024 | LFA/ fiscal note sent to sponsor | House 3rd Reading Calendar for House bills | |
2/12/2024 | LFA/ fiscal note publicly available | House 3rd Reading Calendar for House bills | |
2/16/2024 (11:43:56 AM) | House/ 3rd reading | House 3rd Reading Calendar for House bills | |
2/16/2024 (11:49:30 AM) | House/ floor amendment # 1 | House 3rd Reading Calendar for House bills | Voice vote |
2/16/2024 (11:58:00 AM) | House/ passed 3rd reading | Senate Secretary | 51 17 7 |
2/16/2024 (11:58:02 AM) | House/ to Senate | Senate Secretary | |
2/16/2024 | Senate/ received from House | Waiting for Introduction in the Senate | |
2/16/2024 | Senate/ 1st reading (Introduced) | Senate Rules Committee | |
2/20/2024 | Senate/ to standing committee | Senate Judiciary, Law Enforcement, and Criminal Justice Committee | |
2/22/2024 | Senate Comm - Favorable Recommendation | Senate Judiciary, Law Enforcement, and Criminal Justice Committee | 3 0 3 |
2/22/2024 (2:19:24 PM) | Senate/ committee report favorable | Senate Judiciary, Law Enforcement, and Criminal Justice Committee | |
2/22/2024 (2:19:25 PM) | Senate/ placed on 2nd Reading Calendar | Senate 2nd Reading Calendar | |
2/27/2024 | Senate/ 2nd Reading Calendar to Rules | Senate Rules Committee | |
2/28/2024 | Senate/ Rules to 2nd Reading Calendar | Senate 2nd Reading Calendar | |
2/28/2024 (12:09:34 PM) | Senate/ 2nd & 3rd readings/ suspension | Senate 2nd Reading Calendar | |
2/28/2024 (12:09:48 PM) | Senate/ circled | Senate 2nd Reading Calendar | Voice vote |
2/28/2024 (8:05:19 PM) | Senate/ uncircled | Senate 2nd Reading Calendar | Voice vote |
2/28/2024 (8:11:13 PM) | Senate/ passed 2nd & 3rd readings/ suspension | Senate President | 23 5 1 |
2/28/2024 (8:11:14 PM) | Senate/ signed by President/ returned to House | House Speaker | |
2/28/2024 (8:11:15 PM) | Senate/ to House | House Speaker | |
2/29/2024 | House/ received from Senate | House Speaker | |
2/29/2024 | House/ signed by Speaker/ sent for enrolling | Legislative Research and General Counsel / Enrolling | |
2/29/2024 | Bill Received from House for Enrolling | Legislative Research and General Counsel / Enrolling | |
2/29/2024 | Draft of Enrolled Bill Prepared | Legislative Research and General Counsel / Enrolling | |
3/7/2024 | Enrolled Bill Returned to House or Senate | Clerk of the House | |
3/7/2024 | House/ enrolled bill to Printing | Clerk of the House | |
3/8/2024 | House/ received enrolled bill from Printing | Clerk of the House | |
3/8/2024 | House/ to Governor | Executive Branch - Governor | |
3/18/2024 | Governor Signed | Lieutenant Governor's office for filing | |
-
-
-
Committee Hearings/Floor Debate
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-`
-
- doc, err := html.Parse(strings.NewReader(htmlStr))
- if err != nil {
- log.Fatal(err)
- }
-
- var extractText func(*html.Node)
- extractText = func(n *html.Node) {
- if n.Type == html.TextNode {
- fmt.Printf("%v\n", n.Data)
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- extractText(c)
- }
- }
-
- extractText(doc)
-}
diff --git a/scraper/index.js b/scraper/index.js
index 3ee606d..6ca89b9 100644
--- a/scraper/index.js
+++ b/scraper/index.js
@@ -1,14 +1,26 @@
import puppeteer from 'puppeteer';
-const startingLing = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
+// const startingLink = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
+// const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0011.html';
+const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0012.html';
(async () => {
// Launch the browser and open a new blank page
const browser = await puppeteer.launch({
headless: false,
});
- let text = await getPageText(browser, startingLing);
+ let text = await getPageText(browser, startingLink);
- console.log(text);
+ const lines = text.join(' ').split('. ');
+
+ console.log(lines.join('.\n'));
+
+ let totalChars = 0;
+ for (let line of lines) {
+ totalChars += line.length;
+ }
+ console.log('Total chars:', totalChars);
+ console.log('Total lines:', lines.length);
+ console.log('Average chars per line:', totalChars / lines.length);
})();
@@ -17,6 +29,7 @@ async function getPageText(browser, url) {
await page.goto(url);
const test = await page.evaluate(() => {
+ // ------------------- in the browser context -------------------
// Use the querySelector to target the leg element
const legElement = document.querySelector('leg');
if (legElement) {
@@ -42,6 +55,8 @@ async function getPageText(browser, url) {
})
.filter((text) => text.length > 0); // Filter out any leftover empty strings
}
+
+ // ------------------- in the browser context -------------------
});
await browser.close();