build backend to collect and search using embeddings

This commit is contained in:
2025-01-04 00:15:29 -07:00
parent c4521af5c2
commit 3be1648ee4
16 changed files with 679 additions and 489 deletions

View File

@ -1,14 +1,26 @@
import puppeteer from 'puppeteer';
// Bill page to scrape. Alternative URLs are kept commented out so they can be
// swapped in quickly during development.
// const startingLink = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
// const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0011.html';
const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0012.html';

/**
 * Script entry point.
 *
 * Launches a visible (non-headless) browser, extracts the text of the bill at
 * `startingLink` via `getPageText`, splits it into rough sentences, and prints
 * the sentences followed by simple size statistics.
 *
 * NOTE(review): `getPageText` is expected to resolve to an array of strings
 * (the result is joined with spaces below) — confirm what it returns when the
 * target element is missing, since `text.join` would throw on `undefined`.
 */
(async () => {
  // Launch the browser with a visible window
  const browser = await puppeteer.launch({
    headless: false,
  });

  // A single declaration of `text`, using the correctly spelled `startingLink`.
  const text = await getPageText(browser, startingLink);
  console.log(text);

  // Approximate sentence splitting: join all fragments, then break on '. '.
  const lines = text.join(' ').split('. ');
  console.log(lines.join('.\n'));

  let totalChars = 0;
  for (const line of lines) {
    totalChars += line.length;
  }

  console.log('Total chars:', totalChars);
  console.log('Total lines:', lines.length);
  // `split` always yields at least one element, so this never divides by zero.
  console.log('Average chars per line:', totalChars / lines.length);
})();
@ -17,6 +29,7 @@ async function getPageText(browser, url) {
await page.goto(url);
const test = await page.evaluate(() => {
// ------------------- in the browser context -------------------
// Use the querySelector to target the leg element
const legElement = document.querySelector('leg');
if (legElement) {
@ -42,6 +55,8 @@ async function getPageText(browser, url) {
})
.filter((text) => text.length > 0); // Filter out any leftover empty strings
}
// ------------------- in the browser context -------------------
});
await browser.close();