build backend to collect and search using embeddings
This commit is contained in:
@ -1,14 +1,26 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
const startingLing = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
|
||||
// const startingLink = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
|
||||
// const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0011.html';
|
||||
const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0012.html';
|
||||
|
||||
(async () => {
|
||||
// Launch the browser and open a new blank page
|
||||
const browser = await puppeteer.launch({
|
||||
headless: false,
|
||||
});
|
||||
let text = await getPageText(browser, startingLing);
|
||||
let text = await getPageText(browser, startingLink);
|
||||
|
||||
console.log(text);
|
||||
const lines = text.join(' ').split('. ');
|
||||
|
||||
console.log(lines.join('.\n'));
|
||||
|
||||
let totalChars = 0;
|
||||
for (let line of lines) {
|
||||
totalChars += line.length;
|
||||
}
|
||||
console.log('Total chars:', totalChars);
|
||||
console.log('Total lines:', lines.length);
|
||||
console.log('Average chars per line:', totalChars / lines.length);
|
||||
|
||||
})();
|
||||
|
||||
@ -17,6 +29,7 @@ async function getPageText(browser, url) {
|
||||
await page.goto(url);
|
||||
|
||||
const test = await page.evaluate(() => {
|
||||
// ------------------- in the browser context -------------------
|
||||
// Use the querySelector to target the leg element
|
||||
const legElement = document.querySelector('leg');
|
||||
if (legElement) {
|
||||
@ -42,6 +55,8 @@ async function getPageText(browser, url) {
|
||||
})
|
||||
.filter((text) => text.length > 0); // Filter out any leftover empty strings
|
||||
}
|
||||
|
||||
// ------------------- in the browser context -------------------
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
|
Reference in New Issue
Block a user