Files
legislature-tracker/scraper/index.js

53 lines
1.4 KiB
JavaScript

import puppeteer from 'puppeteer';
// URL of the page whose text is scraped when this script runs.
const startingLing = 'https://le.utah.gov/~2024/bills/static/HB0030.html';

// Script entry point: launch a browser, scrape the starting page, and print
// the extracted text fragments. getPageText closes the browser when it
// finishes successfully, so it is not closed again here.
(async () => {
  // headless: false runs the browser with a visible window.
  const launchOptions = { headless: false };
  const browser = await puppeteer.launch(launchOptions);

  const pageText = await getPageText(browser, startingLing);
  console.log(pageText);
})();
/**
 * Opens `url` in a new page of `browser`, collects the text found inside the
 * document's first `leg` element, and then closes the browser.
 *
 * Text is gathered depth-first from every text node, trimmed, with empty
 * fragments dropped. Any element carrying the `lineno` class is skipped along
 * with everything beneath it.
 *
 * The browser is closed whether or not scraping succeeds, so a failed
 * navigation or evaluation does not leave the browser running.
 *
 * @param {object} browser - Browser handle exposing `newPage()` and `close()`
 *   (here, the object returned by `puppeteer.launch`).
 * @param {string} url - Address of the page to load.
 * @returns {Promise<string[]>} Non-empty, trimmed text fragments in document
 *   order; an empty array when the page has no `leg` element.
 */
async function getPageText(browser, url) {
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // `return await` (not a bare `return`) so the `finally` below only runs
    // once the evaluation has settled.
    return await page.evaluate(() => {
      // This callback executes inside the page, so every helper it needs
      // must be defined within it.
      function flattenTree(element) {
        if (!element) return [];

        return Array.from(element.childNodes)
          .flatMap((node) => {
            if (node.nodeType === Node.TEXT_NODE) {
              // Collect text from text nodes
              return node.textContent.trim();
            }
            if (node.nodeType === Node.ELEMENT_NODE && !node.classList.contains('lineno')) {
              // Recurse into elements that are not line-number markers
              return flattenTree(node);
            }
            // Line-number elements and all other node kinds contribute nothing
            return [];
          })
          .filter((text) => text.length > 0); // Drop fragments that were whitespace only
      }

      const legElement = document.querySelector('leg');
      return legElement ? flattenTree(legElement) : [];
    });
  } finally {
    await browser.close();
  }
}