import puppeteer from 'puppeteer';

// Page whose text is extracted when this script runs.
const startingLing = 'https://le.utah.gov/~2024/bills/static/HB0030.html';

/**
 * Script entry point: launches a visible (non-headless) browser, extracts the
 * text of the starting page, prints it, and always shuts the browser down.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: false,
  });

  try {
    const text = await getPageText(browser, startingLing);
    console.log(text);
  } finally {
    // Close the browser even when navigation or extraction fails, so no
    // browser process is left behind.
    await browser.close();
  }
}

/**
 * Opens `url` in a new page of `browser` and collects the trimmed, non-empty
 * text fragments found under the first element matching the `leg` selector,
 * skipping every element that carries the `lineno` class.
 *
 * The page opened here is closed before returning; the browser itself is left
 * open and remains the caller's responsibility.
 *
 * @param {import('puppeteer').Browser} browser - Browser used to open the page.
 * @param {string} url - Address of the page to read.
 * @returns {Promise<string[]>} Text fragments in document order, or an empty
 *   array when the page has no `leg` element.
 */
async function getPageText(browser, url) {
  const page = await browser.newPage();

  try {
    await page.goto(url);

    // The callback runs inside the page, so every helper it needs must be
    // defined within it; nothing from this module's scope is visible there.
    return await page.evaluate(() => {
      function flattenTree(element) {
        if (!element) {
          return [];
        }

        // Walk the child nodes recursively, keeping only usable text.
        return Array.from(element.childNodes)
          .flatMap((node) => {
            if (node.nodeType === Node.TEXT_NODE) {
              // Collect text from text nodes.
              return node.textContent.trim();
            }
            if (
              node.nodeType === Node.ELEMENT_NODE &&
              !node.classList.contains('lineno')
            ) {
              // Descend into elements that are not line numbers.
              return flattenTree(node);
            }
            // Line-number elements and other node types contribute nothing.
            return [];
          })
          .filter((text) => text.length > 0); // Drop whitespace-only fragments.
      }

      const legElement = document.querySelector('leg');
      return legElement ? flattenTree(legElement) : [];
    });
  } finally {
    // Release the page whether or not extraction succeeded.
    await page.close();
  }
}

main().catch((err) => {
  // Report the failure and signal it through the exit status instead of
  // leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});