import puppeteer from 'puppeteer';

// Alternative starting links, kept for reference:
// const startingLink = 'https://le.utah.gov/~2024/bills/static/HB0030.html';
// const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0011.html';
const startingLink = 'https://le.utah.gov/~2025/bills/static/HB0012.html';

/**
 * Script entry point.
 *
 * Launches a (non-headless) browser, extracts the text of the page at
 * `startingLink`, splits it into sentence-like lines on '. ', and prints the
 * lines followed by simple character/line statistics.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Launch the browser; pages are opened by getPageText.
  const browser = await puppeteer.launch({
    headless: false,
  });

  let text;
  try {
    text = await getPageText(browser, startingLink);
  } finally {
    // The code that launched the browser is responsible for closing it, and
    // must do so even when navigation or extraction fails.
    await browser.close();
  }

  const lines = text.join(' ').split('. ');
  console.log(lines.join('.\n'));

  let totalChars = 0;
  for (const line of lines) {
    totalChars += line.length;
  }

  console.log('Total chars:', totalChars);
  console.log('Total lines:', lines.length);
  console.log('Average chars per line:', totalChars / lines.length);
}

/**
 * Opens `url` in a new page of `browser` and collects the trimmed, non-empty
 * text fragments found under the first `<leg>` element, skipping any element
 * that carries the `lineno` class.
 *
 * The page opened here is always closed before returning (or throwing); the
 * browser itself is left open for the caller to manage.
 *
 * @param {import('puppeteer').Browser} browser - An already-launched browser.
 * @param {string} url - Address of the page to read.
 * @returns {Promise<string[]>} Text fragments in document order; an empty
 *   array when the page has no `<leg>` element.
 */
async function getPageText(browser, url) {
  const page = await browser.newPage();

  try {
    await page.goto(url);

    return await page.evaluate(() => {
      // ------------------- in the browser context -------------------
      // This callback is serialized and executed inside the page, so it can
      // only use what is defined within it plus the page's own globals.

      /**
       * Recursively gathers trimmed text from `element`'s descendants.
       * Text nodes contribute their trimmed content; element nodes are
       * descended into unless they have the `lineno` class; every other node
       * type is ignored.
       */
      function flattenTree(element) {
        if (!element) return [];

        return Array.from(element.childNodes)
          .flatMap((node) => {
            if (node.nodeType === Node.TEXT_NODE) {
              // Collect text from text nodes
              return node.textContent.trim();
            }
            if (node.nodeType === Node.ELEMENT_NODE && !node.classList.contains('lineno')) {
              // Recursively include elements that are not line numbers
              return flattenTree(node);
            }
            // Ignore elements like line numbers entirely
            return [];
          })
          .filter((fragment) => fragment.length > 0); // Drop whitespace-only text nodes
      }

      // Target the <leg> element; flattenTree yields [] when it is absent.
      return flattenTree(document.querySelector('leg'));
      // ------------------- in the browser context -------------------
    });
  } finally {
    // Release the tab whether or not navigation/extraction succeeded.
    await page.close();
  }
}

main().catch((error) => {
  // Report the failure and signal it through the exit status.
  console.error(error);
  process.exitCode = 1;
});