From 4ca6dfb51d7d5c52a086179172412f8bc422a971 Mon Sep 17 00:00:00 2001
From: Mason Payne <mason@masonitestudios.com>
Date: Mon, 3 Feb 2025 23:49:19 -0700
Subject: [PATCH] add support for a few more formats

---
 .gitignore |   1 +
 main.go    | 134 +++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 115 insertions(+), 20 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3fd7c43..8216995 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /response.txt
 /llmparse.exe
+/response2.txt
diff --git a/main.go b/main.go
index 3634fd5..e012e24 100644
--- a/main.go
+++ b/main.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"bufio"
 	"flag"
 	"fmt"
 	"io"
@@ -9,18 +10,23 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"strings"
 )
 
+// fileBlock is one parsed file entry: an output path and the text to write there.
+type fileBlock struct {
+	name    string // path joined onto the -out directory to form the output file
+	content string // text written to that file
+}
+
 func main() {
 	// Define the output directory flag.
 	outDir := flag.String("out", ".", "Directory to write output files")
 	flag.Parse()
 
-	// Determine source of input - either a file or STDIN.
+	// Read input from a file or STDIN.
 	var data []byte
 	var err error
-
-	// If no positional arguments or "-" is provided, read from STDIN.
 	if flag.NArg() < 1 || flag.Arg(0) == "-" {
 		data, err = io.ReadAll(os.Stdin)
 		if err != nil {
@@ -33,13 +39,26 @@ func main() {
 			log.Fatalf("Error reading file %s: %v", inputFile, err)
 		}
 	}
-
 	text := string(data)
 
-	// Updated regular expression to handle both LF and CRLF line breaks.
-	re := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)
-	matches := re.FindAllStringSubmatch(text, -1)
-	if matches == nil {
+	// First, try using the "File:" format.
+	reFile := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)
+	matches := reFile.FindAllStringSubmatch(text, -1)
+
+	var blocks []fileBlock
+	if matches != nil && len(matches) > 0 {
+		for _, m := range matches {
+			blocks = append(blocks, fileBlock{
+				name:    strings.TrimSpace(m[1]),
+				content: m[2],
+			})
+		}
+	} else {
+		// Fall back to parsing the numbered format manually.
+		blocks = parseNumberedBlocks(text)
+	}
+
+	if len(blocks) == 0 {
 		log.Println("No file entries found in the input.")
 		return
 	}
@@ -49,26 +68,101 @@ func main() {
 		log.Fatalf("Error creating output directory %s: %v", *outDir, err)
 	}
 
-	for _, match := range matches {
-		// match[1] is the file name, and match[2] is the file content.
-		fileName := match[1]
-		fileContent := match[2]
-
-		// Create the full file path using the output directory.
-		fullPath := filepath.Join(*outDir, fileName)
-
-		// Ensure that the directory for the file exists (in case there's nested directories).
+	// Write the blocks out as files.
+	for _, block := range blocks {
+		fullPath := filepath.Join(*outDir, block.name)
 		dir := filepath.Dir(fullPath)
 		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
 			log.Printf("Error creating directory %s: %v", dir, err)
 			continue
 		}
-
-		if err := ioutil.WriteFile(fullPath, []byte(fileContent), 0644); err != nil {
-			log.Printf("Error writing to file %s: %v", fullPath, err)
+		if err := ioutil.WriteFile(fullPath, []byte(block.content), 0644); err != nil {
+			log.Printf("Error writing file %s: %v", fullPath, err)
 		} else {
 			fmt.Printf("Created file: %s\n", fullPath)
 		}
 	}
 
 }
+
+// parseNumberedBlocks extracts file blocks from text in the numbered format:
+//
+//	----------
+//	1. index.html
+//	----------
+//	<file content>
+//
+// Blank lines around the header are skipped. A block's content runs until a
+// divider whose next non-blank line is another header, or to the end of text.
+// It returns the blocks in input order, or nil when none are found.
+func parseNumberedBlocks(text string) []fileBlock {
+	// A divider is a line of at least 10 dashes or box-drawing characters.
+	dividerRe := regexp.MustCompile(`^[\s]*[─-]{10,}[\s]*$`)
+	// A header looks like "1. index.html"; group 1 captures the file name.
+	headerRe := regexp.MustCompile(`^\s*\d+\.\s*(.+?)\s*$`)
+
+	// Split the text into lines. The scanner's default 64 KiB token limit
+	// would make Scan stop at the first longer line and silently drop the
+	// rest of the input, so allow a token as large as the whole text. With
+	// that limit an in-memory reader cannot fail, so Err is not checked.
+	scanner := bufio.NewScanner(strings.NewReader(text))
+	scanner.Buffer(nil, len(text)+1)
+	var lines []string
+	for scanner.Scan() {
+		lines = append(lines, scanner.Text())
+	}
+
+	// skipBlank returns the index of the first non-blank line at or after k.
+	skipBlank := func(k int) int {
+		for k < len(lines) && strings.TrimSpace(lines[k]) == "" {
+			k++
+		}
+		return k
+	}
+
+	var blocks []fileBlock
+	i := 0
+	for i < len(lines) {
+		if !dividerRe.MatchString(lines[i]) {
+			i++
+			continue
+		}
+		// Opening divider found: the next non-blank line must be a header.
+		j := skipBlank(i + 1)
+		if j >= len(lines) {
+			break
+		}
+		headerMatch := headerRe.FindStringSubmatch(lines[j])
+		if headerMatch == nil {
+			i++
+			continue
+		}
+		// The header must be followed by a second divider.
+		j = skipBlank(j + 1)
+		if j >= len(lines) || !dividerRe.MatchString(lines[j]) {
+			// Not a well-formed block; resume scanning from here.
+			i = j
+			continue
+		}
+		j++ // move past the divider line
+		// Collect content until a divider that is followed by a valid header;
+		// any other divider is kept as part of the file's content.
+		var contentLines []string
+		for j < len(lines) {
+			if dividerRe.MatchString(lines[j]) {
+				k := skipBlank(j + 1)
+				if k < len(lines) && headerRe.MatchString(lines[k]) {
+					break // start of the next file block
+				}
+			}
+			contentLines = append(contentLines, lines[j])
+			j++
+		}
+		blocks = append(blocks, fileBlock{
+			name:    strings.TrimSpace(headerMatch[1]),
+			content: strings.Join(contentLines, "\n"),
+		})
+		i = j
+	}
+	return blocks
+}