// llm-file-parser/main.go

package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strings"
)

// fileBlock is one file extracted from the input text: name is the path the
// file is written to (joined onto the output directory by main) and content
// is the text written into it.
type fileBlock struct {
	name, content string
}

// main reads text from the file named by the first positional argument (or
// from STDIN when no argument is given or the argument is "-"), extracts the
// file blocks it describes, and writes each block beneath the directory given
// by the -out flag (default ".").
//
// Two input layouts are tried in order:
//  1. "File: <name>" followed by a line of dashes, then the content up to the
//     next dashes-only line or the end of the input;
//  2. the numbered layout handled by parseNumberedBlocks.
//
// Read errors and failure to create the output directory are fatal. Failures
// for an individual file are logged and the remaining files are still written.
func main() {
	// Define the output directory flag.
	outDir := flag.String("out", ".", "Directory to write output files")
	flag.Parse()

	// Read input from a file or STDIN.
	var data []byte
	var err error
	if flag.NArg() < 1 || flag.Arg(0) == "-" {
		data, err = io.ReadAll(os.Stdin)
		if err != nil {
			log.Fatalf("Error reading from STDIN: %v", err)
		}
	} else {
		inputFile := flag.Arg(0)
		data, err = ioutil.ReadFile(inputFile)
		if err != nil {
			log.Fatalf("Error reading file %s: %v", inputFile, err)
		}
	}
	text := string(data)

	// First, try using the "File:" format; only when it yields nothing fall
	// back to parsing the numbered format.
	reFile := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)

	var blocks []fileBlock
	if matches := reFile.FindAllStringSubmatch(text, -1); len(matches) > 0 {
		for _, m := range matches {
			blocks = append(blocks, fileBlock{
				name:    strings.TrimSpace(m[1]),
				content: m[2],
			})
		}
	} else {
		blocks = parseNumberedBlocks(text)
	}

	if len(blocks) == 0 {
		log.Println("No file entries found in the input.")
		return
	}

	// Ensure the output directory exists.
	if err := os.MkdirAll(*outDir, os.ModePerm); err != nil {
		log.Fatalf("Error creating output directory %s: %v", *outDir, err)
	}

	// Write the blocks out as files.
	for _, block := range blocks {
		// Names come straight from the parsed input, so refuse any that
		// would resolve to a location outside the output directory.
		fullPath, err := resolveOutputPath(*outDir, block.name)
		if err != nil {
			log.Printf("Skipping file %q: %v", block.name, err)
			continue
		}
		dir := filepath.Dir(fullPath)
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			log.Printf("Error creating directory %s: %v", dir, err)
			continue
		}
		if err := ioutil.WriteFile(fullPath, []byte(block.content), 0644); err != nil {
			log.Printf("Error writing file %s: %v", fullPath, err)
			continue
		}
		fmt.Printf("Created file: %s\n", fullPath)
	}
}

// resolveOutputPath joins name onto outDir and returns the resulting path,
// or an error when the cleaned result would lie outside outDir (for example
// a name containing enough ".." elements to climb out of it).
//
// The check is purely lexical; symbolic links that already exist inside
// outDir are not resolved.
func resolveOutputPath(outDir, name string) (string, error) {
	fullPath := filepath.Join(outDir, name)
	rel, err := filepath.Rel(outDir, fullPath)
	if err != nil {
		return "", fmt.Errorf("resolving %q against %q: %w", name, outDir, err)
	}
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("file name %q escapes output directory %q", name, outDir)
	}
	return fullPath, nil
}

// parseNumberedBlocks extracts file blocks from text laid out as
//
//	──────────   (divider: at least 10 '-' or '─' characters)
//	1. filename  (header: digits, a dot, then the file name)
//	──────────   (divider)
//	...content lines...
//
// Blank lines between a divider and its header, and between the header and
// the following divider, are ignored. A block's content runs until a divider
// whose next non-blank line is another header, or until the end of the text;
// a divider not followed by a header is kept as part of the content.
//
// Content lines are joined with "\n"; the line terminators of the input
// (including a trailing "\r") are not preserved. It returns the blocks in
// input order, or nil when none are found.
func parseNumberedBlocks(text string) []fileBlock {
	// Divider line: at least 10 dashes or box drawing characters.
	dividerRe := regexp.MustCompile(`^[\s]*[─-]{10,}[\s]*$`)
	// Header line such as "1. index.html"; group 1 is the file name.
	headerRe := regexp.MustCompile(`^\s*\d+\.\s*(.+?)\s*$`)

	scanner := bufio.NewScanner(strings.NewReader(text))
	// The default Scanner limit is 64 KiB per line; a longer line would stop
	// the scan and silently drop the rest of the input. Allowing a token as
	// large as the whole text means no line can exceed the limit.
	scanner.Buffer(nil, len(text)+1)
	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}

	// skipBlank returns the index of the first non-blank line at or after k,
	// or len(lines) when there is none.
	skipBlank := func(k int) int {
		for k < len(lines) && strings.TrimSpace(lines[k]) == "" {
			k++
		}
		return k
	}

	var blocks []fileBlock
	i := 0
	for i < len(lines) {
		// Every block starts at a divider line.
		if !dividerRe.MatchString(lines[i]) {
			i++
			continue
		}

		// The next non-blank line must be the numbered header.
		j := skipBlank(i + 1)
		if j >= len(lines) {
			break
		}
		headerMatch := headerRe.FindStringSubmatch(lines[j])
		if headerMatch == nil {
			i++
			continue
		}

		// The header must in turn be followed by a closing divider.
		j = skipBlank(j + 1)
		if j >= len(lines) || !dividerRe.MatchString(lines[j]) {
			// Not a well-formed block; resume scanning from here.
			i = j
			continue
		}
		j++ // move past the divider line

		// Collect content until a divider that introduces the next header.
		var contentLines []string
		for j < len(lines) {
			if dividerRe.MatchString(lines[j]) {
				if k := skipBlank(j + 1); k < len(lines) && headerRe.MatchString(lines[k]) {
					break
				}
			}
			contentLines = append(contentLines, lines[j])
			j++
		}

		blocks = append(blocks, fileBlock{
			name:    strings.TrimSpace(headerMatch[1]),
			content: strings.Join(contentLines, "\n"),
		})
		// j is either the next block's divider or len(lines).
		i = j
	}
	return blocks
}