add support for a few more formats

This commit is contained in:
2025-02-03 23:49:19 -07:00
parent f9cbf43b6d
commit 4ca6dfb51d
2 changed files with 115 additions and 20 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/response.txt
/llmparse.exe
/response2.txt

134
main.go
View File

@ -1,6 +1,7 @@
package main
import (
"bufio"
"flag"
"fmt"
"io"
@ -9,18 +10,23 @@ import (
"os"
"path/filepath"
"regexp"
"strings"
)
// fileBlock pairs the relative path of a file to be written with the text
// that should be stored in it.
type fileBlock struct {
	name, content string
}
func main() {
// Define the output directory flag.
outDir := flag.String("out", ".", "Directory to write output files")
flag.Parse()
// Determine source of input - either a file or STDIN.
// Read input from a file or STDIN.
var data []byte
var err error
// If no positional arguments or "-" is provided, read from STDIN.
if flag.NArg() < 1 || flag.Arg(0) == "-" {
data, err = io.ReadAll(os.Stdin)
if err != nil {
@ -33,13 +39,26 @@ func main() {
log.Fatalf("Error reading file %s: %v", inputFile, err)
}
}
text := string(data)
// Updated regular expression to handle both LF and CRLF line breaks.
re := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)
matches := re.FindAllStringSubmatch(text, -1)
if matches == nil {
// First, try using the "File:" format.
reFile := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)
matches := reFile.FindAllStringSubmatch(text, -1)
var blocks []fileBlock
if matches != nil && len(matches) > 0 {
for _, m := range matches {
blocks = append(blocks, fileBlock{
name: strings.TrimSpace(m[1]),
content: m[2],
})
}
} else {
// Fall back to parsing the numbered format manually.
blocks = parseNumberedBlocks(text)
}
if len(blocks) == 0 {
log.Println("No file entries found in the input.")
return
}
@ -49,26 +68,101 @@ func main() {
log.Fatalf("Error creating output directory %s: %v", *outDir, err)
}
for _, match := range matches {
// match[1] is the file name, and match[2] is the file content.
fileName := match[1]
fileContent := match[2]
// Create the full file path using the output directory.
fullPath := filepath.Join(*outDir, fileName)
// Ensure that the directory for the file exists (in case there's nested directories).
// Write the blocks out as files.
for _, block := range blocks {
fullPath := filepath.Join(*outDir, block.name)
dir := filepath.Dir(fullPath)
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
log.Printf("Error creating directory %s: %v", dir, err)
continue
}
if err := ioutil.WriteFile(fullPath, []byte(fileContent), 0644); err != nil {
log.Printf("Error writing to file %s: %v", fullPath, err)
if err := ioutil.WriteFile(fullPath, []byte(block.content), 0644); err != nil {
log.Printf("Error writing file %s: %v", fullPath, err)
} else {
fmt.Printf("Created file: %s\n", fullPath)
}
}
}
// parseNumberedBlocks extracts file blocks from text laid out in the
// "numbered" format:
//
//	----------
//	1. filename
//	----------
//	content line
//	content line
//
// A divider is a line consisting only of ten or more '-' or '─' characters,
// optionally surrounded by whitespace. A header is a line of the form
// "<digits>. <name>". Blank lines between the opening divider, the header and
// the closing divider are skipped.
//
// The content of a block runs until the next divider that is itself followed
// (after optional blank lines) by a header, or until the end of the input.
// Any other divider-looking line is kept as part of the content.
//
// Lines are split on "\n" with a trailing "\r" dropped and content is joined
// back with "\n", so CRLF input comes out as LF and the content carries no
// trailing newline of its own.
//
// It returns the blocks in input order; the result is nil when no block is
// found.
func parseNumberedBlocks(text string) []fileBlock {
	// Divider line: at least 10 dashes or box-drawing characters.
	dividerRe := regexp.MustCompile(`^[\s]*[─-]{10,}[\s]*$`)
	// Header line such as "1. index.html"; group 1 is the file name.
	headerRe := regexp.MustCompile(`^\s*\d+\.\s*(.+?)\s*$`)

	scanner := bufio.NewScanner(strings.NewReader(text))
	// By default bufio.Scanner gives up on any line of 64 KiB or more and
	// stops scanning, which would silently drop the rest of the input (think
	// minified assets or embedded base64). Allow a single line to be as long
	// as the whole input so that can never happen.
	maxLine := len(text) + 1
	if maxLine < bufio.MaxScanTokenSize {
		maxLine = bufio.MaxScanTokenSize
	}
	scanner.Buffer(make([]byte, 0, 4096), maxLine)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	// scanner.Err() is not consulted: the source is an in-memory string and
	// the buffer limit above covers the entire input, so neither a read error
	// nor bufio.ErrTooLong can occur.

	// skipBlank returns the index of the first line at or after k that is not
	// blank, or len(lines) if there is none.
	skipBlank := func(k int) int {
		for k < len(lines) && strings.TrimSpace(lines[k]) == "" {
			k++
		}
		return k
	}

	var blocks []fileBlock
	i := 0
	for i < len(lines) {
		if !dividerRe.MatchString(lines[i]) {
			i++
			continue
		}

		// Opening divider found: the next non-blank line must be a header.
		j := skipBlank(i + 1)
		if j >= len(lines) {
			break
		}
		headerMatch := headerRe.FindStringSubmatch(lines[j])
		if headerMatch == nil {
			// Not a header; this divider may still be plain content or the
			// line after it may start a block, so advance one line only.
			i++
			continue
		}
		filename := headerMatch[1]

		// The header must be closed by another divider.
		j = skipBlank(j + 1)
		if j >= len(lines) || !dividerRe.MatchString(lines[j]) {
			// Malformed block: resume scanning at the line that broke it.
			i = j
			continue
		}
		j++ // move past the closing divider

		// Collect content until a divider that introduces the next header.
		var contentLines []string
		for j < len(lines) {
			if dividerRe.MatchString(lines[j]) {
				k := skipBlank(j + 1)
				if k < len(lines) && headerRe.MatchString(lines[k]) {
					break
				}
			}
			contentLines = append(contentLines, lines[j])
			j++
		}

		blocks = append(blocks, fileBlock{
			name:    strings.TrimSpace(filename),
			content: strings.Join(contentLines, "\n"),
		})
		// j is either the next block's opening divider or len(lines).
		i = j
	}
	return blocks
}