add support for a few more formats
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
|||||||
/response.txt
|
/response.txt
|
||||||
/llmparse.exe
|
/llmparse.exe
|
||||||
|
/response2.txt
|
||||||
|
134
main.go
134
main.go
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@ -9,18 +10,23 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// fileBlock pairs the name of one extracted file with the text that will be
// written to it.
type fileBlock struct {
	name, content string // name is joined onto the output directory; content is written verbatim
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Define the output directory flag.
|
// Define the output directory flag.
|
||||||
outDir := flag.String("out", ".", "Directory to write output files")
|
outDir := flag.String("out", ".", "Directory to write output files")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Determine source of input - either a file or STDIN.
|
// Read input from a file or STDIN.
|
||||||
var data []byte
|
var data []byte
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
// If no positional arguments or "-" is provided, read from STDIN.
|
|
||||||
if flag.NArg() < 1 || flag.Arg(0) == "-" {
|
if flag.NArg() < 1 || flag.Arg(0) == "-" {
|
||||||
data, err = io.ReadAll(os.Stdin)
|
data, err = io.ReadAll(os.Stdin)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -33,13 +39,26 @@ func main() {
|
|||||||
log.Fatalf("Error reading file %s: %v", inputFile, err)
|
log.Fatalf("Error reading file %s: %v", inputFile, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
text := string(data)
|
text := string(data)
|
||||||
|
|
||||||
// Updated regular expression to handle both LF and CRLF line breaks.
|
// First, try using the "File:" format.
|
||||||
re := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)
|
reFile := regexp.MustCompile(`(?s)File:\s*(.+?)\s*\r?\n-+\r?\n(.*?)(\r?\n-+\r?\n|$)`)
|
||||||
matches := re.FindAllStringSubmatch(text, -1)
|
matches := reFile.FindAllStringSubmatch(text, -1)
|
||||||
if matches == nil {
|
|
||||||
|
var blocks []fileBlock
|
||||||
|
if matches != nil && len(matches) > 0 {
|
||||||
|
for _, m := range matches {
|
||||||
|
blocks = append(blocks, fileBlock{
|
||||||
|
name: strings.TrimSpace(m[1]),
|
||||||
|
content: m[2],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fall back to parsing the numbered format manually.
|
||||||
|
blocks = parseNumberedBlocks(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(blocks) == 0 {
|
||||||
log.Println("No file entries found in the input.")
|
log.Println("No file entries found in the input.")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -49,26 +68,101 @@ func main() {
|
|||||||
log.Fatalf("Error creating output directory %s: %v", *outDir, err)
|
log.Fatalf("Error creating output directory %s: %v", *outDir, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, match := range matches {
|
// Write the blocks out as files.
|
||||||
// match[1] is the file name, and match[2] is the file content.
|
for _, block := range blocks {
|
||||||
fileName := match[1]
|
fullPath := filepath.Join(*outDir, block.name)
|
||||||
fileContent := match[2]
|
|
||||||
|
|
||||||
// Create the full file path using the output directory.
|
|
||||||
fullPath := filepath.Join(*outDir, fileName)
|
|
||||||
|
|
||||||
// Ensure that the directory for the file exists (in case there's nested directories).
|
|
||||||
dir := filepath.Dir(fullPath)
|
dir := filepath.Dir(fullPath)
|
||||||
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
|
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
|
||||||
log.Printf("Error creating directory %s: %v", dir, err)
|
log.Printf("Error creating directory %s: %v", dir, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if err := ioutil.WriteFile(fullPath, []byte(block.content), 0644); err != nil {
|
||||||
if err := ioutil.WriteFile(fullPath, []byte(fileContent), 0644); err != nil {
|
log.Printf("Error writing file %s: %v", fullPath, err)
|
||||||
log.Printf("Error writing to file %s: %v", fullPath, err)
|
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("Created file: %s\n", fullPath)
|
fmt.Printf("Created file: %s\n", fullPath)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseNumberedBlocks scans the text looking for divider lines
|
||||||
|
// and header lines of the form "1. filename".
|
||||||
|
// It returns a slice of fileBlock values.
|
||||||
|
func parseNumberedBlocks(text string) []fileBlock {
|
||||||
|
var blocks []fileBlock
|
||||||
|
|
||||||
|
// We'll use a scanner to process the text line by line.
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(text))
|
||||||
|
// Regular expression to recognize a divider line (at least 10 dashes or box drawing characters)
|
||||||
|
dividerRe := regexp.MustCompile(`^[\s]*[─-]{10,}[\s]*$`)
|
||||||
|
// Regular expression for matching a header line like "1. index.html"
|
||||||
|
headerRe := regexp.MustCompile(`^\s*\d+\.\s*(.+?)\s*$`)
|
||||||
|
|
||||||
|
lines := []string{}
|
||||||
|
for scanner.Scan() {
|
||||||
|
lines = append(lines, scanner.Text())
|
||||||
|
}
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
for i < len(lines) {
|
||||||
|
// Look for a divider line.
|
||||||
|
if dividerRe.MatchString(lines[i]) {
|
||||||
|
// Skip divider. Look for a header line in following lines.
|
||||||
|
j := i + 1
|
||||||
|
// Skip empty lines.
|
||||||
|
for j < len(lines) && strings.TrimSpace(lines[j]) == "" {
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
if j >= len(lines) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Match the header.
|
||||||
|
headerLine := lines[j]
|
||||||
|
headerMatch := headerRe.FindStringSubmatch(headerLine)
|
||||||
|
if headerMatch == nil {
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filename := headerMatch[1]
|
||||||
|
// Expect next divider.
|
||||||
|
j++
|
||||||
|
for j < len(lines) && strings.TrimSpace(lines[j]) == "" {
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
if j >= len(lines) || !dividerRe.MatchString(lines[j]) {
|
||||||
|
// If no divider after header, this is not a well‐formed block.
|
||||||
|
i = j
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Start collecting content after this divider.
|
||||||
|
j++ // move past the divider line
|
||||||
|
contentLines := []string{}
|
||||||
|
// Continue until we see a new divider that is immediately followed by a valid header.
|
||||||
|
for j < len(lines) {
|
||||||
|
// If this line is a divider candidate,
|
||||||
|
// check if the next non-empty line is a header line.
|
||||||
|
if dividerRe.MatchString(lines[j]) {
|
||||||
|
k := j + 1
|
||||||
|
for k < len(lines) && strings.TrimSpace(lines[k]) == "" {
|
||||||
|
k++
|
||||||
|
}
|
||||||
|
if k < len(lines) && headerRe.MatchString(lines[k]) {
|
||||||
|
// We've reached the next file block.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
contentLines = append(contentLines, lines[j])
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
blocks = append(blocks, fileBlock{
|
||||||
|
name: strings.TrimSpace(filename),
|
||||||
|
content: strings.Join(contentLines, "\n"),
|
||||||
|
})
|
||||||
|
i = j
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return blocks
|
||||||
|
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user