package main

import (
	"fmt"
	"iter"
	"strings"
	"unicode"
)

// Chunker splits text into pieces of up to `size` runes with `overlap` runes
// shared between neighbors. Cuts always honor word boundaries: a word is never
// split unless it alone is longer than `size`.
func Chunker(text string, size, overlap int) iter.Seq[string] {
	return func(yield func(string) bool) {
		runes := []rune(text)
		n := len(runes)
		step := size - overlap
		if n == 0 || size <= 0 || step <= 0 {
			return
		}

		for i := 0; i < n; {
			end := i + size
			if end >= n {
				last := strings.TrimSpace(string(runes[i:n]))
				if last != "" {
					yield(last)
				}
				return
			}

			// Walk back to the last word boundary so we don't split a word.
			// Fall through to a hard cut only when a single word fills the window.
			cut := end
			for cut > i && !unicode.IsSpace(runes[cut]) {
				cut--
			}
			if cut == i {
				cut = end
			}

			if !yield(strings.TrimSpace(string(runes[i:cut]))) {
				return
			}

			// Next window: apply overlap, then snap forward to the start of a word.
			i = max(cut-overlap, 0)
			if i > 0 && !unicode.IsSpace(runes[i-1]) {
				for i < n && !unicode.IsSpace(runes[i]) {
					i++
				}
			}
			for i < n && unicode.IsSpace(runes[i]) {
				i++
			}
		}
	}
}

// main runs Chunker over a fixed sample document and prints a summary line,
// a separator, and then every chunk as a numbered, quoted string.
func main() {
	const (
		size    = 60
		overlap = 15
	)

	document := `The Go programming language favors standard libraries and simple tools. The use of iterators, introduced in recent versions, simplifies the construction of complex pipelines.`

	// Report the length in runes, the unit Chunker works in.
	runeCount := len([]rune(document))
	fmt.Printf("document: %d runes | size=%d overlap=%d\n", runeCount, size, overlap)

	separator := strings.Repeat("-", 60)
	fmt.Println(separator)

	// Chunks are numbered from 1 in the order the iterator yields them.
	count := 0
	for piece := range Chunker(document, size, overlap) {
		count++
		fmt.Printf("[%02d] %q\n", count, piece)
	}
}
