fast-replace/go-fast-replace/internal/replace/cw.go

114 lines
2.3 KiB
Go
Raw Permalink Normal View History

2023-12-30 21:35:09 +00:00
package replace
type Node struct {
children map[rune]*Node
output int
fail *Node
}
func NewNode() *Node {
return &Node{
children: make(map[rune]*Node),
output: -1,
fail: nil,
}
}
type CommentzWalter struct {
root *Node
patterns []string
replacements map[int]string
}
func NewCommentzWalter() *CommentzWalter {
return &CommentzWalter{
root: NewNode(),
patterns: make([]string, 0),
replacements: make(map[int]string),
}
}
func (cw *CommentzWalter) AddPattern(pattern string, replacement string) {
cw.patterns = append(cw.patterns, pattern)
idx := len(cw.patterns) - 1
cw.replacements[idx] = replacement
}
func (cw *CommentzWalter) BuildMatchingMachine() {
for idx, pattern := range cw.patterns {
current := cw.root
for _, char := range pattern {
if _, exists := current.children[char]; !exists {
current.children[char] = NewNode()
}
current = current.children[char]
}
current.output = idx
}
queue := make([]*Node, 0)
for _, node := range cw.root.children {
queue = append(queue, node)
node.fail = cw.root
}
for len(queue) > 0 {
r := queue[0]
queue = queue[1:]
for char, child := range r.children {
queue = append(queue, child)
failNode := r.fail
for failNode != nil && failNode.children[char] == nil {
failNode = failNode.fail
}
if failNode == nil {
child.fail = cw.root
} else {
child.fail = failNode.children[char]
}
}
}
}
func (cw *CommentzWalter) CommentzWalterReplace(text string) string {
matches := make([]string, 0)
current := cw.root
runes := []rune(text)
replaced := make([]rune, 0)
for i, char := range runes {
for current != nil && current.children[char] == nil {
current = current.fail
}
if current == nil {
current = cw.root
replaced = append(replaced, runes[i])
continue
}
current = current.children[char]
if current.output != -1 {
matchIdx := current.output
matches = append(matches, cw.replacements[matchIdx])
replRunes := []rune(cw.replacements[matchIdx])
for _, rn := range replRunes {
replaced = append(replaced, rn)
}
patternLen := len(cw.patterns[matchIdx])
replaceLen := len(cw.replacements[matchIdx])
if (patternLen > replaceLen) {
// Skip index to after the pattern
dif := patternLen - replaceLen
i = i + dif
}
}
}
return string(replaced)
}