gohttp/vendor/github.com/signalsciences/ac/acascii/ac.go

// Package ac provides an implementation of the Aho-Corasick string matching
// algorithm. Throughout this code []byte is referred to
// as a blice.
//
// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm
//
// Copyright (c) 2013 CloudFlare, Inc.
//
// Originally from https://github.com/cloudflare/ahocorasick
package acascii

import (
	"container/list"
	"errors"
)

const maxchar = 128

// ErrNotASCII is returned when the dictionary input is not ASCII
var ErrNotASCII = errors.New("non-ASCII input")

// A node in the trie structure used to implement Aho-Corasick
type node struct {
	root bool // true if this is the root

	output bool // True means this node represents a blice that should
	// be output when matching

	b string // The path at this node

	index int // index into original dictionary if output is true

	counter int // Set to the value of the Matcher.counter when a
	// match is output to prevent duplicate output

	// The use of fixed size arrays is space-inefficient but fast for
	// lookups.

	child [maxchar]*node // A non-nil entry in this array means that the
	// index represents a byte value which can be
	// appended to the current node. Blices in the
	// trie are built up byte by byte through these
	// child node pointers.

	fails [maxchar]*node // Where to fail to (by following the fail
	// pointers) for each possible byte

	suffix *node // Pointer to the longest possible strict suffix of
	// this node

	fail *node // Pointer to the next node which is in the dictionary
	// which can be reached from here following suffixes. Called fail
	// because it is used to fallback in the trie when a match fails.
}

// Matcher contains a list of blices to match against
type Matcher struct {
	counter int // Counts the number of matches done, and is used to
	// prevent output of multiple matches of the same string
	trie []node // preallocated block of memory containing all the
	// nodes
	extent int   // offset into trie that is currently free
	root   *node // Points to trie[0]
}

// findBlice looks for a blice in the trie starting from the root and
// returns a pointer to the node representing the end of the blice. If
// the blice is not found it returns nil.
func (m *Matcher) findBlice(b string) *node {
	n := &m.trie[0]

	for n != nil && len(b) > 0 {
		n = n.child[int(b[0])]
		b = b[1:]
	}

	return n
}

// getFreeNode: gets a free node structure from the Matcher's trie
// pool and updates the extent to point to the next free node.
func (m *Matcher) getFreeNode() *node {
	m.extent++

	if m.extent == 1 {
		m.root = &m.trie[0]
		m.root.root = true
	}

	return &m.trie[m.extent-1]
}

// buildTrie builds the fundamental trie structure from a set of
// blices.
func (m *Matcher) buildTrie(dictionary [][]byte) error {

	// Work out the maximum size for the trie (all dictionary entries
	// are distinct plus the root). This is used to preallocate memory
	// for it.

	max := 1
	for _, blice := range dictionary {
		max += len(blice)
	}
	m.trie = make([]node, max)

	// Calling this an ignoring its argument simply allocated
	// m.trie[0] which will be the root element

	m.getFreeNode()

	// This loop builds the nodes in the trie by following through
	// each dictionary entry building the children pointers.

	for _, blice := range dictionary {
		n := m.root
		for i, b := range blice {
			idx := int(b)
			if idx >= maxchar {
				return ErrNotASCII 
			}
			c := n.child[idx]

			if c == nil {
				c = m.getFreeNode()
				n.child[idx] = c
				c.b = string(blice[0 : i+1])

				// Nodes directly under the root node will have the
				// root as their fail point as there are no suffixes
				// possible.

				if i == 0 {
					c.fail = m.root
				}

				c.suffix = m.root
			}

			n = c
		}

		// The last value of n points to the node representing a
		// dictionary entry

		n.output = true
		n.index = len(blice)
	}

	l := new(list.List)
	l.PushBack(m.root)

	for l.Len() > 0 {
		n := l.Remove(l.Front()).(*node)

		for i := 0; i < maxchar; i++ {
			c := n.child[i]
			if c != nil {
				l.PushBack(c)

				for j := 1; j < len(c.b); j++ {
					c.fail = m.findBlice(c.b[j:])
					if c.fail != nil {
						break
					}
				}

				if c.fail == nil {
					c.fail = m.root
				}

				for j := 1; j < len(c.b); j++ {
					s := m.findBlice(c.b[j:])
					if s != nil && s.output {
						c.suffix = s
						break
					}
				}
			}
		}
	}

	for i := 0; i < m.extent; i++ {
		for c := 0; c < maxchar; c++ {
			n := &m.trie[i]
			for n.child[c] == nil && !n.root {
				n = n.fail
			}

			m.trie[i].fails[c] = n
		}
	}

	m.trie = m.trie[:m.extent]
	return nil
}

// buildTrieString builds the fundamental trie structure from a []string
func (m *Matcher) buildTrieString(dictionary []string) error {

	// Work out the maximum size for the trie (all dictionary entries
	// are distinct plus the root). This is used to preallocate memory
	// for it.

	max := 1
	for _, blice := range dictionary {
		max += len(blice)

	}
	m.trie = make([]node, max)

	// Calling this an ignoring its argument simply allocated
	// m.trie[0] which will be the root element

	m.getFreeNode()

	// This loop builds the nodes in the trie by following through
	// each dictionary entry building the children pointers.

	for _, blice := range dictionary {
		n := m.root
		for i := 0; i < len(blice); i++ {
			index := int(blice[i])
			if index >= maxchar {
				return ErrNotASCII
			}
			b := int(blice[i])
			c := n.child[b]
			if c == nil {
				c = m.getFreeNode()
				n.child[b] = c
				c.b = blice[0 : i+1]

				// Nodes directly under the root node will have the
				// root as their fail point as there are no suffixes
				// possible.

				if i == 0 {
					c.fail = m.root
				}

				c.suffix = m.root
			}

			n = c
		}

		// The last value of n points to the node representing a
		// dictionary entry

		n.output = true
		n.index = len(blice)
	}

	l := new(list.List)
	l.PushBack(m.root)

	for l.Len() > 0 {
		n := l.Remove(l.Front()).(*node)

		for i := 0; i < maxchar; i++ {
			c := n.child[i]
			if c != nil {
				l.PushBack(c)

				for j := 1; j < len(c.b); j++ {
					c.fail = m.findBlice(c.b[j:])
					if c.fail != nil {
						break
					}
				}

				if c.fail == nil {
					c.fail = m.root
				}

				for j := 1; j < len(c.b); j++ {
					s := m.findBlice(c.b[j:])
					if s != nil && s.output {
						c.suffix = s
						break
					}
				}
			}
		}
	}

	for i := 0; i < m.extent; i++ {
		for c := 0; c < maxchar; c++ {
			n := &m.trie[i]
			for n.child[c] == nil && !n.root {
				n = n.fail
			}

			m.trie[i].fails[c] = n
		}
	}

	m.trie = m.trie[:m.extent]
	return nil
}

// Compile creates a new Matcher using a list of []byte
func Compile(dictionary [][]byte) (*Matcher, error) {
	m := new(Matcher)
	if err := m.buildTrie(dictionary); err != nil {
		return nil, err
	}
	return m, nil
}

// MustCompile returns a Matcher or panics
func MustCompile(dictionary [][]byte) *Matcher {
	m, err := Compile(dictionary)
	if err != nil {
		panic(err)
	}
	return m
}

// CompileString creates a new Matcher used to match against a set
// of strings (this is a helper to make initialization easy)
func CompileString(dictionary []string) (*Matcher, error) {
	m := new(Matcher)
	if err := m.buildTrieString(dictionary); err != nil {
		return nil, err
	}	
	return m, nil
}

// MustCompileString returns a Matcher or panics
func MustCompileString(dictionary []string) *Matcher {
	m, err := CompileString(dictionary)
	if err != nil {
		panic(err)
	}
	return m
}

// FindAll searches in for blices and returns all the blices found
// in the original dictionary
func (m *Matcher) FindAll(in []byte) [][]byte {
	m.counter++
	var hits [][]byte

	n := m.root

	for idx, b := range in {
		c := int(b)
		if c >= maxchar {
			c = 0
		}
		if !n.root && n.child[c] == nil {
			n = n.fails[c]
		}

		if n.child[c] != nil {
			f := n.child[c]
			n = f

			if f.output && f.counter != m.counter {
				hits = append(hits, in[idx-f.index+1:idx+1])
				f.counter = m.counter
			}

			for !f.suffix.root {
				f = f.suffix
				if f.counter != m.counter {
					hits = append(hits, in[idx-f.index+1:idx+1])
					f.counter = m.counter
				} else {
					// There's no point working our way up the
					// suffixes if it's been done before for this call
					// to Match. The matches are already in hits.
					break
				}
			}
		}
	}

	return hits
}

// FindAllString searches in for blices and returns all the blices (as strings) found as
// in the original dictionary
func (m *Matcher) FindAllString(in string) []string {
	m.counter++
	var hits []string

	n := m.root
	slen := len(in)
	for idx := 0; idx < slen; idx++ {
		c := int(in[idx])
		if c >= maxchar {
			c = 0
		}
		if !n.root && n.child[c] == nil {
			n = n.fails[c]
		}

		if n.child[c] != nil {
			f := n.child[c]
			n = f

			if f.output && f.counter != m.counter {
				hits = append(hits, in[idx-f.index+1:idx+1])
				f.counter = m.counter
			}

			for !f.suffix.root {
				f = f.suffix
				if f.counter != m.counter {
					hits = append(hits, in[idx-f.index+1:idx+1])
					f.counter = m.counter
				} else {
					// There's no point working our way up the
					// suffixes if it's been done before for this call
					// to Match. The matches are already in hits.
					break
				}
			}
		}
	}

	return hits
}

// Match returns true if the input slice contains any subslices
func (m *Matcher) Match(in []byte) bool {
	n := m.root
	for _, b := range in {
		c := int(b)
		if c > maxchar {
		   c = 0	
		}
		if !n.root && n.child[c] == nil {
			n = n.fails[c]
		}

		if n.child[c] != nil {
			n = n.child[c]

			if n.output {
				return true
			}

			for !n.suffix.root {
				return true
			}
		}
	}
	return false
}

// MatchString returns true if the input slice contains any subslices
func (m *Matcher) MatchString(in string) bool {
	n := m.root
	slen := len(in)
	for idx := 0; idx < slen; idx++ {
		c := int(in[idx])
		if c >= maxchar {
			c = 0	
		}
		if !n.root && n.child[c] == nil {
			n = n.fails[c]
		}
		if n.child[c] != nil {
			n = n.child[c]

			if n.output {
				return true
			}

			for !n.suffix.root {
				return true
			}
		}
	}
	return false
}
add gzip 2023-12-21 18:36:51 +08:00			`// Package ac provides an implementation of the Aho-Corasick string matching`
			`// algorithm. Throughout this code []byte is referred to`
			`// as a blice.`
			`//`
			`// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm`
			`//`
			`// Copyright (c) 2013 CloudFlare, Inc.`
			`//`
			`// Originally from https://github.com/cloudflare/ahocorasick`
			`package acascii`

			`import (`
			`"container/list"`
			`"errors"`
			`)`

			`const maxchar = 128`

			`// ErrNotASCII is returned when the dictionary input is not ASCII`
			`var ErrNotASCII = errors.New("non-ASCII input")`

			`// A node in the trie structure used to implement Aho-Corasick`
			`type node struct {`
			`root bool // true if this is the root`

			`output bool // True means this node represents a blice that should`
			`// be output when matching`

			`b string // The path at this node`

			`index int // index into original dictionary if output is true`

			`counter int // Set to the value of the Matcher.counter when a`
			`// match is output to prevent duplicate output`

			`// The use of fixed size arrays is space-inefficient but fast for`
			`// lookups.`

			`child [maxchar]*node // A non-nil entry in this array means that the`
			`// index represents a byte value which can be`
			`// appended to the current node. Blices in the`
			`// trie are built up byte by byte through these`
			`// child node pointers.`

			`fails [maxchar]*node // Where to fail to (by following the fail`
			`// pointers) for each possible byte`

			`suffix *node // Pointer to the longest possible strict suffix of`
			`// this node`

			`fail *node // Pointer to the next node which is in the dictionary`
			`// which can be reached from here following suffixes. Called fail`
			`// because it is used to fallback in the trie when a match fails.`
			`}`

			`// Matcher contains a list of blices to match against`
			`type Matcher struct {`
			`counter int // Counts the number of matches done, and is used to`
			`// prevent output of multiple matches of the same string`
			`trie []node // preallocated block of memory containing all the`
			`// nodes`
			`extent int // offset into trie that is currently free`
			`root *node // Points to trie[0]`
			`}`

			`// findBlice looks for a blice in the trie starting from the root and`
			`// returns a pointer to the node representing the end of the blice. If`
			`// the blice is not found it returns nil.`
			`func (m Matcher) findBlice(b string) node {`
			`n := &m.trie[0]`

			`for n != nil && len(b) > 0 {`
			`n = n.child[int(b[0])]`
			`b = b[1:]`
			`}`

			`return n`
			`}`

			`// getFreeNode: gets a free node structure from the Matcher's trie`
			`// pool and updates the extent to point to the next free node.`
			`func (m Matcher) getFreeNode() node {`
			`m.extent++`

			`if m.extent == 1 {`
			`m.root = &m.trie[0]`
			`m.root.root = true`
			`}`

			`return &m.trie[m.extent-1]`
			`}`

			`// buildTrie builds the fundamental trie structure from a set of`
			`// blices.`
			`func (m *Matcher) buildTrie(dictionary [][]byte) error {`

			`// Work out the maximum size for the trie (all dictionary entries`
			`// are distinct plus the root). This is used to preallocate memory`
			`// for it.`

			`max := 1`
			`for _, blice := range dictionary {`
			`max += len(blice)`
			`}`
			`m.trie = make([]node, max)`

			`// Calling this an ignoring its argument simply allocated`
			`// m.trie[0] which will be the root element`

			`m.getFreeNode()`

			`// This loop builds the nodes in the trie by following through`
			`// each dictionary entry building the children pointers.`

			`for _, blice := range dictionary {`
			`n := m.root`
			`for i, b := range blice {`
			`idx := int(b)`
			`if idx >= maxchar {`
			`return ErrNotASCII`
			`}`
			`c := n.child[idx]`

			`if c == nil {`
			`c = m.getFreeNode()`
			`n.child[idx] = c`
			`c.b = string(blice[0 : i+1])`

			`// Nodes directly under the root node will have the`
			`// root as their fail point as there are no suffixes`
			`// possible.`

			`if i == 0 {`
			`c.fail = m.root`
			`}`

			`c.suffix = m.root`
			`}`

			`n = c`
			`}`

			`// The last value of n points to the node representing a`
			`// dictionary entry`

			`n.output = true`
			`n.index = len(blice)`
			`}`

			`l := new(list.List)`
			`l.PushBack(m.root)`

			`for l.Len() > 0 {`
			`n := l.Remove(l.Front()).(*node)`

			`for i := 0; i < maxchar; i++ {`
			`c := n.child[i]`
			`if c != nil {`
			`l.PushBack(c)`

			`for j := 1; j < len(c.b); j++ {`
			`c.fail = m.findBlice(c.b[j:])`
			`if c.fail != nil {`
			`break`
			`}`
			`}`

			`if c.fail == nil {`
			`c.fail = m.root`
			`}`

			`for j := 1; j < len(c.b); j++ {`
			`s := m.findBlice(c.b[j:])`
			`if s != nil && s.output {`
			`c.suffix = s`
			`break`
			`}`
			`}`
			`}`
			`}`
			`}`

			`for i := 0; i < m.extent; i++ {`
			`for c := 0; c < maxchar; c++ {`
			`n := &m.trie[i]`
			`for n.child[c] == nil && !n.root {`
			`n = n.fail`
			`}`

			`m.trie[i].fails[c] = n`
			`}`
			`}`

			`m.trie = m.trie[:m.extent]`
			`return nil`
			`}`

			`// buildTrieString builds the fundamental trie structure from a []string`
			`func (m *Matcher) buildTrieString(dictionary []string) error {`

			`// Work out the maximum size for the trie (all dictionary entries`
			`// are distinct plus the root). This is used to preallocate memory`
			`// for it.`

			`max := 1`
			`for _, blice := range dictionary {`
			`max += len(blice)`

			`}`
			`m.trie = make([]node, max)`

			`// Calling this an ignoring its argument simply allocated`
			`// m.trie[0] which will be the root element`

			`m.getFreeNode()`

			`// This loop builds the nodes in the trie by following through`
			`// each dictionary entry building the children pointers.`

			`for _, blice := range dictionary {`
			`n := m.root`
			`for i := 0; i < len(blice); i++ {`
			`index := int(blice[i])`
			`if index >= maxchar {`
			`return ErrNotASCII`
			`}`
			`b := int(blice[i])`
			`c := n.child[b]`
			`if c == nil {`
			`c = m.getFreeNode()`
			`n.child[b] = c`
			`c.b = blice[0 : i+1]`

			`// Nodes directly under the root node will have the`
			`// root as their fail point as there are no suffixes`
			`// possible.`

			`if i == 0 {`
			`c.fail = m.root`
			`}`

			`c.suffix = m.root`
			`}`

			`n = c`
			`}`

			`// The last value of n points to the node representing a`
			`// dictionary entry`

			`n.output = true`
			`n.index = len(blice)`
			`}`

			`l := new(list.List)`
			`l.PushBack(m.root)`

			`for l.Len() > 0 {`
			`n := l.Remove(l.Front()).(*node)`

			`for i := 0; i < maxchar; i++ {`
			`c := n.child[i]`
			`if c != nil {`
			`l.PushBack(c)`

			`for j := 1; j < len(c.b); j++ {`
			`c.fail = m.findBlice(c.b[j:])`
			`if c.fail != nil {`
			`break`
			`}`
			`}`

			`if c.fail == nil {`
			`c.fail = m.root`
			`}`

			`for j := 1; j < len(c.b); j++ {`
			`s := m.findBlice(c.b[j:])`
			`if s != nil && s.output {`
			`c.suffix = s`
			`break`
			`}`
			`}`
			`}`
			`}`
			`}`

			`for i := 0; i < m.extent; i++ {`
			`for c := 0; c < maxchar; c++ {`
			`n := &m.trie[i]`
			`for n.child[c] == nil && !n.root {`
			`n = n.fail`
			`}`

			`m.trie[i].fails[c] = n`
			`}`
			`}`

			`m.trie = m.trie[:m.extent]`
			`return nil`
			`}`

			`// Compile creates a new Matcher using a list of []byte`
			`func Compile(dictionary [][]byte) (*Matcher, error) {`
			`m := new(Matcher)`
			`if err := m.buildTrie(dictionary); err != nil {`
			`return nil, err`
			`}`
			`return m, nil`
			`}`

			`// MustCompile returns a Matcher or panics`
			`func MustCompile(dictionary [][]byte) *Matcher {`
			`m, err := Compile(dictionary)`
			`if err != nil {`
			`panic(err)`
			`}`
			`return m`
			`}`

			`// CompileString creates a new Matcher used to match against a set`
			`// of strings (this is a helper to make initialization easy)`
			`func CompileString(dictionary []string) (*Matcher, error) {`
			`m := new(Matcher)`
			`if err := m.buildTrieString(dictionary); err != nil {`
			`return nil, err`
			`}`
			`return m, nil`
			`}`

			`// MustCompileString returns a Matcher or panics`
			`func MustCompileString(dictionary []string) *Matcher {`
			`m, err := CompileString(dictionary)`
			`if err != nil {`
			`panic(err)`
			`}`
			`return m`
			`}`

			`// FindAll searches in for blices and returns all the blices found`
			`// in the original dictionary`
			`func (m *Matcher) FindAll(in []byte) [][]byte {`
			`m.counter++`
			`var hits [][]byte`

			`n := m.root`

			`for idx, b := range in {`
			`c := int(b)`
			`if c >= maxchar {`
			`c = 0`
			`}`
			`if !n.root && n.child[c] == nil {`
			`n = n.fails[c]`
			`}`

			`if n.child[c] != nil {`
			`f := n.child[c]`
			`n = f`

			`if f.output && f.counter != m.counter {`
			`hits = append(hits, in[idx-f.index+1:idx+1])`
			`f.counter = m.counter`
			`}`

			`for !f.suffix.root {`
			`f = f.suffix`
			`if f.counter != m.counter {`
			`hits = append(hits, in[idx-f.index+1:idx+1])`
			`f.counter = m.counter`
			`} else {`
			`// There's no point working our way up the`
			`// suffixes if it's been done before for this call`
			`// to Match. The matches are already in hits.`
			`break`
			`}`
			`}`
			`}`
			`}`

			`return hits`
			`}`

			`// FindAllString searches in for blices and returns all the blices (as strings) found as`
			`// in the original dictionary`
			`func (m *Matcher) FindAllString(in string) []string {`
			`m.counter++`
			`var hits []string`

			`n := m.root`
			`slen := len(in)`
			`for idx := 0; idx < slen; idx++ {`
			`c := int(in[idx])`
			`if c >= maxchar {`
			`c = 0`
			`}`
			`if !n.root && n.child[c] == nil {`
			`n = n.fails[c]`
			`}`

			`if n.child[c] != nil {`
			`f := n.child[c]`
			`n = f`

			`if f.output && f.counter != m.counter {`
			`hits = append(hits, in[idx-f.index+1:idx+1])`
			`f.counter = m.counter`
			`}`

			`for !f.suffix.root {`
			`f = f.suffix`
			`if f.counter != m.counter {`
			`hits = append(hits, in[idx-f.index+1:idx+1])`
			`f.counter = m.counter`
			`} else {`
			`// There's no point working our way up the`
			`// suffixes if it's been done before for this call`
			`// to Match. The matches are already in hits.`
			`break`
			`}`
			`}`
			`}`
			`}`

			`return hits`
			`}`

			`// Match returns true if the input slice contains any subslices`
			`func (m *Matcher) Match(in []byte) bool {`
			`n := m.root`
			`for _, b := range in {`
			`c := int(b)`
			`if c > maxchar {`
			`c = 0`
			`}`
			`if !n.root && n.child[c] == nil {`
			`n = n.fails[c]`
			`}`

			`if n.child[c] != nil {`
			`n = n.child[c]`

			`if n.output {`
			`return true`
			`}`

			`for !n.suffix.root {`
			`return true`
			`}`
			`}`
			`}`
			`return false`
			`}`

			`// MatchString returns true if the input slice contains any subslices`
			`func (m *Matcher) MatchString(in string) bool {`
			`n := m.root`
			`slen := len(in)`
			`for idx := 0; idx < slen; idx++ {`
			`c := int(in[idx])`
			`if c >= maxchar {`
			`c = 0`
			`}`
			`if !n.root && n.child[c] == nil {`
			`n = n.fails[c]`
			`}`
			`if n.child[c] != nil {`
			`n = n.child[c]`

			`if n.output {`
			`return true`
			`}`

			`for !n.suffix.root {`
			`return true`
			`}`
			`}`
			`}`
			`return false`
			`}`