From 95b872aa6173815c86d04d2714f734be7123956c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=B9=BF?= Date: Thu, 7 Nov 2024 22:48:55 +0800 Subject: [PATCH] add code --- atom_test.go | 56 + example_marshaling_test.go | 85 + example_test.go | 152 ++ example_text_marshaling_test.go | 80 + go.mod | 3 + marshal.go | 1131 ++++++++++++++ marshal_test.go | 2591 +++++++++++++++++++++++++++++++ prefix_test.go | 44 + read.go | 777 +++++++++ read_test.go | 1128 ++++++++++++++ typeinfo.go | 372 +++++ xml.go | 2075 +++++++++++++++++++++++++ xml_test.go | 1488 ++++++++++++++++++ 13 files changed, 9982 insertions(+) create mode 100644 atom_test.go create mode 100644 example_marshaling_test.go create mode 100644 example_test.go create mode 100644 example_text_marshaling_test.go create mode 100644 go.mod create mode 100644 marshal.go create mode 100644 marshal_test.go create mode 100644 prefix_test.go create mode 100644 read.go create mode 100644 read_test.go create mode 100644 typeinfo.go create mode 100644 xml.go create mode 100644 xml_test.go diff --git a/atom_test.go b/atom_test.go new file mode 100644 index 0000000..d43c531 --- /dev/null +++ b/atom_test.go @@ -0,0 +1,56 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import "time" + +var atomValue = &Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Example Feed", + Link: []Link{{Href: "http://example.org/"}}, + Updated: ParseTime("2003-12-13T18:30:02Z"), + Author: Person{Name: "John Doe"}, + ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + + Entry: []Entry{ + { + Title: "Atom-Powered Robots Run Amok", + Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, + ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + Updated: ParseTime("2003-12-13T18:30:02Z"), + Summary: NewText("Some text."), + }, + }, +} + +var atomXML = `` + + `` + + `Example Feed` + + `urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6` + + `` + + `John Doe` + + `` + + `Atom-Powered Robots Run Amok` + + `urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a` + + `` + + `2003-12-13T18:30:02Z` + + `` + + `Some text.` + + `` + + `` + +func ParseTime(str string) time.Time { + t, err := time.Parse(time.RFC3339, str) + if err != nil { + panic(err) + } + return t +} + +func NewText(text string) Text { + return Text{ + Body: text, + } +} diff --git a/example_marshaling_test.go b/example_marshaling_test.go new file mode 100644 index 0000000..28bb1e2 --- /dev/null +++ b/example_marshaling_test.go @@ -0,0 +1,85 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml_test + +import ( + "fmt" + "log" + "strings" + + "git.pyer.club/kingecg/goxml" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalXML(d *goxml.Decoder, start goxml.StartElement) error { + var s string + if err := d.DecodeElement(&s, &start); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalXML(e *goxml.Encoder, start goxml.StartElement) error { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + return e.EncodeElement(s, start) +} + +func Example_customMarshalXML() { + blob := ` + + gopher + armadillo + zebra + unknown + gopher + bee + gopher + zebra + ` + var zoo struct { + Animals []Animal `xml:"animal"` + } + if err := goxml.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo.Animals { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/example_test.go b/example_test.go new file mode 100644 index 0000000..b964e35 --- /dev/null +++ b/example_test.go @@ -0,0 +1,152 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml_test + +import ( + "fmt" + "os" + + "git.pyer.club/kingecg/goxml" +) + +func ExampleMarshalIndent() { + type Address struct { + City, State string + } + type Person struct { + XMLName goxml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + output, err := goxml.MarshalIndent(v, " ", " ") + if err != nil { + fmt.Printf("error: %v\n", err) + } + + os.Stdout.Write(output) + // Output: + // + // + // John + // Doe + // + // 42 + // false + // Hanga Roa + // Easter Island + // + // +} + +func ExampleEncoder() { + type Address struct { + City, State string + } + type Person struct { + XMLName goxml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + enc := goxml.NewEncoder(os.Stdout) + enc.Indent(" ", " ") + if err := enc.Encode(v); err != nil { + fmt.Printf("error: %v\n", err) + } + + // Output: + // + // + // John + // Doe + // + // 42 + // false + // Hanga Roa + // Easter Island + // + // +} + +// This example demonstrates unmarshaling an XML excerpt into a value with +// some preset fields. Note that the Phone field isn't modified and that +// the XML element is ignored. Also, the Groups field is assigned +// considering the element path provided in its tag. +func ExampleUnmarshal() { + type Email struct { + Where string `xml:"where,attr"` + Addr string + } + type Address struct { + City, State string + } + type Result struct { + XMLName goxml.Name `xml:"Person"` + Name string `xml:"FullName"` + Phone string + Email []Email + Groups []string `xml:"Group>Value"` + Address + } + v := Result{Name: "none", Phone: "none"} + + data := ` + + Grace R. Emlin + Example Inc. + + gre@example.com + + + gre@work.com + + + Friends + Squash + + Hanga Roa + Easter Island + + ` + err := goxml.Unmarshal([]byte(data), &v) + if err != nil { + fmt.Printf("error: %v", err) + return + } + fmt.Printf("XMLName: %#v\n", v.XMLName) + fmt.Printf("Name: %q\n", v.Name) + fmt.Printf("Phone: %q\n", v.Phone) + fmt.Printf("Email: %v\n", v.Email) + fmt.Printf("Groups: %v\n", v.Groups) + fmt.Printf("Address: %v\n", v.Address) + // Output: + // XMLName: goxml.Name{Space:"", Local:"Person"} + // Name: "Grace R. Emlin" + // Phone: "none" + // Email: [{home gre@example.com} {work gre@work.com}] + // Groups: [Friends Squash] + // Address: {Hanga Roa Easter Island} +} diff --git a/example_text_marshaling_test.go b/example_text_marshaling_test.go new file mode 100644 index 0000000..b013bca --- /dev/null +++ b/example_text_marshaling_test.go @@ -0,0 +1,80 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml_test + +import ( + "fmt" + "log" + "strings" + + "git.pyer.club/kingecg/goxml" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalXML() { + blob := ` + + small + regular + large + unrecognized + small + normal + small + large + ` + var inventory struct { + Sizes []Size `xml:"size"` + } + if err := goxml.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory.Sizes { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..c1fa4b2 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module git.pyer.club/kingecg/goxml + +go 1.23.1 diff --git a/marshal.go b/marshal.go new file mode 100644 index 0000000..69a0d22 --- /dev/null +++ b/marshal.go @@ -0,0 +1,1131 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bufio" + "bytes" + "encoding" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" +) + +const ( + // Header is a generic XML header suitable for use with the output of [Marshal]. + // This is not automatically added to any output of this package, + // it is provided as a convenience. + Header = `` + "\n" +) + +// Marshal returns the XML encoding of v. +// +// Marshal handles an array or slice by marshaling each of the elements. +// Marshal handles a pointer by marshaling the value it points at or, if the +// pointer is nil, by writing nothing. Marshal handles an interface value by +// marshaling the value it contains or, if the interface value is nil, by +// writing nothing. Marshal handles all other data by writing one or more XML +// elements containing the data. +// +// The name for the XML elements is taken from, in order of preference: +// - the tag on the XMLName field, if the data is a struct +// - the value of the XMLName field of type [Name] +// - the tag of the struct field used to obtain the data +// - the name of the struct field used to obtain the data +// - the name of the marshaled type +// +// The XML element for a struct contains marshaled elements for each of the +// exported fields of the struct, with these exceptions: +// - the XMLName field, described above, is omitted. +// - a field with tag "-" is omitted. +// - a field with tag "name,attr" becomes an attribute with +// the given name in the XML element. +// - a field with tag ",attr" becomes an attribute with the +// field name in the XML element. +// - a field with tag ",chardata" is written as character data, +// not as an XML element. +// - a field with tag ",cdata" is written as character data +// wrapped in one or more tags, not as an XML element. +// - a field with tag ",innerxml" is written verbatim, not subject +// to the usual marshaling procedure. +// - a field with tag ",comment" is written as an XML comment, not +// subject to the usual marshaling procedure. It must not contain +// the "--" string within it. +// - a field with a tag including the "omitempty" option is omitted +// if the field value is empty. The empty values are false, 0, any +// nil pointer or interface value, and any array, slice, map, or +// string of length zero. +// - an anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// - a field implementing [Marshaler] is written by calling its MarshalXML +// method. +// - a field implementing [encoding.TextMarshaler] is written by encoding the +// result of its MarshalText method as text. +// +// If a field uses a tag "a>b>c", then the element c will be nested inside +// parent elements a and b. Fields that appear next to each other that name +// the same parent will be enclosed in one XML element. +// +// If the XML name for a struct field is defined by both the field tag and the +// struct's XMLName field, the names must match. +// +// See [MarshalIndent] for an example. +// +// Marshal will return an error if asked to marshal a channel, function, or map. +func Marshal(v any) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// Marshaler is the interface implemented by objects that can marshal +// themselves into valid XML elements. +// +// MarshalXML encodes the receiver as zero or more XML elements. +// By convention, arrays or slices are typically encoded as a sequence +// of elements, one per entry. +// Using start as the element tag is not required, but doing so +// will enable [Unmarshal] to match the XML elements to the correct +// struct field. +// One common implementation strategy is to construct a separate +// value with a layout corresponding to the desired XML and then +// to encode it using e.EncodeElement. +// Another common strategy is to use repeated calls to e.EncodeToken +// to generate the XML output one token at a time. +// The sequence of encoded tokens must make up zero or more valid +// XML elements. +type Marshaler interface { + MarshalXML(e *Encoder, start StartElement) error +} + +// MarshalerAttr is the interface implemented by objects that can marshal +// themselves into valid XML attributes. +// +// MarshalXMLAttr returns an XML attribute with the encoded value of the receiver. +// Using name as the attribute name is not required, but doing so +// will enable [Unmarshal] to match the attribute to the correct +// struct field. +// If MarshalXMLAttr returns the zero attribute [Attr]{}, no attribute +// will be generated in the output. +// MarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type MarshalerAttr interface { + MarshalXMLAttr(name Name) (Attr, error) +} + +// MarshalIndent works like [Marshal], but each XML element begins on a new +// indented line that starts with prefix and is followed by one or more +// copies of indent according to the nesting depth. +func MarshalIndent(v any, prefix, indent string) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + enc.Indent(prefix, indent) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// An Encoder writes XML data to an output stream. +type Encoder struct { + p printer +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + e := &Encoder{printer{w: bufio.NewWriter(w)}} + e.p.encoder = e + return e +} + +// Indent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func (enc *Encoder) Indent(prefix, indent string) { + enc.p.prefix = prefix + enc.p.indent = indent +} + +// Encode writes the XML encoding of v to the stream. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// Encode calls [Encoder.Flush] before returning. +func (enc *Encoder) Encode(v any) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, nil) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +// EncodeElement writes the XML encoding of v to the stream, +// using start as the outermost tag in the encoding. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// EncodeElement calls [Encoder.Flush] before returning. +func (enc *Encoder) EncodeElement(v any, start StartElement) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, &start) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +var ( + begComment = []byte("") + endProcInst = []byte("?>") +) + +// EncodeToken writes the given XML token to the stream. +// It returns an error if [StartElement] and [EndElement] tokens are not properly matched. +// +// EncodeToken does not call [Encoder.Flush], because usually it is part of a larger operation +// such as [Encoder.Encode] or [Encoder.EncodeElement] (or a custom [Marshaler]'s MarshalXML invoked +// during those), and those will call Flush when finished. +// Callers that create an Encoder and then invoke EncodeToken directly, without +// using Encode or EncodeElement, need to call Flush when finished to ensure +// that the XML is written to the underlying writer. +// +// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token +// in the stream. +func (enc *Encoder) EncodeToken(t Token) error { + + p := &enc.p + switch t := t.(type) { + case StartElement: + if err := p.writeStart(&t); err != nil { + return err + } + case EndElement: + if err := p.writeEnd(t.Name); err != nil { + return err + } + case CharData: + escapeText(p, t, false) + case Comment: + if bytes.Contains(t, endComment) { + return fmt.Errorf("xml: EncodeToken of Comment containing --> marker") + } + p.WriteString("") + return p.cachedWriteError() + case ProcInst: + // First token to be encoded which is also a ProcInst with target of xml + // is the xml declaration. The only ProcInst where target of xml is allowed. + if t.Target == "xml" && p.w.Buffered() != 0 { + return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded") + } + if !isNameString(t.Target) { + return fmt.Errorf("xml: EncodeToken of ProcInst with invalid Target") + } + if bytes.Contains(t.Inst, endProcInst) { + return fmt.Errorf("xml: EncodeToken of ProcInst containing ?> marker") + } + p.WriteString(" 0 { + p.WriteByte(' ') + p.Write(t.Inst) + } + p.WriteString("?>") + case Directive: + if !isValidDirective(t) { + return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") + } + p.WriteString("") + default: + return fmt.Errorf("xml: EncodeToken of invalid token type") + + } + return p.cachedWriteError() +} + +// isValidDirective reports whether dir is a valid directive text, +// meaning angle brackets are matched, ignoring comments and strings. +func isValidDirective(dir Directive) bool { + var ( + depth int + inquote uint8 + incomment bool + ) + for i, c := range dir { + switch { + case incomment: + if c == '>' { + if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { + incomment = false + } + } + // Just ignore anything in comment + case inquote != 0: + if c == inquote { + inquote = 0 + } + // Just ignore anything within quotes + case c == '\'' || c == '"': + inquote = c + case c == '<': + if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { + incomment = true + } else { + depth++ + } + case c == '>': + if depth == 0 { + return false + } + depth-- + } + } + return depth == 0 && inquote == 0 && !incomment +} + +// Flush flushes any buffered XML to the underlying writer. +// See the [Encoder.EncodeToken] documentation for details about when it is necessary. +func (enc *Encoder) Flush() error { + return enc.p.w.Flush() +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (enc *Encoder) Close() error { + return enc.p.Close() +} + +type printer struct { + w *bufio.Writer + encoder *Encoder + seq int + indent string + prefix string + depth int + indentedIn bool + putNewline bool + attrNS map[string]string // map prefix -> name space + attrPrefix map[string]string // map name space -> prefix + prefixes []string + tags []Name + closed bool + err error +} + +// createAttrPrefix finds the name space prefix attribute to use for the given name space, +// defining a new prefix if necessary. It returns the prefix. +func (p *printer) createAttrPrefix(url string) string { + if prefix := p.attrPrefix[url]; prefix != "" { + return prefix + } + + // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" + // and must be referred to that way. + // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", + // but users should not be trying to use that one directly - that's our job.) + if url == xmlURL { + return xmlPrefix + } + + // Need to define a new name space. + if p.attrPrefix == nil { + p.attrPrefix = make(map[string]string) + p.attrNS = make(map[string]string) + } + + // Pick a name. We try to use the final element of the path + // but fall back to _. + prefix := strings.TrimRight(url, "/") + if i := strings.LastIndex(prefix, "/"); i >= 0 { + prefix = prefix[i+1:] + } + if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") { + prefix = "_" + } + // xmlanything is reserved and any variant of it regardless of + // case should be matched, so: + // (('X'|'x') ('M'|'m') ('L'|'l')) + // See Section 2.3 of https://www.w3.org/TR/REC-xml/ + if len(prefix) >= 3 && strings.EqualFold(prefix[:3], "xml") { + prefix = "_" + prefix + } + if p.attrNS[prefix] != "" { + // Name is taken. Find a better one. + for p.seq++; ; p.seq++ { + if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" { + prefix = id + break + } + } + } + + p.attrPrefix[url] = prefix + p.attrNS[prefix] = url + + p.WriteString(`xmlns:`) + p.WriteString(prefix) + p.WriteString(`="`) + EscapeText(p, []byte(url)) + p.WriteString(`" `) + + p.prefixes = append(p.prefixes, prefix) + + return prefix +} + +// deleteAttrPrefix removes an attribute name space prefix. +func (p *printer) deleteAttrPrefix(prefix string) { + delete(p.attrPrefix, p.attrNS[prefix]) + delete(p.attrNS, prefix) +} + +func (p *printer) markPrefix() { + p.prefixes = append(p.prefixes, "") +} + +func (p *printer) popPrefix() { + for len(p.prefixes) > 0 { + prefix := p.prefixes[len(p.prefixes)-1] + p.prefixes = p.prefixes[:len(p.prefixes)-1] + if prefix == "" { + break + } + p.deleteAttrPrefix(prefix) + } +} + +var ( + marshalerType = reflect.TypeFor[Marshaler]() + marshalerAttrType = reflect.TypeFor[MarshalerAttr]() + textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() +) + +// marshalValue writes one or more XML elements representing val. +// If val was obtained from a struct field, finfo must have its details. +func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplate *StartElement) error { + if startTemplate != nil && startTemplate.Name.Local == "" { + return fmt.Errorf("xml: EncodeElement of StartElement with missing name") + } + + if !val.IsValid() { + return nil + } + if finfo != nil && finfo.flags&fOmitEmpty != 0 && isEmptyValue(val) { + return nil + } + + // Drill into interfaces and pointers. + // This can turn into an infinite loop given a cyclic chain, + // but it matches the Go 1 behavior. + for val.Kind() == reflect.Interface || val.Kind() == reflect.Pointer { + if val.IsNil() { + return nil + } + val = val.Elem() + } + + kind := val.Kind() + typ := val.Type() + + // Check for marshaler. + if val.CanInterface() && typ.Implements(marshalerType) { + return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerType) { + return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Check for text marshaler. + if val.CanInterface() && typ.Implements(textMarshalerType) { + return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Slices and arrays iterate over the elements. They do not have an enclosing tag. + if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { + for i, n := 0, val.Len(); i < n; i++ { + if err := p.marshalValue(val.Index(i), finfo, startTemplate); err != nil { + return err + } + } + return nil + } + + tinfo, err := getTypeInfo(typ, false) + if err != nil { + return err + } + + // Create start element. + // Precedence for the XML element name is: + // 0. startTemplate + // 1. XMLName field in underlying struct; + // 2. field name/tag in the struct field; and + // 3. type name + var start StartElement + + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if tinfo.xmlname != nil { + xmlname := tinfo.xmlname + if xmlname.name != "" { + start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name + } else { + fv := xmlname.value(val, dontInitNilPointers) + if v, ok := fv.Interface().(Name); ok && v.Local != "" { + start.Name = v + } + } + } + if start.Name.Local == "" && finfo != nil { + start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name + } + if start.Name.Local == "" { + name := typ.Name() + if i := strings.IndexByte(name, '['); i >= 0 { + // Truncate generic instantiation name. See issue 48318. + name = name[:i] + } + if name == "" { + return &UnsupportedTypeError{typ} + } + start.Name.Local = name + } + + // Attributes + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr == 0 { + continue + } + fv := finfo.value(val, dontInitNilPointers) + + if finfo.flags&fOmitEmpty != 0 && (!fv.IsValid() || isEmptyValue(fv)) { + continue + } + + if fv.Kind() == reflect.Interface && fv.IsNil() { + continue + } + + name := Name{Space: finfo.xmlns, Local: finfo.name} + if err := p.marshalAttr(&start, name, fv); err != nil { + return err + } + } + + // If an empty name was found, namespace is overridden with an empty space + if tinfo.xmlname != nil && start.Name.Space == "" && + tinfo.xmlname.xmlns == "" && tinfo.xmlname.name == "" && + len(p.tags) != 0 && p.tags[len(p.tags)-1].Space != "" { + start.Attr = append(start.Attr, Attr{Name{"", xmlnsPrefix}, ""}) + } + if err := p.writeStart(&start); err != nil { + return err + } + + if val.Kind() == reflect.Struct { + err = p.marshalStruct(tinfo, val) + } else { + s, b, err1 := p.marshalSimple(typ, val) + if err1 != nil { + err = err1 + } else if b != nil { + EscapeText(p, b) + } else { + p.EscapeString(s) + } + } + if err != nil { + return err + } + + if err := p.writeEnd(start.Name); err != nil { + return err + } + + return p.cachedWriteError() +} + +// marshalAttr marshals an attribute with the given name and value, adding to start.Attr. +func (p *printer) marshalAttr(start *StartElement, name Name, val reflect.Value) error { + if val.CanInterface() && val.Type().Implements(marshalerAttrType) { + attr, err := val.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerAttrType) { + attr, err := pv.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + } + + if val.CanInterface() && val.Type().Implements(textMarshalerType) { + text, err := val.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + text, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + } + + // Dereference or skip nil pointer, interface values. + switch val.Kind() { + case reflect.Pointer, reflect.Interface: + if val.IsNil() { + return nil + } + val = val.Elem() + } + + // Walk slices. + if val.Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + n := val.Len() + for i := 0; i < n; i++ { + if err := p.marshalAttr(start, name, val.Index(i)); err != nil { + return err + } + } + return nil + } + + if val.Type() == attrType { + start.Attr = append(start.Attr, val.Interface().(Attr)) + return nil + } + + s, b, err := p.marshalSimple(val.Type(), val) + if err != nil { + return err + } + if b != nil { + s = string(b) + } + start.Attr = append(start.Attr, Attr{name, s}) + return nil +} + +// defaultStart returns the default start element to use, +// given the reflect type, field info, and start template. +func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { + var start StartElement + // Precedence for the XML element name is as above, + // except that we do not look inside structs for the first field. + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if finfo != nil && finfo.name != "" { + start.Name.Local = finfo.name + start.Name.Space = finfo.xmlns + } else if typ.Name() != "" { + start.Name.Local = typ.Name() + } else { + // Must be a pointer to a named type, + // since it has the Marshaler methods. + start.Name.Local = typ.Elem().Name() + } + return start +} + +// marshalInterface marshals a Marshaler interface value. +func (p *printer) marshalInterface(val Marshaler, start StartElement) error { + // Push a marker onto the tag stack so that MarshalXML + // cannot close the XML tags that it did not open. + p.tags = append(p.tags, Name{}) + n := len(p.tags) + + err := val.MarshalXML(p.encoder, start) + if err != nil { + return err + } + + // Make sure MarshalXML closed all its tags. p.tags[n-1] is the mark. + if len(p.tags) > n { + return fmt.Errorf("xml: %s.MarshalXML wrote invalid XML: <%s> not closed", receiverType(val), p.tags[len(p.tags)-1].Local) + } + p.tags = p.tags[:n-1] + return nil +} + +// marshalTextInterface marshals a TextMarshaler interface value. +func (p *printer) marshalTextInterface(val encoding.TextMarshaler, start StartElement) error { + if err := p.writeStart(&start); err != nil { + return err + } + text, err := val.MarshalText() + if err != nil { + return err + } + EscapeText(p, text) + return p.writeEnd(start.Name) +} + +// writeStart writes the given start element. +func (p *printer) writeStart(start *StartElement) error { + if start.Name.Local == "" { + return fmt.Errorf("xml: start tag with no name") + } + + p.tags = append(p.tags, start.Name) + p.markPrefix() + + p.writeIndent(1) + p.WriteByte('<') + p.WriteString(start.Name.Local) + + if start.Name.Space != "" { + p.WriteString(` xmlns="`) + p.EscapeString(start.Name.Space) + p.WriteByte('"') + } + + // Attributes + for _, attr := range start.Attr { + name := attr.Name + if name.Local == "" { + continue + } + p.WriteByte(' ') + if name.Space != "" { + p.WriteString(p.createAttrPrefix(name.Space)) + p.WriteByte(':') + } + p.WriteString(name.Local) + p.WriteString(`="`) + p.EscapeString(attr.Value) + p.WriteByte('"') + } + p.WriteByte('>') + return nil +} + +func (p *printer) writeEnd(name Name) error { + if name.Local == "" { + return fmt.Errorf("xml: end tag with no name") + } + if len(p.tags) == 0 || p.tags[len(p.tags)-1].Local == "" { + return fmt.Errorf("xml: end tag without start tag", name.Local) + } + if top := p.tags[len(p.tags)-1]; top != name { + if top.Local != name.Local { + return fmt.Errorf("xml: end tag does not match start tag <%s>", name.Local, top.Local) + } + return fmt.Errorf("xml: end tag in namespace %s does not match start tag <%s> in namespace %s", name.Local, name.Space, top.Local, top.Space) + } + p.tags = p.tags[:len(p.tags)-1] + + p.writeIndent(-1) + p.WriteByte('<') + p.WriteByte('/') + p.WriteString(name.Local) + p.WriteByte('>') + p.popPrefix() + return nil +} + +func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) (string, []byte, error) { + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return strconv.FormatInt(val.Int(), 10), nil, nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return strconv.FormatUint(val.Uint(), 10), nil, nil + case reflect.Float32, reflect.Float64: + return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()), nil, nil + case reflect.String: + return val.String(), nil, nil + case reflect.Bool: + return strconv.FormatBool(val.Bool()), nil, nil + case reflect.Array: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // [...]byte + var bytes []byte + if val.CanAddr() { + bytes = val.Bytes() + } else { + bytes = make([]byte, val.Len()) + reflect.Copy(reflect.ValueOf(bytes), val) + } + return "", bytes, nil + case reflect.Slice: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // []byte + return "", val.Bytes(), nil + } + return "", nil, &UnsupportedTypeError{typ} +} + +var ddBytes = []byte("--") + +// indirect drills into interfaces and pointers, returning the pointed-at value. +// If it encounters a nil interface or pointer, indirect returns that nil value. +// This can turn into an infinite loop given a cyclic chain, +// but it matches the Go 1 behavior. +func indirect(vf reflect.Value) reflect.Value { + for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Pointer { + if vf.IsNil() { + return vf + } + vf = vf.Elem() + } + return vf +} + +func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { + s := parentStack{p: p} + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr != 0 { + continue + } + vf := finfo.value(val, dontInitNilPointers) + if !vf.IsValid() { + // The field is behind an anonymous struct field that's + // nil. Skip it. + continue + } + + switch finfo.flags & fMode { + case fCDATA, fCharData: + emit := EscapeText + if finfo.flags&fMode == fCDATA { + emit = emitCDATA + } + if err := s.trim(finfo.parents); err != nil { + return err + } + if vf.CanInterface() && vf.Type().Implements(textMarshalerType) { + data, err := vf.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + if vf.CanAddr() { + pv := vf.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + data, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + } + + var scratch [64]byte + vf = indirect(vf) + switch vf.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil { + return err + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if err := emit(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)); err != nil { + return err + } + case reflect.Float32, reflect.Float64: + if err := emit(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())); err != nil { + return err + } + case reflect.Bool: + if err := emit(p, strconv.AppendBool(scratch[:0], vf.Bool())); err != nil { + return err + } + case reflect.String: + if err := emit(p, []byte(vf.String())); err != nil { + return err + } + case reflect.Slice: + if elem, ok := vf.Interface().([]byte); ok { + if err := emit(p, elem); err != nil { + return err + } + } + } + continue + + case fComment: + if err := s.trim(finfo.parents); err != nil { + return err + } + vf = indirect(vf) + k := vf.Kind() + if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) { + return fmt.Errorf("xml: bad type for comment field of %s", val.Type()) + } + if vf.Len() == 0 { + continue + } + p.writeIndent(0) + p.WriteString("" is invalid grammar. Make it "- -->" + p.WriteByte(' ') + } + p.WriteString("-->") + continue + + case fInnerXML: + vf = indirect(vf) + iface := vf.Interface() + switch raw := iface.(type) { + case []byte: + p.Write(raw) + continue + case string: + p.WriteString(raw) + continue + } + + case fElement, fElement | fAny: + if err := s.trim(finfo.parents); err != nil { + return err + } + if len(finfo.parents) > len(s.stack) { + if vf.Kind() != reflect.Pointer && vf.Kind() != reflect.Interface || !vf.IsNil() { + if err := s.push(finfo.parents[len(s.stack):]); err != nil { + return err + } + } + } + } + if err := p.marshalValue(vf, finfo, nil); err != nil { + return err + } + } + s.trim(nil) + return p.cachedWriteError() +} + +// Write implements io.Writer +func (p *printer) Write(b []byte) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.Write(b) + } + return n, p.err +} + +// WriteString implements io.StringWriter +func (p *printer) WriteString(s string) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.WriteString(s) + } + return n, p.err +} + +// WriteByte implements io.ByteWriter +func (p *printer) WriteByte(c byte) error { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + p.err = p.w.WriteByte(c) + } + return p.err +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (p *printer) Close() error { + if p.closed { + return nil + } + p.closed = true + if err := p.w.Flush(); err != nil { + return err + } + if len(p.tags) > 0 { + return fmt.Errorf("unclosed tag <%s>", p.tags[len(p.tags)-1].Local) + } + return nil +} + +// return the bufio Writer's cached write error +func (p *printer) cachedWriteError() error { + _, err := p.Write(nil) + return err +} + +func (p *printer) writeIndent(depthDelta int) { + if len(p.prefix) == 0 && len(p.indent) == 0 { + return + } + if depthDelta < 0 { + p.depth-- + if p.indentedIn { + p.indentedIn = false + return + } + p.indentedIn = false + } + if p.putNewline { + p.WriteByte('\n') + } else { + p.putNewline = true + } + if len(p.prefix) > 0 { + p.WriteString(p.prefix) + } + if len(p.indent) > 0 { + for i := 0; i < p.depth; i++ { + p.WriteString(p.indent) + } + } + if depthDelta > 0 { + p.depth++ + p.indentedIn = true + } +} + +type parentStack struct { + p *printer + stack []string +} + +// trim updates the XML context to match the longest common prefix of the stack +// and the given parents. A closing tag will be written for every parent +// popped. Passing a zero slice or nil will close all the elements. +func (s *parentStack) trim(parents []string) error { + split := 0 + for ; split < len(parents) && split < len(s.stack); split++ { + if parents[split] != s.stack[split] { + break + } + } + for i := len(s.stack) - 1; i >= split; i-- { + if err := s.p.writeEnd(Name{Local: s.stack[i]}); err != nil { + return err + } + } + s.stack = s.stack[:split] + return nil +} + +// push adds parent elements to the stack and writes open tags. +func (s *parentStack) push(parents []string) error { + for i := 0; i < len(parents); i++ { + if err := s.p.writeStart(&StartElement{Name: Name{Local: parents[i]}}); err != nil { + return err + } + } + s.stack = append(s.stack, parents...) + return nil +} + +// UnsupportedTypeError is returned when [Marshal] encounters a type +// that cannot be converted into XML. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "xml: unsupported type: " + e.Type.String() +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.Interface, reflect.Pointer: + return v.IsZero() + } + return false +} diff --git a/marshal_test.go b/marshal_test.go new file mode 100644 index 0000000..1a8e31d --- /dev/null +++ b/marshal_test.go @@ -0,0 +1,2591 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +type DriveType int + +const ( + HyperDrive DriveType = iota + ImprobabilityDrive +) + +type Passenger struct { + Name []string `xml:"name"` + Weight float32 `xml:"weight"` +} + +type Ship struct { + XMLName struct{} `xml:"spaceship"` + + Name string `xml:"name,attr"` + Pilot string `xml:"pilot,attr"` + Drive DriveType `xml:"drive"` + Age uint `xml:"age"` + Passenger []*Passenger `xml:"passenger"` + secret string +} + +type NamedType string + +type Port struct { + XMLName struct{} `xml:"port"` + Type string `xml:"type,attr,omitempty"` + Comment string `xml:",comment"` + Number string `xml:",chardata"` +} + +type Domain struct { + XMLName struct{} `xml:"domain"` + Country string `xml:",attr,omitempty"` + Name []byte `xml:",chardata"` + Comment []byte `xml:",comment"` +} + +type Book struct { + XMLName struct{} `xml:"book"` + Title string `xml:",chardata"` +} + +type Event struct { + XMLName struct{} `xml:"event"` + Year int `xml:",chardata"` +} + +type Movie struct { + XMLName struct{} `xml:"movie"` + Length uint `xml:",chardata"` +} + +type Pi struct { + XMLName struct{} `xml:"pi"` + Approximation float32 `xml:",chardata"` +} + +type Universe struct { + XMLName struct{} `xml:"universe"` + Visible float64 `xml:",chardata"` +} + +type Particle struct { + XMLName struct{} `xml:"particle"` + HasMass bool `xml:",chardata"` +} + +type Departure struct { + XMLName struct{} `xml:"departure"` + When time.Time `xml:",chardata"` +} + +type SecretAgent struct { + XMLName struct{} `xml:"agent"` + Handle string `xml:"handle,attr"` + Identity string + Obfuscate string `xml:",innerxml"` +} + +type NestedItems struct { + XMLName struct{} `xml:"result"` + Items []string `xml:">item"` + Item1 []string `xml:"Items>item1"` +} + +type NestedOrder struct { + XMLName struct{} `xml:"result"` + Field1 string `xml:"parent>c"` + Field2 string `xml:"parent>b"` + Field3 string `xml:"parent>a"` +} + +type MixedNested struct { + XMLName struct{} `xml:"result"` + A string `xml:"parent1>a"` + B string `xml:"b"` + C string `xml:"parent1>parent2>c"` + D string `xml:"parent1>d"` +} + +type NilTest struct { + A any `xml:"parent1>parent2>a"` + B any `xml:"parent1>b"` + C any `xml:"parent1>parent2>c"` +} + +type Service struct { + XMLName struct{} `xml:"service"` + Domain *Domain `xml:"host>domain"` + Port *Port `xml:"host>port"` + Extra1 any + Extra2 any `xml:"host>extra2"` +} + +var nilStruct *Ship + +type EmbedA struct { + EmbedC + EmbedB EmbedB + FieldA string + embedD +} + +type EmbedB struct { + FieldB string + *EmbedC +} + +type EmbedC struct { + FieldA1 string `xml:"FieldA>A1"` + FieldA2 string `xml:"FieldA>A2"` + FieldB string + FieldC string +} + +type embedD struct { + fieldD string + FieldE string // Promoted and visible when embedD is embedded. +} + +type NameCasing struct { + XMLName struct{} `xml:"casing"` + Xy string + XY string + XyA string `xml:"Xy,attr"` + XYA string `xml:"XY,attr"` +} + +type NamePrecedence struct { + XMLName Name `xml:"Parent"` + FromTag XMLNameWithoutTag `xml:"InTag"` + FromNameVal XMLNameWithoutTag + FromNameTag XMLNameWithTag + InFieldName string +} + +type XMLNameWithTag struct { + XMLName Name `xml:"InXMLNameTag"` + Value string `xml:",chardata"` +} + +type XMLNameWithoutTag struct { + XMLName Name + Value string `xml:",chardata"` +} + +type NameInField struct { + Foo Name `xml:"ns foo"` +} + +type AttrTest struct { + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type AttrsTest struct { + Attrs []Attr `xml:",any,attr"` + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type OmitAttrTest struct { + Int int `xml:",attr,omitempty"` + Named int `xml:"int,attr,omitempty"` + Float float64 `xml:",attr,omitempty"` + Uint8 uint8 `xml:",attr,omitempty"` + Bool bool `xml:",attr,omitempty"` + Str string `xml:",attr,omitempty"` + Bytes []byte `xml:",attr,omitempty"` + PStr *string `xml:",attr,omitempty"` +} + +type OmitFieldTest struct { + Int int `xml:",omitempty"` + Named int `xml:"int,omitempty"` + Float float64 `xml:",omitempty"` + Uint8 uint8 `xml:",omitempty"` + Bool bool `xml:",omitempty"` + Str string `xml:",omitempty"` + Bytes []byte `xml:",omitempty"` + PStr *string `xml:",omitempty"` + Ptr *PresenceTest `xml:",omitempty"` +} + +type AnyTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField AnyHolder `xml:",any"` +} + +type AnyOmitTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField *AnyHolder `xml:",any,omitempty"` +} + +type AnySliceTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField []AnyHolder `xml:",any"` +} + +type AnyHolder struct { + XMLName Name + XML string `xml:",innerxml"` +} + +type RecurseA struct { + A string + B *RecurseB +} + +type RecurseB struct { + A *RecurseA + B string +} + +type PresenceTest struct { + Exists *struct{} +} + +type IgnoreTest struct { + PublicSecret string `xml:"-"` +} + +type MyBytes []byte + +type Data struct { + Bytes []byte + Attr []byte `xml:",attr"` + Custom MyBytes +} + +type Plain struct { + V any +} + +type MyInt int + +type EmbedInt struct { + MyInt +} + +type Strings struct { + X []string `xml:"A>B,omitempty"` +} + +type PointerFieldsTest struct { + XMLName Name `xml:"dummy"` + Name *string `xml:"name,attr"` + Age *uint `xml:"age,attr"` + Empty *string `xml:"empty,attr"` + Contents *string `xml:",chardata"` +} + +type ChardataEmptyTest struct { + XMLName Name `xml:"test"` + Contents *string `xml:",chardata"` +} + +type PointerAnonFields struct { + *MyInt + *NamedType +} + +type MyMarshalerTest struct { +} + +var _ Marshaler = (*MyMarshalerTest)(nil) + +func (m *MyMarshalerTest) MarshalXML(e *Encoder, start StartElement) error { + e.EncodeToken(start) + e.EncodeToken(CharData([]byte("hello world"))) + e.EncodeToken(EndElement{start.Name}) + return nil +} + +type MyMarshalerAttrTest struct { +} + +var _ MarshalerAttr = (*MyMarshalerAttrTest)(nil) + +func (m *MyMarshalerAttrTest) MarshalXMLAttr(name Name) (Attr, error) { + return Attr{name, "hello world"}, nil +} + +func (m *MyMarshalerAttrTest) UnmarshalXMLAttr(attr Attr) error { + return nil +} + +type MarshalerStruct struct { + Foo MyMarshalerAttrTest `xml:",attr"` +} + +type InnerStruct struct { + XMLName Name `xml:"testns outer"` +} + +type OuterStruct struct { + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterNamedStruct struct { + InnerStruct + XMLName Name `xml:"outerns test"` + IntAttr int `xml:"int,attr"` +} + +type OuterNamedOrderedStruct struct { + XMLName Name `xml:"outerns test"` + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterOuterStruct struct { + OuterStruct +} + +type NestedAndChardata struct { + AB []string `xml:"A>B"` + Chardata string `xml:",chardata"` +} + +type NestedAndComment struct { + AB []string `xml:"A>B"` + Comment string `xml:",comment"` +} + +type CDataTest struct { + Chardata string `xml:",cdata"` +} + +type NestedAndCData struct { + AB []string `xml:"A>B"` + CDATA string `xml:",cdata"` +} + +func ifaceptr(x any) any { + return &x +} + +func stringptr(x string) *string { + return &x +} + +type T1 struct{} +type T2 struct{} + +type IndirComment struct { + T1 T1 + Comment *string `xml:",comment"` + T2 T2 +} + +type DirectComment struct { + T1 T1 + Comment string `xml:",comment"` + T2 T2 +} + +type IfaceComment struct { + T1 T1 + Comment any `xml:",comment"` + T2 T2 +} + +type IndirChardata struct { + T1 T1 + Chardata *string `xml:",chardata"` + T2 T2 +} + +type DirectChardata struct { + T1 T1 + Chardata string `xml:",chardata"` + T2 T2 +} + +type IfaceChardata struct { + T1 T1 + Chardata any `xml:",chardata"` + T2 T2 +} + +type IndirCDATA struct { + T1 T1 + CDATA *string `xml:",cdata"` + T2 T2 +} + +type DirectCDATA struct { + T1 T1 + CDATA string `xml:",cdata"` + T2 T2 +} + +type IfaceCDATA struct { + T1 T1 + CDATA any `xml:",cdata"` + T2 T2 +} + +type IndirInnerXML struct { + T1 T1 + InnerXML *string `xml:",innerxml"` + T2 T2 +} + +type DirectInnerXML struct { + T1 T1 + InnerXML string `xml:",innerxml"` + T2 T2 +} + +type IfaceInnerXML struct { + T1 T1 + InnerXML any `xml:",innerxml"` + T2 T2 +} + +type IndirElement struct { + T1 T1 + Element *string + T2 T2 +} + +type DirectElement struct { + T1 T1 + Element string + T2 T2 +} + +type IfaceElement struct { + T1 T1 + Element any + T2 T2 +} + +type IndirOmitEmpty struct { + T1 T1 + OmitEmpty *string `xml:",omitempty"` + T2 T2 +} + +type DirectOmitEmpty struct { + T1 T1 + OmitEmpty string `xml:",omitempty"` + T2 T2 +} + +type IfaceOmitEmpty struct { + T1 T1 + OmitEmpty any `xml:",omitempty"` + T2 T2 +} + +type IndirAny struct { + T1 T1 + Any *string `xml:",any"` + T2 T2 +} + +type DirectAny struct { + T1 T1 + Any string `xml:",any"` + T2 T2 +} + +type IfaceAny struct { + T1 T1 + Any any `xml:",any"` + T2 T2 +} + +type Generic[T any] struct { + X T +} + +var ( + nameAttr = "Sarah" + ageAttr = uint(12) + contentsAttr = "lorem ipsum" + empty = "" +) + +// Unless explicitly stated as such (or *Plain), all of the +// tests below are two-way tests. When introducing new tests, +// please try to make them two-way as well to ensure that +// marshaling and unmarshaling are as symmetrical as feasible. +var marshalTests = []struct { + Value any + ExpectXML string + MarshalOnly bool + MarshalError string + UnmarshalOnly bool + UnmarshalError string +}{ + // Test nil marshals to nothing + {Value: nil, ExpectXML: ``, MarshalOnly: true}, + {Value: nilStruct, ExpectXML: ``, MarshalOnly: true}, + + // Test value types + {Value: &Plain{true}, ExpectXML: `true`}, + {Value: &Plain{false}, ExpectXML: `false`}, + {Value: &Plain{int(42)}, ExpectXML: `42`}, + {Value: &Plain{int8(42)}, ExpectXML: `42`}, + {Value: &Plain{int16(42)}, ExpectXML: `42`}, + {Value: &Plain{int32(42)}, ExpectXML: `42`}, + {Value: &Plain{uint(42)}, ExpectXML: `42`}, + {Value: &Plain{uint8(42)}, ExpectXML: `42`}, + {Value: &Plain{uint16(42)}, ExpectXML: `42`}, + {Value: &Plain{uint32(42)}, ExpectXML: `42`}, + {Value: &Plain{float32(1.25)}, ExpectXML: `1.25`}, + {Value: &Plain{float64(1.25)}, ExpectXML: `1.25`}, + {Value: &Plain{uintptr(0xFFDD)}, ExpectXML: `65501`}, + {Value: &Plain{"gopher"}, ExpectXML: `gopher`}, + {Value: &Plain{[]byte("gopher")}, ExpectXML: `gopher`}, + {Value: &Plain{""}, ExpectXML: `</>`}, + {Value: &Plain{[]byte("")}, ExpectXML: `</>`}, + {Value: &Plain{[3]byte{'<', '/', '>'}}, ExpectXML: `</>`}, + {Value: &Plain{NamedType("potato")}, ExpectXML: `potato`}, + {Value: &Plain{[]int{1, 2, 3}}, ExpectXML: `123`}, + {Value: &Plain{[3]int{1, 2, 3}}, ExpectXML: `123`}, + {Value: ifaceptr(true), MarshalOnly: true, ExpectXML: `true`}, + + // Test time. + { + Value: &Plain{time.Unix(1e9, 123456789).UTC()}, + ExpectXML: `2001-09-09T01:46:40.123456789Z`, + }, + + // A pointer to struct{} may be used to test for an element's presence. + { + Value: &PresenceTest{new(struct{})}, + ExpectXML: ``, + }, + { + Value: &PresenceTest{}, + ExpectXML: ``, + }, + + // A []byte field is only nil if the element was not found. + { + Value: &Data{}, + ExpectXML: ``, + UnmarshalOnly: true, + }, + { + Value: &Data{Bytes: []byte{}, Custom: MyBytes{}, Attr: []byte{}}, + ExpectXML: ``, + UnmarshalOnly: true, + }, + + // Check that []byte works, including named []byte types. + { + Value: &Data{Bytes: []byte("ab"), Custom: MyBytes("cd"), Attr: []byte{'v'}}, + ExpectXML: `abcd`, + }, + + // Test innerxml + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "", + }, + ExpectXML: `James Bond`, + MarshalOnly: true, + }, + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "James Bond", + }, + ExpectXML: `James Bond`, + UnmarshalOnly: true, + }, + + // Test structs + {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `443`}, + {Value: &Port{Number: "443"}, ExpectXML: `443`}, + {Value: &Port{Type: ""}, ExpectXML: ``}, + {Value: &Port{Number: "443", Comment: "https"}, ExpectXML: `443`}, + {Value: &Port{Number: "443", Comment: "add space-"}, ExpectXML: `443`, MarshalOnly: true}, + {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `google.com&friends`}, + {Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `google.com`}, + {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `Pride & Prejudice`}, + {Value: &Event{Year: -3114}, ExpectXML: `-3114`}, + {Value: &Movie{Length: 13440}, ExpectXML: `13440`}, + {Value: &Pi{Approximation: 3.14159265}, ExpectXML: `3.1415927`}, + {Value: &Universe{Visible: 9.3e13}, ExpectXML: `9.3e+13`}, + {Value: &Particle{HasMass: true}, ExpectXML: `true`}, + {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `2013-01-09T00:15:00-09:00`}, + {Value: atomValue, ExpectXML: atomXML}, + {Value: &Generic[int]{1}, ExpectXML: `1`}, + { + Value: &Ship{ + Name: "Heart of Gold", + Pilot: "Computer", + Age: 1, + Drive: ImprobabilityDrive, + Passenger: []*Passenger{ + { + Name: []string{"Zaphod", "Beeblebrox"}, + Weight: 7.25, + }, + { + Name: []string{"Trisha", "McMillen"}, + Weight: 5.5, + }, + { + Name: []string{"Ford", "Prefect"}, + Weight: 7, + }, + { + Name: []string{"Arthur", "Dent"}, + Weight: 6.75, + }, + }, + }, + ExpectXML: `` + + `` + strconv.Itoa(int(ImprobabilityDrive)) + `` + + `1` + + `` + + `Zaphod` + + `Beeblebrox` + + `7.25` + + `` + + `` + + `Trisha` + + `McMillen` + + `5.5` + + `` + + `` + + `Ford` + + `Prefect` + + `7` + + `` + + `` + + `Arthur` + + `Dent` + + `6.75` + + `` + + ``, + }, + + // Test a>b + { + Value: &NestedItems{Items: nil, Item1: nil}, + ExpectXML: `` + + `` + + `` + + ``, + }, + { + Value: &NestedItems{Items: []string{}, Item1: []string{}}, + ExpectXML: `` + + `` + + `` + + ``, + MarshalOnly: true, + }, + { + Value: &NestedItems{Items: nil, Item1: []string{"A"}}, + ExpectXML: `` + + `` + + `A` + + `` + + ``, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: nil}, + ExpectXML: `` + + `` + + `A` + + `B` + + `` + + ``, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}}, + ExpectXML: `` + + `` + + `A` + + `B` + + `C` + + `` + + ``, + }, + { + Value: &NestedOrder{Field1: "C", Field2: "B", Field3: "A"}, + ExpectXML: `` + + `` + + `C` + + `B` + + `A` + + `` + + ``, + }, + { + Value: &NilTest{A: "A", B: nil, C: "C"}, + ExpectXML: `` + + `` + + `A` + + `C` + + `` + + ``, + MarshalOnly: true, // Uses interface{} + }, + { + Value: &MixedNested{A: "A", B: "B", C: "C", D: "D"}, + ExpectXML: `` + + `A` + + `B` + + `` + + `C` + + `D` + + `` + + ``, + }, + { + Value: &Service{Port: &Port{Number: "80"}}, + ExpectXML: `80`, + }, + { + Value: &Service{}, + ExpectXML: ``, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"}, + ExpectXML: `` + + `80` + + `A` + + `B` + + ``, + MarshalOnly: true, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra2: "example"}, + ExpectXML: `` + + `80` + + `example` + + ``, + MarshalOnly: true, + }, + { + Value: &struct { + XMLName struct{} `xml:"space top"` + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `` + + `abc` + + `c1` + + `d1` + + `` + + ``, + }, + { + Value: &struct { + XMLName Name + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + XMLName: Name{ + Space: "space0", + Local: "top", + }, + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `` + + `ab` + + `c` + + `c1` + + `d1` + + `` + + ``, + }, + { + Value: &struct { + XMLName struct{} `xml:"top"` + B string `xml:"space x>b"` + B1 string `xml:"space1 x>b"` + }{ + B: "b", + B1: "b1", + }, + ExpectXML: `` + + `b` + + `b1` + + ``, + }, + + // Test struct embedding + { + Value: &EmbedA{ + EmbedC: EmbedC{ + FieldA1: "", // Shadowed by A.A + FieldA2: "", // Shadowed by A.A + FieldB: "A.C.B", + FieldC: "A.C.C", + }, + EmbedB: EmbedB{ + FieldB: "A.B.B", + EmbedC: &EmbedC{ + FieldA1: "A.B.C.A1", + FieldA2: "A.B.C.A2", + FieldB: "", // Shadowed by A.B.B + FieldC: "A.B.C.C", + }, + }, + FieldA: "A.A", + embedD: embedD{ + FieldE: "A.D.E", + }, + }, + ExpectXML: `` + + `A.C.B` + + `A.C.C` + + `` + + `A.B.B` + + `` + + `A.B.C.A1` + + `A.B.C.A2` + + `` + + `A.B.C.C` + + `` + + `A.A` + + `A.D.E` + + ``, + }, + + // Anonymous struct pointer field which is nil + { + Value: &EmbedB{}, + ExpectXML: ``, + }, + + // Other kinds of nil anonymous fields + { + Value: &PointerAnonFields{}, + ExpectXML: ``, + }, + + // Test that name casing matters + { + Value: &NameCasing{Xy: "mixed", XY: "upper", XyA: "mixedA", XYA: "upperA"}, + ExpectXML: `mixedupper`, + }, + + // Test the order in which the XML element name is chosen + { + Value: &NamePrecedence{ + FromTag: XMLNameWithoutTag{Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "InXMLName"}, Value: "B"}, + FromNameTag: XMLNameWithTag{Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `` + + `A` + + `B` + + `C` + + `D` + + ``, + MarshalOnly: true, + }, + { + Value: &NamePrecedence{ + XMLName: Name{Local: "Parent"}, + FromTag: XMLNameWithoutTag{XMLName: Name{Local: "InTag"}, Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "FromNameVal"}, Value: "B"}, + FromNameTag: XMLNameWithTag{XMLName: Name{Local: "InXMLNameTag"}, Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `` + + `A` + + `B` + + `C` + + `D` + + ``, + UnmarshalOnly: true, + }, + + // xml.Name works in a plain field as well. + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: ``, + }, + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: ``, + UnmarshalOnly: true, + }, + + // Marshaling zero xml.Name uses the tag or field name. + { + Value: &NameInField{}, + ExpectXML: ``, + MarshalOnly: true, + }, + + // Test attributes + { + Value: &AttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: ``, + }, + { + Value: &AttrTest{Bytes: []byte{}}, + ExpectXML: ``, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + {Name: Name{Local: "Int"}, Value: "8"}, + {Name: Name{Local: "int"}, Value: "9"}, + {Name: Name{Local: "Float"}, Value: "23.5"}, + {Name: Name{Local: "Uint8"}, Value: "255"}, + {Name: Name{Local: "Bool"}, Value: "true"}, + {Name: Name{Local: "Str"}, Value: "str"}, + {Name: Name{Local: "Bytes"}, Value: "byt"}, + }, + }, + ExpectXML: ``, + MarshalOnly: true, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + }, + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: ``, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Int"}, Value: "0"}, + {Name: Name{Local: "int"}, Value: "0"}, + {Name: Name{Local: "Float"}, Value: "0"}, + {Name: Name{Local: "Uint8"}, Value: "0"}, + {Name: Name{Local: "Bool"}, Value: "false"}, + {Name: Name{Local: "Str"}}, + {Name: Name{Local: "Bytes"}}, + }, + Bytes: []byte{}, + }, + ExpectXML: ``, + MarshalOnly: true, + }, + { + Value: &OmitAttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + }, + ExpectXML: ``, + }, + { + Value: &OmitAttrTest{}, + ExpectXML: ``, + }, + + // pointer fields + { + Value: &PointerFieldsTest{Name: &nameAttr, Age: &ageAttr, Contents: &contentsAttr}, + ExpectXML: `lorem ipsum`, + MarshalOnly: true, + }, + + // empty chardata pointer field + { + Value: &ChardataEmptyTest{}, + ExpectXML: ``, + MarshalOnly: true, + }, + + // omitempty on fields + { + Value: &OmitFieldTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + Ptr: &PresenceTest{}, + }, + ExpectXML: `` + + `8` + + `9` + + `23.5` + + `255` + + `true` + + `str` + + `byt` + + `` + + `` + + ``, + }, + { + Value: &OmitFieldTest{}, + ExpectXML: ``, + }, + + // Test ",any" + { + ExpectXML: `knownunknown`, + Value: &AnyTest{ + Nested: "known", + AnyField: AnyHolder{ + XMLName: Name{Local: "other"}, + XML: "unknown", + }, + }, + }, + { + Value: &AnyTest{Nested: "known", + AnyField: AnyHolder{ + XML: "", + XMLName: Name{Local: "AnyField"}, + }, + }, + ExpectXML: `known`, + }, + { + ExpectXML: `b`, + Value: &AnyOmitTest{ + Nested: "b", + }, + }, + { + ExpectXML: `bei`, + Value: &AnySliceTest{ + Nested: "b", + AnyField: []AnyHolder{ + { + XMLName: Name{Local: "c"}, + XML: "e", + }, + { + XMLName: Name{Space: "f", Local: "g"}, + XML: "i", + }, + }, + }, + }, + { + ExpectXML: `b`, + Value: &AnySliceTest{ + Nested: "b", + }, + }, + + // Test recursive types. + { + Value: &RecurseA{ + A: "a1", + B: &RecurseB{ + A: &RecurseA{"a2", nil}, + B: "b1", + }, + }, + ExpectXML: `a1a2b1`, + }, + + // Test ignoring fields via "-" tag + { + ExpectXML: ``, + Value: &IgnoreTest{}, + }, + { + ExpectXML: ``, + Value: &IgnoreTest{PublicSecret: "can't tell"}, + MarshalOnly: true, + }, + { + ExpectXML: `ignore me`, + Value: &IgnoreTest{}, + UnmarshalOnly: true, + }, + + // Test escaping. + { + ExpectXML: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + Value: &AnyTest{ + Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + AnyField: AnyHolder{XMLName: Name{Local: "empty"}}, + }, + }, + { + ExpectXML: `newline: ; cr: ; tab: ;`, + Value: &AnyTest{ + Nested: "newline: \n; cr: \r; tab: \t;", + AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}}, + }, + }, + { + ExpectXML: "1\r2\r\n3\n\r4\n5", + Value: &AnyTest{ + Nested: "1\n2\n3\n\n4\n5", + }, + UnmarshalOnly: true, + }, + { + ExpectXML: `42`, + Value: &EmbedInt{ + MyInt: 42, + }, + }, + // Test outputting CDATA-wrapped text. + { + ExpectXML: ``, + Value: &CDataTest{}, + }, + { + ExpectXML: ``, + Value: &CDataTest{ + Chardata: "http://example.com/tests/1?foo=1&bar=baz", + }, + }, + { + ExpectXML: `!]]>`, + Value: &CDataTest{ + Chardata: "Literal !", + }, + }, + { + ExpectXML: ` Literal!]]>`, + Value: &CDataTest{ + Chardata: " Literal!", + }, + }, + { + ExpectXML: ` Literal! Literal!]]>`, + Value: &CDataTest{ + Chardata: " Literal! Literal!", + }, + }, + { + ExpectXML: `]]]]>]]>`, + Value: &CDataTest{ + Chardata: "]]>", + }, + }, + + // Test omitempty with parent chain; see golang.org/issue/4168. + { + ExpectXML: ``, + Value: &Strings{}, + }, + // Custom marshalers. + { + ExpectXML: `hello world`, + Value: &MyMarshalerTest{}, + }, + { + ExpectXML: ``, + Value: &MarshalerStruct{}, + }, + { + ExpectXML: ``, + Value: &OuterStruct{IntAttr: 10}, + }, + { + ExpectXML: ``, + Value: &OuterNamedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: ``, + Value: &OuterNamedOrderedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: ``, + Value: &OuterOuterStruct{OuterStruct{IntAttr: 10}}, + }, + { + ExpectXML: `test`, + Value: &NestedAndChardata{AB: make([]string, 2), Chardata: "test"}, + }, + { + ExpectXML: ``, + Value: &NestedAndComment{AB: make([]string, 2), Comment: "test"}, + }, + { + ExpectXML: ``, + Value: &NestedAndCData{AB: make([]string, 2), CDATA: "test"}, + }, + // Test pointer indirection in various kinds of fields. + // https://golang.org/issue/19063 + { + ExpectXML: ``, + Value: &IndirComment{Comment: stringptr("hi")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirComment{Comment: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IndirComment", + }, + { + ExpectXML: ``, + Value: &IndirComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IfaceComment", + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectComment{Comment: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectComment{Comment: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirChardata{Chardata: stringptr("hi")}, + }, + { + ExpectXML: ``, + Value: &IndirChardata{Chardata: stringptr("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: ``, + Value: &IndirChardata{Chardata: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirChardata{Chardata: nil}, + MarshalOnly: true, // unmarshal leaves Chardata=stringptr("") + }, + { + ExpectXML: `hi`, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceChardata{Chardata: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceChardata{Chardata: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `hi`, + Value: &DirectChardata{Chardata: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: ``, + Value: &DirectChardata{Chardata: string("")}, + }, + { + ExpectXML: ``, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + }, + { + ExpectXML: `hi`, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: ``, + Value: &IndirCDATA{CDATA: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirCDATA{CDATA: nil}, + MarshalOnly: true, // unmarshal leaves CDATA=stringptr("") + }, + { + ExpectXML: ``, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `hi`, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceCDATA{CDATA: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceCDATA{CDATA: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &DirectCDATA{CDATA: string("hi")}, + }, + { + ExpectXML: `hi`, + Value: &DirectCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: ``, + Value: &DirectCDATA{CDATA: string("")}, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: nil}, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: ""}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IndirElement{Element: stringptr("hi")}, + }, + { + ExpectXML: ``, + Value: &IndirElement{Element: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirElement{Element: nil}, + }, + { + ExpectXML: `hi`, + Value: &IfaceElement{Element: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceElement{Element: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectElement{Element: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectElement{Element: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("hi")}, + }, + { + // Note: Changed in Go 1.8 to include element (because x.OmitEmpty != nil). + ExpectXML: ``, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: `hi`, + Value: &IfaceOmitEmpty{OmitEmpty: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectOmitEmpty{OmitEmpty: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectOmitEmpty{OmitEmpty: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirAny{Any: stringptr("hi")}, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: nil}, + }, + { + ExpectXML: `hi`, + Value: &IfaceAny{Any: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectAny{Any: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectAny{Any: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirAny{Any: stringptr("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectAny{Any: string("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectAny{Any: string("")}, + UnmarshalOnly: true, + }, +} + +func TestMarshal(t *testing.T) { + for idx, test := range marshalTests { + if test.UnmarshalOnly { + continue + } + + t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) { + data, err := Marshal(test.Value) + if err != nil { + if test.MarshalError == "" { + t.Errorf("marshal(%#v): %s", test.Value, err) + return + } + if !strings.Contains(err.Error(), test.MarshalError) { + t.Errorf("marshal(%#v): %s, want %q", test.Value, err, test.MarshalError) + } + return + } + if test.MarshalError != "" { + t.Errorf("Marshal succeeded, want error %q", test.MarshalError) + return + } + if got, want := string(data), test.ExpectXML; got != want { + if strings.Contains(want, "\n") { + t.Errorf("marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", test.Value, got, want) + } else { + t.Errorf("marshal(%#v):\nhave %#q\nwant %#q", test.Value, got, want) + } + } + }) + } +} + +type AttrParent struct { + X string `xml:"X>Y,attr"` +} + +type BadAttr struct { + Name map[string]string `xml:"name,attr"` +} + +var marshalErrorTests = []struct { + Value any + Err string + Kind reflect.Kind +}{ + { + Value: make(chan bool), + Err: "xml: unsupported type: chan bool", + Kind: reflect.Chan, + }, + { + Value: map[string]string{ + "question": "What do you get when you multiply six by nine?", + "answer": "42", + }, + Err: "xml: unsupported type: map[string]string", + Kind: reflect.Map, + }, + { + Value: map[*Ship]bool{nil: false}, + Err: "xml: unsupported type: map[*xml.Ship]bool", + Kind: reflect.Map, + }, + { + Value: &Domain{Comment: []byte("f--bar")}, + Err: `xml: comments must not contain "--"`, + }, + // Reject parent chain with attr, never worked; see golang.org/issue/5033. + { + Value: &AttrParent{}, + Err: `xml: X>Y chain not valid with attr flag`, + }, + { + Value: BadAttr{map[string]string{"X": "Y"}}, + Err: `xml: unsupported type: map[string]string`, + }, +} + +var marshalIndentTests = []struct { + Value any + Prefix string + Indent string + ExpectXML string +}{ + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "", + }, + Prefix: "", + Indent: "\t", + ExpectXML: "\n\tJames Bond\n", + }, +} + +func TestMarshalErrors(t *testing.T) { + for idx, test := range marshalErrorTests { + data, err := Marshal(test.Value) + if err == nil { + t.Errorf("#%d: marshal(%#v) = [success] %q, want error %v", idx, test.Value, data, test.Err) + continue + } + if err.Error() != test.Err { + t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err) + } + if test.Kind != reflect.Invalid { + if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind { + t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind) + } + } + } +} + +// Do invertibility testing on the various structures that we test +func TestUnmarshal(t *testing.T) { + for i, test := range marshalTests { + if test.MarshalOnly { + continue + } + if _, ok := test.Value.(*Plain); ok { + continue + } + if test.ExpectXML == ``+ + `b`+ + `b1`+ + `` { + // TODO(rogpeppe): re-enable this test in + // https://go-review.googlesource.com/#/c/5910/ + continue + } + + vt := reflect.TypeOf(test.Value) + dest := reflect.New(vt.Elem()).Interface() + err := Unmarshal([]byte(test.ExpectXML), dest) + + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + switch fix := dest.(type) { + case *Feed: + fix.Author.InnerXML = "" + for i := range fix.Entry { + fix.Entry[i].Author.InnerXML = "" + } + } + + if err != nil { + if test.UnmarshalError == "" { + t.Errorf("unmarshal(%#v): %s", test.ExpectXML, err) + return + } + if !strings.Contains(err.Error(), test.UnmarshalError) { + t.Errorf("unmarshal(%#v): %s, want %q", test.ExpectXML, err, test.UnmarshalError) + } + return + } + if got, want := dest, test.Value; !reflect.DeepEqual(got, want) { + t.Errorf("unmarshal(%q):\nhave %#v\nwant %#v", test.ExpectXML, got, want) + } + }) + } +} + +func TestMarshalIndent(t *testing.T) { + for i, test := range marshalIndentTests { + data, err := MarshalIndent(test.Value, test.Prefix, test.Indent) + if err != nil { + t.Errorf("#%d: Error: %s", i, err) + continue + } + if got, want := string(data), test.ExpectXML; got != want { + t.Errorf("#%d: MarshalIndent:\nGot:%s\nWant:\n%s", i, got, want) + } + } +} + +type limitedBytesWriter struct { + w io.Writer + remain int // until writes fail +} + +func (lw *limitedBytesWriter) Write(p []byte) (n int, err error) { + if lw.remain <= 0 { + println("error") + return 0, errors.New("write limit hit") + } + if len(p) > lw.remain { + p = p[:lw.remain] + n, _ = lw.w.Write(p) + lw.remain = 0 + return n, errors.New("write limit hit") + } + n, err = lw.w.Write(p) + lw.remain -= n + return n, err +} + +func TestMarshalWriteErrors(t *testing.T) { + var buf bytes.Buffer + const writeCap = 1024 + w := &limitedBytesWriter{&buf, writeCap} + enc := NewEncoder(w) + var err error + var i int + const n = 4000 + for i = 1; i <= n; i++ { + err = enc.Encode(&Passenger{ + Name: []string{"Alice", "Bob"}, + Weight: 5, + }) + if err != nil { + break + } + } + if err == nil { + t.Error("expected an error") + } + if i == n { + t.Errorf("expected to fail before the end") + } + if buf.Len() != writeCap { + t.Errorf("buf.Len() = %d; want %d", buf.Len(), writeCap) + } +} + +func TestMarshalWriteIOErrors(t *testing.T) { + enc := NewEncoder(errWriter{}) + + expectErr := "unwritable" + err := enc.Encode(&Passenger{}) + if err == nil || err.Error() != expectErr { + t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + } +} + +func TestMarshalFlush(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(CharData("hello world")); err != nil { + t.Fatalf("enc.EncodeToken: %v", err) + } + if buf.Len() > 0 { + t.Fatalf("enc.EncodeToken caused actual write: %q", buf.String()) + } + if err := enc.Flush(); err != nil { + t.Fatalf("enc.Flush: %v", err) + } + if buf.String() != "hello world" { + t.Fatalf("after enc.Flush, buf.String() = %q, want %q", buf.String(), "hello world") + } +} + +func BenchmarkMarshal(b *testing.B) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Marshal(atomValue) + } + }) +} + +func BenchmarkUnmarshal(b *testing.B) { + b.ReportAllocs() + xml := []byte(atomXML) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Unmarshal(xml, &Feed{}) + } + }) +} + +// golang.org/issue/6556 +func TestStructPointerMarshal(t *testing.T) { + type A struct { + XMLName string `xml:"a"` + B []any + } + type C struct { + XMLName Name + Value string `xml:"value"` + } + + a := new(A) + a.B = append(a.B, &C{ + XMLName: Name{Local: "c"}, + Value: "x", + }) + + b, err := Marshal(a) + if err != nil { + t.Fatal(err) + } + if x := string(b); x != "x" { + t.Fatal(x) + } + var v A + err = Unmarshal(b, &v) + if err != nil { + t.Fatal(err) + } +} + +var encodeTokenTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "start element with name space", + toks: []Token{ + StartElement{Name{"space", "local"}, nil}, + }, + want: ``, +}, { + desc: "start element with no name", + toks: []Token{ + StartElement{Name{"space", ""}, nil}, + }, + err: "xml: start tag with no name", +}, { + desc: "end element with no name", + toks: []Token{ + EndElement{Name{"space", ""}}, + }, + err: "xml: end tag with no name", +}, { + desc: "char data", + toks: []Token{ + CharData("foo"), + }, + want: `foo`, +}, { + desc: "char data with escaped chars", + toks: []Token{ + CharData(" \t\n"), + }, + want: " \n", +}, { + desc: "comment", + toks: []Token{ + Comment("foo"), + }, + want: ``, +}, { + desc: "comment with invalid content", + toks: []Token{ + Comment("foo-->"), + }, + err: "xml: EncodeToken of Comment containing --> marker", +}, { + desc: "proc instruction", + toks: []Token{ + ProcInst{"Target", []byte("Instruction")}, + }, + want: ``, +}, { + desc: "proc instruction with empty target", + toks: []Token{ + ProcInst{"", []byte("Instruction")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "proc instruction with bad content", + toks: []Token{ + ProcInst{"", []byte("Instruction?>")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: ``, +}, { + desc: "more complex directive", + toks: []Token{ + Directive("DOCTYPE doc [ '> ]"), + }, + want: `'> ]>`, +}, { + desc: "directive instruction with bad name", + toks: []Token{ + Directive("foo>"), + }, + err: "xml: EncodeToken of Directive containing wrong < or > markers", +}, { + desc: "end tag without start tag", + toks: []Token{ + EndElement{Name{"foo", "bar"}}, + }, + err: "xml: end tag without start tag", +}, { + desc: "mismatching end tag local name", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "bar"}}, + }, + err: "xml: end tag does not match start tag ", + want: ``, +}, { + desc: "mismatching end tag namespace", + toks: []Token{ + StartElement{Name{"space", "foo"}, nil}, + EndElement{Name{"another", "foo"}}, + }, + err: "xml: end tag in namespace another does not match start tag in namespace space", + want: ``, +}, { + desc: "start element with explicit namespace", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "start element with explicit namespace and colliding prefix", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + {Name{"x", "bar"}, "other"}, + }}, + }, + want: ``, +}, { + desc: "start element using previously defined namespace", + toks: []Token{ + StartElement{Name{"", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"space", "x"}, "y"}, + }}, + }, + want: ``, +}, { + desc: "nested name space with same prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space1"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space2"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + EndElement{Name{"", "foo"}}, + EndElement{Name{"", "foo"}}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + }, + want: ``, +}, { + desc: "start element defining several prefixes for the same name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "a"}, "space"}, + {Name{"xmlns", "b"}, "space"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element redefines name space", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element creates alias for default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element defines default name space with existing prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element uses empty attribute name space when default ns defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "redefine xmlns", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"foo", "xmlns"}, "space"}, + }}, + }, + want: ``, +}, { + desc: "xmlns with explicit name space #1", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xml", "xmlns"}, "space"}, + }}, + }, + want: ``, +}, { + desc: "xmlns with explicit name space #2", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{xmlURL, "xmlns"}, "space"}, + }}, + }, + want: ``, +}, { + desc: "empty name space declaration is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "foo"}, ""}, + }}, + }, + want: ``, +}, { + desc: "attribute with no name is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", ""}, "value"}, + }}, + }, + want: ``, +}, { + desc: "namespace URL with non-valid name", + toks: []Token{ + StartElement{Name{"/34", "foo"}, []Attr{ + {Name{"/34", "x"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element resets default namespace to empty", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, ""}, + {Name{"", "x"}, "value"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element requires empty default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, nil}, + }, + want: ``, +}, { + desc: "attribute uses name space from xmlns", + toks: []Token{ + StartElement{Name{"some/space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + {Name{"some/space", "other"}, "other value"}, + }}, + }, + want: ``, +}, { + desc: "default name space should not be used by attributes", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"xmlns", "bar"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: ``, +}, { + desc: "default name space not used by attributes, not explicitly defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: ``, +}, { + desc: "impossible xmlns declaration", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "bar"}, []Attr{ + {Name{"space", "attr"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "reserved namespace prefix -- all lower case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/xmlSchema-instance", "nil"}, "true"}, + }}, + }, + want: ``, +}, { + desc: "reserved namespace prefix -- all upper case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XMLSchema-instance", "nil"}, "true"}, + }}, + }, + want: ``, +}, { + desc: "reserved namespace prefix -- all mixed case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XmLSchema-instance", "nil"}, "true"}, + }}, + }, + want: ``, +}} + +func TestEncodeToken(t *testing.T) { +loop: + for i, tt := range encodeTokenTests { + var buf strings.Builder + enc := NewEncoder(&buf) + var err error + for j, tok := range tt.toks { + err = enc.EncodeToken(tok) + if err != nil && j < len(tt.toks)-1 { + t.Errorf("#%d %s token #%d: %v", i, tt.desc, j, err) + continue loop + } + } + errorf := func(f string, a ...any) { + t.Errorf("#%d %s token #%d:%s", i, tt.desc, len(tt.toks)-1, fmt.Sprintf(f, a...)) + } + switch { + case tt.err != "" && err == nil: + errorf(" expected error; got none") + continue + case tt.err == "" && err != nil: + errorf(" got error: %v", err) + continue + case tt.err != "" && err != nil && tt.err != err.Error(): + errorf(" error mismatch; got %v, want %v", err, tt.err) + continue + } + if err := enc.Flush(); err != nil { + errorf(" %v", err) + continue + } + if got := buf.String(); got != tt.want { + errorf("\ngot %v\nwant %v", got, tt.want) + continue + } + } +} + +func TestProcInstEncodeToken(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err) + } + + if err := enc.EncodeToken(ProcInst{"Target", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to add non-xml target ProcInst") + } + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err == nil { + t.Fatalf("enc.EncodeToken: expected to not be allowed to encode xml target ProcInst when not first token") + } +} + +func TestDecodeEncode(t *testing.T) { + var in, out bytes.Buffer + in.WriteString(` + + + +`) + dec := NewDecoder(&in) + enc := NewEncoder(&out) + for tok, err := dec.Token(); err == nil; tok, err = dec.Token() { + err = enc.EncodeToken(tok) + if err != nil { + t.Fatalf("enc.EncodeToken: Unable to encode token (%#v), %v", tok, err) + } + } +} + +// Issue 9796. Used to fail with GORACE="halt_on_error=1" -race. +func TestRace9796(t *testing.T) { + type A struct{} + type B struct { + C []A `xml:"X>Y"` + } + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + Marshal(B{[]A{{}}}) + wg.Done() + }() + } + wg.Wait() +} + +func TestIsValidDirective(t *testing.T) { + testOK := []string{ + "<>", + "< < > >", + "' '>' >", + " ]>", + " '<' ' doc ANY> ]>", + ">>> a < comment --> [ ] >", + } + testKO := []string{ + "<", + ">", + "", + "< > > < < >", + " -->", + "", + "'", + "", + } + for _, s := range testOK { + if !isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be valid", s) + } + } + for _, s := range testKO { + if isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be invalid", s) + } + } +} + +// Issue 11719. EncodeToken used to silently eat tokens with an invalid type. +func TestSimpleUseOfEncodeToken(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(&StartElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(&EndElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(StartElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: StartElement %s", err) + } + if err := enc.EncodeToken(EndElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: EndElement %s", err) + } + if err := enc.EncodeToken(Universe{}); err == nil { + t.Errorf("enc.EncodeToken: invalid type not caught") + } + if err := enc.Flush(); err != nil { + t.Errorf("enc.Flush: %s", err) + } + if buf.Len() == 0 { + t.Errorf("enc.EncodeToken: empty buffer") + } + want := "" + if buf.String() != want { + t.Errorf("enc.EncodeToken: expected %q; got %q", want, buf.String()) + } +} + +// Issue 16158. Decoder.unmarshalAttr ignores the return value of copyValue. +func TestIssue16158(t *testing.T) { + const data = `` + err := Unmarshal([]byte(data), &struct { + B byte `xml:"b,attr,omitempty"` + }{}) + if err == nil { + t.Errorf("Unmarshal: expected error, got nil") + } +} + +// Issue 20953. Crash on invalid XMLName attribute. + +type InvalidXMLName struct { + XMLName Name `xml:"error"` + Type struct { + XMLName Name `xml:"type,attr"` + } +} + +func TestInvalidXMLName(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(InvalidXMLName{}); err == nil { + t.Error("unexpected success") + } else if want := "invalid tag"; !strings.Contains(err.Error(), want) { + t.Errorf("error %q does not contain %q", err, want) + } +} + +// Issue 50164. Crash on zero value XML attribute. +type LayerOne struct { + XMLName Name `xml:"l1"` + + Value *float64 `xml:"value,omitempty"` + *LayerTwo `xml:",omitempty"` +} + +type LayerTwo struct { + ValueTwo *int `xml:"value_two,attr,omitempty"` +} + +func TestMarshalZeroValue(t *testing.T) { + proofXml := `1.2345` + var l1 LayerOne + err := Unmarshal([]byte(proofXml), &l1) + if err != nil { + t.Fatalf("unmarshal XML error: %v", err) + } + want := float64(1.2345) + got := *l1.Value + if got != want { + t.Fatalf("unexpected unmarshal result, want %f but got %f", want, got) + } + + // Marshal again (or Encode again) + // In issue 50164, here `Marshal(l1)` will panic because of the zero value of xml attribute ValueTwo `value_two`. + anotherXML, err := Marshal(l1) + if err != nil { + t.Fatalf("marshal XML error: %v", err) + } + if string(anotherXML) != proofXml { + t.Fatalf("unexpected unmarshal result, want %q but got %q", proofXml, anotherXML) + } +} + +var closeTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "unclosed start element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + }, + want: ``, + err: "unclosed tag ", +}, { + desc: "closed element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "foo"}}, + }, + want: ``, +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: ``, +}} + +func TestClose(t *testing.T) { + for _, tt := range closeTests { + tt := tt + t.Run(tt.desc, func(t *testing.T) { + var out strings.Builder + enc := NewEncoder(&out) + for j, tok := range tt.toks { + if err := enc.EncodeToken(tok); err != nil { + t.Fatalf("token #%d: %v", j, err) + } + } + err := enc.Close() + switch { + case tt.err != "" && err == nil: + t.Error(" expected error; got none") + case tt.err == "" && err != nil: + t.Errorf(" got error: %v", err) + case tt.err != "" && err != nil && tt.err != err.Error(): + t.Errorf(" error mismatch; got %v, want %v", err, tt.err) + } + if got := out.String(); got != tt.want { + t.Errorf("\ngot %v\nwant %v", got, tt.want) + } + t.Log(enc.p.closed) + if err := enc.EncodeToken(Directive("foo")); err == nil { + t.Errorf("unexpected success when encoding after Close") + } + }) + } +} diff --git a/prefix_test.go b/prefix_test.go new file mode 100644 index 0000000..3090910 --- /dev/null +++ b/prefix_test.go @@ -0,0 +1,44 @@ +package goxml_test + +import ( + "testing" + + "git.pyer.club/kingecg/goxml" +) + +type Person struct { + Name string `xml:"p:name"` + Age int `xml:"p:age"` +} + +type Employee struct { + Person + Salary int `xml:"p:salary"` +} + +func TestUnmarshal(t *testing.T) { + xml := ` + + John Doe + 42 + 100000 + + ` + e := Employee{} + err := goxml.Unmarshal([]byte(xml), &e) + if err != nil { + t.Fatal(err) + } +} + +func TestMashal(t *testing.T) { + e := Employee{ + Person{ + Name: "John", + Age: 30, + }, + 50000, + } + b, _ := goxml.Marshal(e) + t.Log(string(b)) +} diff --git a/read.go b/read.go new file mode 100644 index 0000000..6416896 --- /dev/null +++ b/read.go @@ -0,0 +1,777 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "reflect" + "runtime" + "strconv" + "strings" +) + +// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: +// an XML element is an order-dependent collection of anonymous +// values, while a data structure is an order-independent collection +// of named values. +// See [encoding/json] for a textual representation more suitable +// to data structures. + +// Unmarshal parses the XML-encoded data and stores the result in +// the value pointed to by v, which must be an arbitrary struct, +// slice, or string. Well-formed data that does not fit into v is +// discarded. +// +// Because Unmarshal uses the reflect package, it can only assign +// to exported (upper case) fields. Unmarshal uses a case-sensitive +// comparison to match XML element names to tag values and struct +// field names. +// +// Unmarshal maps an XML element to a struct using the following rules. +// In the rules, the tag of a field refers to the value associated with the +// key 'xml' in the struct field's tag (see the example above). +// +// - If the struct has a field of type []byte or string with tag +// ",innerxml", Unmarshal accumulates the raw XML nested inside the +// element in that field. The rest of the rules still apply. +// +// - If the struct has a field named XMLName of type Name, +// Unmarshal records the element name in that field. +// +// - If the XMLName field has an associated tag of the form +// "name" or "namespace-URL name", the XML element must have +// the given name (and, optionally, name space) or else Unmarshal +// returns an error. +// +// - If the XML element has an attribute whose name matches a +// struct field name with an associated tag containing ",attr" or +// the explicit name in a struct field tag of the form "name,attr", +// Unmarshal records the attribute value in that field. +// +// - If the XML element has an attribute not handled by the previous +// rule and the struct has a field with an associated tag containing +// ",any,attr", Unmarshal records the attribute value in the first +// such field. +// +// - If the XML element contains character data, that data is +// accumulated in the first struct field that has tag ",chardata". +// The struct field may have type []byte or string. +// If there is no such field, the character data is discarded. +// +// - If the XML element contains comments, they are accumulated in +// the first struct field that has tag ",comment". The struct +// field may have type []byte or string. If there is no such +// field, the comments are discarded. +// +// - If the XML element contains a sub-element whose name matches +// the prefix of a tag formatted as "a" or "a>b>c", unmarshal +// will descend into the XML structure looking for elements with the +// given names, and will map the innermost elements to that struct +// field. A tag starting with ">" is equivalent to one starting +// with the field name followed by ">". +// +// - If the XML element contains a sub-element whose name matches +// a struct field's XMLName tag and the struct field has no +// explicit name tag as per the previous rule, unmarshal maps +// the sub-element to that struct field. +// +// - If the XML element contains a sub-element whose name matches a +// field without any mode flags (",attr", ",chardata", etc), Unmarshal +// maps the sub-element to that struct field. +// +// - If the XML element contains a sub-element that hasn't matched any +// of the above rules and the struct has a field with tag ",any", +// unmarshal maps the sub-element to that struct field. +// +// - An anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// +// - A struct field with tag "-" is never unmarshaled into. +// +// If Unmarshal encounters a field type that implements the Unmarshaler +// interface, Unmarshal calls its UnmarshalXML method to produce the value from +// the XML element. Otherwise, if the value implements +// [encoding.TextUnmarshaler], Unmarshal calls that value's UnmarshalText method. +// +// Unmarshal maps an XML element to a string or []byte by saving the +// concatenation of that element's character data in the string or +// []byte. The saved []byte is never nil. +// +// Unmarshal maps an attribute value to a string or []byte by saving +// the value in the string or slice. +// +// Unmarshal maps an attribute value to an [Attr] by saving the attribute, +// including its name, in the Attr. +// +// Unmarshal maps an XML element or attribute value to a slice by +// extending the length of the slice and mapping the element or attribute +// to the newly created value. +// +// Unmarshal maps an XML element or attribute value to a bool by +// setting it to the boolean value represented by the string. Whitespace +// is trimmed and ignored. +// +// Unmarshal maps an XML element or attribute value to an integer or +// floating-point field by setting the field to the result of +// interpreting the string value in decimal. There is no check for +// overflow. Whitespace is trimmed and ignored. +// +// Unmarshal maps an XML element to a Name by recording the element +// name. +// +// Unmarshal maps an XML element to a pointer by setting the pointer +// to a freshly allocated value and then mapping the element to that value. +// +// A missing element or empty attribute value will be unmarshaled as a zero value. +// If the field is a slice, a zero value will be appended to the field. Otherwise, the +// field will be set to its zero value. +func Unmarshal(data []byte, v any) error { + return NewDecoder(bytes.NewReader(data)).Decode(v) +} + +// Decode works like [Unmarshal], except it reads the decoder +// stream to find the start element. +func (d *Decoder) Decode(v any) error { + return d.DecodeElement(v, nil) +} + +// DecodeElement works like [Unmarshal] except that it takes +// a pointer to the start XML element to decode into v. +// It is useful when a client reads some raw XML tokens itself +// but also wants to defer to [Unmarshal] for some elements. +func (d *Decoder) DecodeElement(v any, start *StartElement) error { + val := reflect.ValueOf(v) + if val.Kind() != reflect.Pointer { + return errors.New("non-pointer passed to Unmarshal") + } + + if val.IsNil() { + return errors.New("nil pointer passed to Unmarshal") + } + return d.unmarshal(val.Elem(), start, 0) +} + +// An UnmarshalError represents an error in the unmarshaling process. +type UnmarshalError string + +func (e UnmarshalError) Error() string { return string(e) } + +// Unmarshaler is the interface implemented by objects that can unmarshal +// an XML element description of themselves. +// +// UnmarshalXML decodes a single XML element +// beginning with the given start element. +// If it returns an error, the outer call to Unmarshal stops and +// returns that error. +// UnmarshalXML must consume exactly one XML element. +// One common implementation strategy is to unmarshal into +// a separate value with a layout matching the expected XML +// using d.DecodeElement, and then to copy the data from +// that value into the receiver. +// Another common strategy is to use d.Token to process the +// XML object one token at a time. +// UnmarshalXML may not use d.RawToken. +type Unmarshaler interface { + UnmarshalXML(d *Decoder, start StartElement) error +} + +// UnmarshalerAttr is the interface implemented by objects that can unmarshal +// an XML attribute description of themselves. +// +// UnmarshalXMLAttr decodes a single XML attribute. +// If it returns an error, the outer call to [Unmarshal] stops and +// returns that error. +// UnmarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type UnmarshalerAttr interface { + UnmarshalXMLAttr(attr Attr) error +} + +// receiverType returns the receiver type to use in an expression like "%s.MethodName". +func receiverType(val any) string { + t := reflect.TypeOf(val) + if t.Name() != "" { + return t.String() + } + return "(" + t.String() + ")" +} + +// unmarshalInterface unmarshals a single XML element into val. +// start is the opening tag of the element. +func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { + // Record that decoder must stop at end tag corresponding to start. + d.pushEOF() + + d.unmarshalDepth++ + err := val.UnmarshalXML(d, *start) + d.unmarshalDepth-- + if err != nil { + d.popEOF() + return err + } + + if !d.popEOF() { + return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) + } + + return nil +} + +// unmarshalTextInterface unmarshals a single XML element into val. +// The chardata contained in the element (but not its children) +// is passed to the text unmarshaler. +func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { + var buf []byte + depth := 1 + for depth > 0 { + t, err := d.Token() + if err != nil { + return err + } + switch t := t.(type) { + case CharData: + if depth == 1 { + buf = append(buf, t...) + } + case StartElement: + depth++ + case EndElement: + depth-- + } + } + return val.UnmarshalText(buf) +} + +// unmarshalAttr unmarshals a single XML attribute into val. +func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) { + return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + } + + // Not an UnmarshalerAttr; try encoding.TextUnmarshaler. + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + } + + if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + // Slice of element values. + // Grow slice. + n := val.Len() + val.Grow(1) + val.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshalAttr(val.Index(n), attr); err != nil { + val.SetLen(n) + return err + } + return nil + } + + if val.Type() == attrType { + val.Set(reflect.ValueOf(attr)) + return nil + } + + return copyValue(val, []byte(attr.Value)) +} + +var ( + attrType = reflect.TypeFor[Attr]() + unmarshalerType = reflect.TypeFor[Unmarshaler]() + unmarshalerAttrType = reflect.TypeFor[UnmarshalerAttr]() + textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() +) + +const ( + maxUnmarshalDepth = 10000 + maxUnmarshalDepthWasm = 5000 // go.dev/issue/56498 +) + +var errUnmarshalDepth = errors.New("exceeded max depth") + +// Unmarshal a single XML element into val. +func (d *Decoder) unmarshal(val reflect.Value, start *StartElement, depth int) error { + if depth >= maxUnmarshalDepth || runtime.GOARCH == "wasm" && depth >= maxUnmarshalDepthWasm { + return errUnmarshalDepth + } + // Find start element if we need it. + if start == nil { + for { + tok, err := d.Token() + if err != nil { + return err + } + if t, ok := tok.(StartElement); ok { + start = &t + break + } + } + } + + // Load value from interface, but only if the result will be + // usefully addressable. + if val.Kind() == reflect.Interface && !val.IsNil() { + e := val.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() { + val = e + } + } + + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + + if val.CanInterface() && val.Type().Implements(unmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return d.unmarshalInterface(val.Interface().(Unmarshaler), start) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { + return d.unmarshalInterface(pv.Interface().(Unmarshaler), start) + } + } + + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) + } + } + + var ( + data []byte + saveData reflect.Value + comment []byte + saveComment reflect.Value + saveXML reflect.Value + saveXMLIndex int + saveXMLData []byte + saveAny reflect.Value + sv reflect.Value + tinfo *typeInfo + err error + ) + + switch v := val; v.Kind() { + default: + return errors.New("unknown type " + v.Type().String()) + + case reflect.Interface: + // TODO: For now, simply ignore the field. In the near + // future we may choose to unmarshal the start + // element on it, if not nil. + return d.Skip() + + case reflect.Slice: + typ := v.Type() + if typ.Elem().Kind() == reflect.Uint8 { + // []byte + saveData = v + break + } + + // Slice of element values. + // Grow slice. + n := v.Len() + v.Grow(1) + v.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshal(v.Index(n), start, depth+1); err != nil { + v.SetLen(n) + return err + } + return nil + + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: + saveData = v + + case reflect.Struct: + typ := v.Type() + if typ == nameType { + v.Set(reflect.ValueOf(start.Name)) + break + } + + sv = v + tinfo, err = getTypeInfo(typ, true) + if err != nil { + return err + } + + // Validate and assign element name. + if tinfo.xmlname != nil { + finfo := tinfo.xmlname + if finfo.name != "" && finfo.name != start.Name.Local { + return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") + } + if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " + if start.Name.Space == "" { + e += "no name space" + } else { + e += start.Name.Space + } + return UnmarshalError(e) + } + fv := finfo.value(sv, initNilPointers) + if _, ok := fv.Interface().(Name); ok { + fv.Set(reflect.ValueOf(start.Name)) + } + } + + // Assign attributes. + for _, a := range start.Attr { + handled := false + any := -1 + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fAttr: + strv := finfo.value(sv, initNilPointers) + if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + handled = true + } + + case fAny | fAttr: + if any == -1 { + any = i + } + } + } + if !handled && any >= 0 { + finfo := &tinfo.fields[any] + strv := finfo.value(sv, initNilPointers) + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + } + } + + // Determine whether we need to save character data or comments. + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fCDATA, fCharData: + if !saveData.IsValid() { + saveData = finfo.value(sv, initNilPointers) + } + + case fComment: + if !saveComment.IsValid() { + saveComment = finfo.value(sv, initNilPointers) + } + + case fAny, fAny | fElement: + if !saveAny.IsValid() { + saveAny = finfo.value(sv, initNilPointers) + } + + case fInnerXML: + if !saveXML.IsValid() { + saveXML = finfo.value(sv, initNilPointers) + if d.saved == nil { + saveXMLIndex = 0 + d.saved = new(bytes.Buffer) + } else { + saveXMLIndex = d.savedOffset() + } + } + } + } + } + + // Find end element. + // Process sub-elements along the way. +Loop: + for { + var savedOffset int + if saveXML.IsValid() { + savedOffset = d.savedOffset() + } + tok, err := d.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + consumed := false + if sv.IsValid() { + // unmarshalPath can call unmarshal, so we need to pass the depth through so that + // we can continue to enforce the maximum recursion limit. + consumed, err = d.unmarshalPath(tinfo, sv, nil, &t, depth) + if err != nil { + return err + } + if !consumed && saveAny.IsValid() { + consumed = true + if err := d.unmarshal(saveAny, &t, depth+1); err != nil { + return err + } + } + } + if !consumed { + if err := d.Skip(); err != nil { + return err + } + } + + case EndElement: + if saveXML.IsValid() { + saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset] + if saveXMLIndex == 0 { + d.saved = nil + } + } + break Loop + + case CharData: + if saveData.IsValid() { + data = append(data, t...) + } + + case Comment: + if saveComment.IsValid() { + comment = append(comment, t...) + } + } + } + + if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) { + if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + + if saveData.IsValid() && saveData.CanAddr() { + pv := saveData.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + } + + if err := copyValue(saveData, data); err != nil { + return err + } + + switch t := saveComment; t.Kind() { + case reflect.String: + t.SetString(string(comment)) + case reflect.Slice: + t.Set(reflect.ValueOf(comment)) + } + + switch t := saveXML; t.Kind() { + case reflect.String: + t.SetString(string(saveXMLData)) + case reflect.Slice: + if t.Type().Elem().Kind() == reflect.Uint8 { + t.Set(reflect.ValueOf(saveXMLData)) + } + } + + return nil +} + +func copyValue(dst reflect.Value, src []byte) (err error) { + dst0 := dst + + if dst.Kind() == reflect.Pointer { + if dst.IsNil() { + dst.Set(reflect.New(dst.Type().Elem())) + } + dst = dst.Elem() + } + + // Save accumulated data. + switch dst.Kind() { + case reflect.Invalid: + // Probably a comment. + default: + return errors.New("cannot unmarshal into " + dst0.Type().String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if len(src) == 0 { + dst.SetInt(0) + return nil + } + itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetInt(itmp) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if len(src) == 0 { + dst.SetUint(0) + return nil + } + utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetUint(utmp) + case reflect.Float32, reflect.Float64: + if len(src) == 0 { + dst.SetFloat(0) + return nil + } + ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits()) + if err != nil { + return err + } + dst.SetFloat(ftmp) + case reflect.Bool: + if len(src) == 0 { + dst.SetBool(false) + return nil + } + value, err := strconv.ParseBool(strings.TrimSpace(string(src))) + if err != nil { + return err + } + dst.SetBool(value) + case reflect.String: + dst.SetString(string(src)) + case reflect.Slice: + if len(src) == 0 { + // non-nil to flag presence + src = []byte{} + } + dst.SetBytes(src) + } + return nil +} + +// unmarshalPath walks down an XML structure looking for wanted +// paths, and calls unmarshal on them. +// The consumed result tells whether XML elements have been consumed +// from the Decoder until start's matching end element, or if it's +// still untouched because start is uninteresting for sv's fields. +func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement, depth int) (consumed bool, err error) { + recurse := false +Loop: + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + continue + } + for j := range parents { + if parents[j] != finfo.parents[j] { + continue Loop + } + } + if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { + // It's a perfect match, unmarshal the field. + return true, d.unmarshal(finfo.value(sv, initNilPointers), start, depth+1) + } + if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { + // It's a prefix for the field. Break and recurse + // since it's not ok for one field path to be itself + // the prefix for another field path. + recurse = true + + // We can reuse the same slice as long as we + // don't try to append to it. + parents = finfo.parents[:len(parents)+1] + break + } + } + if !recurse { + // We have no business with this element. + return false, nil + } + // The element is not a perfect match for any field, but one + // or more fields have the path to this element as a parent + // prefix. Recurse and attempt to match these. + for { + var tok Token + tok, err = d.Token() + if err != nil { + return true, err + } + switch t := tok.(type) { + case StartElement: + // the recursion depth of unmarshalPath is limited to the path length specified + // by the struct field tag, so we don't increment the depth here. + consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t, depth) + if err != nil { + return true, err + } + if !consumed2 { + if err := d.Skip(); err != nil { + return true, err + } + } + case EndElement: + return true, nil + } + } +} + +// Skip reads tokens until it has consumed the end element +// matching the most recent start element already consumed, +// skipping nested structures. +// It returns nil if it finds an end element matching the start +// element; otherwise it returns an error describing the problem. +func (d *Decoder) Skip() error { + var depth int64 + for { + tok, err := d.Token() + if err != nil { + return err + } + switch tok.(type) { + case StartElement: + depth++ + case EndElement: + if depth == 0 { + return nil + } + depth-- + } + } +} diff --git a/read_test.go b/read_test.go new file mode 100644 index 0000000..6799f13 --- /dev/null +++ b/read_test.go @@ -0,0 +1,1128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bytes" + "errors" + "io" + "reflect" + "runtime" + "strings" + "testing" + "time" +) + +// Stripped down Atom feed data structures. + +func TestUnmarshalFeed(t *testing.T) { + var f Feed + if err := Unmarshal([]byte(atomFeedString), &f); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(f, atomFeed) { + t.Fatalf("have %#v\nwant %#v", f, atomFeed) + } +} + +// hget http://codereview.appspot.com/rss/mine/rsc +const atomFeedString = ` + +Code Review - My issueshttp://codereview.appspot.com/rietveld<>rietveld: an attempt at pubsubhubbub +2009-10-04T01:35:58+00:00email-address-removedurn:md5:134d9179c41f806be79b3a5f7877d19a + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a &lt;link rel=&quot;hub&quot; href=&quot;hub-server&quot;&gt; tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can&#39;t quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed&#39;s actual URL in +the link rel=&quot;self&quot;, but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +rietveld: correct tab handling +2009-10-03T23:02:17+00:00email-address-removedurn:md5:0a2a4f19bb815101f0ba2904aed7c35a + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn&#39;t know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + + ` + +type Feed struct { + XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated,attr"` + Author Person `xml:"author"` + Entry []Entry `xml:"entry"` +} + +type Entry struct { + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated"` + Author Person `xml:"author"` + Summary Text `xml:"summary"` +} + +type Link struct { + Rel string `xml:"rel,attr,omitempty"` + Href string `xml:"href,attr"` +} + +type Person struct { + Name string `xml:"name"` + URI string `xml:"uri"` + Email string `xml:"email"` + InnerXML string `xml:",innerxml"` +} + +type Text struct { + Type string `xml:"type,attr,omitempty"` + Body string `xml:",chardata"` +} + +var atomFeed = Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Code Review - My issues", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/"}, + {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, + }, + ID: "http://codereview.appspot.com/", + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "rietveld<>", + InnerXML: "rietveld<>", + }, + Entry: []Entry{ + { + Title: "rietveld: an attempt at pubsubhubbub\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/126085"}, + }, + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "email-address-removed", + }, + ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a", + Summary: Text{ + Type: "html", + Body: ` + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a <link rel="hub" href="hub-server"> tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can't quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed's actual URL in +the link rel="self", but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +`, + }, + }, + { + Title: "rietveld: correct tab handling\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/124106"}, + }, + Updated: ParseTime("2009-10-03T23:02:17+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "email-address-removed", + }, + ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", + Summary: Text{ + Type: "html", + Body: ` + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn't know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +`, + }, + }, + }, +} + +const pathTestString = ` + + 1 + + + A + + + B + + + C + D + + <_> + E + + + 2 + +` + +type PathTestItem struct { + Value string +} + +type PathTestA struct { + Items []PathTestItem `xml:">Item1"` + Before, After string +} + +type PathTestB struct { + Other []PathTestItem `xml:"Items>Item1"` + Before, After string +} + +type PathTestC struct { + Values1 []string `xml:"Items>Item1>Value"` + Values2 []string `xml:"Items>Item2>Value"` + Before, After string +} + +type PathTestSet struct { + Item1 []PathTestItem +} + +type PathTestD struct { + Other PathTestSet `xml:"Items"` + Before, After string +} + +type PathTestE struct { + Underline string `xml:"Items>_>Value"` + Before, After string +} + +var pathTests = []any{ + &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, + &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, + &PathTestE{Underline: "E", Before: "1", After: "2"}, +} + +func TestUnmarshalPaths(t *testing.T) { + for _, pt := range pathTests { + v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() + if err := Unmarshal([]byte(pathTestString), v); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(v, pt) { + t.Fatalf("have %#v\nwant %#v", v, pt) + } + } +} + +type BadPathTestA struct { + First string `xml:"items>item1"` + Other string `xml:"items>item2"` + Second string `xml:"items"` +} + +type BadPathTestB struct { + Other string `xml:"items>item2>value"` + First string `xml:"items>item1"` + Second string `xml:"items>item1>value"` +} + +type BadPathTestC struct { + First string + Second string `xml:"First"` +} + +type BadPathTestD struct { + BadPathEmbeddedA + BadPathEmbeddedB +} + +type BadPathEmbeddedA struct { + First string +} + +type BadPathEmbeddedB struct { + Second string `xml:"First"` +} + +var badPathTests = []struct { + v, e any +}{ + {&BadPathTestA{}, &TagPathError{reflect.TypeFor[BadPathTestA](), "First", "items>item1", "Second", "items"}}, + {&BadPathTestB{}, &TagPathError{reflect.TypeFor[BadPathTestB](), "First", "items>item1", "Second", "items>item1>value"}}, + {&BadPathTestC{}, &TagPathError{reflect.TypeFor[BadPathTestC](), "First", "", "Second", "First"}}, + {&BadPathTestD{}, &TagPathError{reflect.TypeFor[BadPathTestD](), "First", "", "Second", "First"}}, +} + +func TestUnmarshalBadPaths(t *testing.T) { + for _, tt := range badPathTests { + err := Unmarshal([]byte(pathTestString), tt.v) + if !reflect.DeepEqual(err, tt.e) { + t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e) + } + } +} + +const OK = "OK" +const withoutNameTypeData = ` + +` + +type TestThree struct { + XMLName Name `xml:"Test3"` + Attr string `xml:",attr"` +} + +func TestUnmarshalWithoutNameType(t *testing.T) { + var x TestThree + if err := Unmarshal([]byte(withoutNameTypeData), &x); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if x.Attr != OK { + t.Fatalf("have %v\nwant %v", x.Attr, OK) + } +} + +func TestUnmarshalAttr(t *testing.T) { + type ParamVal struct { + Int int `xml:"int,attr"` + } + + type ParamPtr struct { + Int *int `xml:"int,attr"` + } + + type ParamStringPtr struct { + Int *string `xml:"int,attr"` + } + + x := []byte(``) + + p1 := &ParamPtr{} + if err := Unmarshal(x, p1); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p1.Int == nil { + t.Fatalf("Unmarshal failed in to *int field") + } else if *p1.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1) + } + + p2 := &ParamVal{} + if err := Unmarshal(x, p2); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p2.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1) + } + + p3 := &ParamStringPtr{} + if err := Unmarshal(x, p3); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p3.Int == nil { + t.Fatalf("Unmarshal failed in to *string field") + } else if *p3.Int != "1" { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1) + } +} + +type Tables struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table"` + FTable string `xml:"http://www.w3schools.com/furniture table"` +} + +var tables = []struct { + xml string + tab Tables + ns string +}{ + { + xml: `` + + `hello
` + + `world
` + + `
`, + tab: Tables{"hello", "world"}, + }, + { + xml: `` + + `world
` + + `hello
` + + `
`, + tab: Tables{"hello", "world"}, + }, + { + xml: `` + + `world` + + `hello` + + ``, + tab: Tables{"hello", "world"}, + }, + { + xml: `` + + `bogus
` + + `
`, + tab: Tables{}, + }, + { + xml: `` + + `only
` + + `
`, + tab: Tables{HTable: "only"}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `` + + `only
` + + `
`, + tab: Tables{FTable: "only"}, + ns: "http://www.w3schools.com/furniture", + }, + { + xml: `` + + `only
` + + `
`, + tab: Tables{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNS(t *testing.T) { + for i, tt := range tables { + var dst Tables + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNS(t *testing.T) { + dst := Tables{"hello", "world"} + data, err := Marshal(&dst) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `hello
world
` + str := string(data) + if str != want { + t.Errorf("have: %q\nwant: %q\n", str, want) + } +} + +type TableAttrs struct { + TAttr TAttr +} + +type TAttr struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table,attr"` + FTable string `xml:"http://www.w3schools.com/furniture table,attr"` + Lang string `xml:"http://www.w3.org/XML/1998/namespace lang,attr,omitempty"` + Other1 string `xml:"http://golang.org/xml/ other,attr,omitempty"` + Other2 string `xml:"http://golang.org/xmlfoo/ other,attr,omitempty"` + Other3 string `xml:"http://golang.org/json/ other,attr,omitempty"` + Other4 string `xml:"http://golang.org/2/json/ other,attr,omitempty"` +} + +var tableAttrs = []struct { + xml string + tab TableAttrs + ns string +}{ + { + xml: ``, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: ``, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: ``, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + // Default space does not apply to attribute names. + xml: ``, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + }, + { + // Default space does not apply to attribute names. + xml: ``, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + }, + { + xml: ``, + tab: TableAttrs{}, + }, + { + // Default space does not apply to attribute names. + xml: ``, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + ns: "http://www.w3schools.com/furniture", + }, + { + // Default space does not apply to attribute names. + xml: ``, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: ``, + tab: TableAttrs{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNSAttr(t *testing.T) { + for i, tt := range tableAttrs { + var dst TableAttrs + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNSAttr(t *testing.T) { + src := TableAttrs{TAttr{"hello", "world", "en_US", "other1", "other2", "other3", "other4"}} + data, err := Marshal(&src) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `` + str := string(data) + if str != want { + t.Errorf("Marshal:\nhave: %#q\nwant: %#q\n", str, want) + } + + var dst TableAttrs + if err := Unmarshal(data, &dst); err != nil { + t.Errorf("Unmarshal: %v", err) + } + + if dst != src { + t.Errorf("Unmarshal = %q, want %q", dst, src) + } +} + +type MyCharData struct { + body string +} + +func (m *MyCharData) UnmarshalXML(d *Decoder, start StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { // found end of element + break + } + if err != nil { + return err + } + if char, ok := t.(CharData); ok { + m.body += string(char) + } + } + return nil +} + +var _ Unmarshaler = (*MyCharData)(nil) + +func (m *MyCharData) UnmarshalXMLAttr(attr Attr) error { + panic("must not call") +} + +type MyAttr struct { + attr string +} + +func (m *MyAttr) UnmarshalXMLAttr(attr Attr) error { + m.attr = attr.Value + return nil +} + +var _ UnmarshalerAttr = (*MyAttr)(nil) + +type MyStruct struct { + Data *MyCharData + Attr *MyAttr `xml:",attr"` + + Data2 MyCharData + Attr2 MyAttr `xml:",attr"` +} + +func TestUnmarshaler(t *testing.T) { + xml := ` + + hello world + howdy world + + ` + + var m MyStruct + if err := Unmarshal([]byte(xml), &m); err != nil { + t.Fatal(err) + } + + if m.Data == nil || m.Attr == nil || m.Data.body != "hello world" || m.Attr.attr != "attr1" || m.Data2.body != "howdy world" || m.Attr2.attr != "attr2" { + t.Errorf("m=%#+v\n", m) + } +} + +type Pea struct { + Cotelydon string +} + +type Pod struct { + Pea any `xml:"Pea"` +} + +// https://golang.org/issue/6836 +func TestUnmarshalIntoInterface(t *testing.T) { + pod := new(Pod) + pod.Pea = new(Pea) + xml := `Green stuff` + err := Unmarshal([]byte(xml), pod) + if err != nil { + t.Fatalf("failed to unmarshal %q: %v", xml, err) + } + pea, ok := pod.Pea.(*Pea) + if !ok { + t.Fatalf("unmarshaled into wrong type: have %T want *Pea", pod.Pea) + } + have, want := pea.Cotelydon, "Green stuff" + if have != want { + t.Errorf("failed to unmarshal into interface, have %q want %q", have, want) + } +} + +type X struct { + D string `xml:",comment"` +} + +// Issue 11112. Unmarshal must reject invalid comments. +func TestMalformedComment(t *testing.T) { + testData := []string{ + "", + "", + "", + "", + } + for i, test := range testData { + data := []byte(test) + v := new(X) + if err := Unmarshal(data, v); err == nil { + t.Errorf("%d: unmarshal should reject invalid comments", i) + } + } +} + +type IXField struct { + Five int `xml:"five"` + NotInnerXML []string `xml:",innerxml"` +} + +// Issue 15600. ",innerxml" on a field that can't hold it. +func TestInvalidInnerXMLType(t *testing.T) { + v := new(IXField) + if err := Unmarshal([]byte(`5`), v); err != nil { + t.Errorf("Unmarshal failed: got %v", err) + } + if v.Five != 5 { + t.Errorf("Five = %v, want 5", v.Five) + } + if v.NotInnerXML != nil { + t.Errorf("NotInnerXML = %v, want nil", v.NotInnerXML) + } +} + +type Child struct { + G struct { + I int + } +} + +type ChildToEmbed struct { + X bool +} + +type Parent struct { + I int + IPtr *int + Is []int + IPtrs []*int + F float32 + FPtr *float32 + Fs []float32 + FPtrs []*float32 + B bool + BPtr *bool + Bs []bool + BPtrs []*bool + Bytes []byte + BytesPtr *[]byte + S string + SPtr *string + Ss []string + SPtrs []*string + MyI MyInt + Child Child + Children []Child + ChildPtr *Child + ChildToEmbed +} + +const ( + emptyXML = ` + + + + + + + + + + + + + + + + + + + + + + + + + +` +) + +// golang.org/issues/13417 +func TestUnmarshalEmptyValues(t *testing.T) { + // Test first with a zero-valued dst. + v := new(Parent) + if err := Unmarshal([]byte(emptyXML), v); err != nil { + t.Fatalf("zero: Unmarshal failed: got %v", err) + } + + zBytes, zInt, zStr, zFloat, zBool := []byte{}, 0, "", float32(0), false + want := &Parent{ + IPtr: &zInt, + Is: []int{zInt}, + IPtrs: []*int{&zInt}, + FPtr: &zFloat, + Fs: []float32{zFloat}, + FPtrs: []*float32{&zFloat}, + BPtr: &zBool, + Bs: []bool{zBool}, + BPtrs: []*bool{&zBool}, + Bytes: []byte{}, + BytesPtr: &zBytes, + SPtr: &zStr, + Ss: []string{zStr}, + SPtrs: []*string{&zStr}, + Children: []Child{{}}, + ChildPtr: new(Child), + ChildToEmbed: ChildToEmbed{}, + } + if !reflect.DeepEqual(v, want) { + t.Fatalf("zero: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } + + // Test with a pre-populated dst. + // Multiple addressable copies, as pointer-to fields will replace value during unmarshal. + vBytes0, vInt0, vStr0, vFloat0, vBool0 := []byte("x"), 1, "x", float32(1), true + vBytes1, vInt1, vStr1, vFloat1, vBool1 := []byte("x"), 1, "x", float32(1), true + vInt2, vStr2, vFloat2, vBool2 := 1, "x", float32(1), true + v = &Parent{ + I: vInt0, + IPtr: &vInt1, + Is: []int{vInt0}, + IPtrs: []*int{&vInt2}, + F: vFloat0, + FPtr: &vFloat1, + Fs: []float32{vFloat0}, + FPtrs: []*float32{&vFloat2}, + B: vBool0, + BPtr: &vBool1, + Bs: []bool{vBool0}, + BPtrs: []*bool{&vBool2}, + Bytes: vBytes0, + BytesPtr: &vBytes1, + S: vStr0, + SPtr: &vStr1, + Ss: []string{vStr0}, + SPtrs: []*string{&vStr2}, + MyI: MyInt(vInt0), + Child: Child{G: struct{ I int }{I: vInt0}}, + Children: []Child{{G: struct{ I int }{I: vInt0}}}, + ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, + ChildToEmbed: ChildToEmbed{X: vBool0}, + } + if err := Unmarshal([]byte(emptyXML), v); err != nil { + t.Fatalf("populated: Unmarshal failed: got %v", err) + } + + want = &Parent{ + IPtr: &zInt, + Is: []int{vInt0, zInt}, + IPtrs: []*int{&vInt0, &zInt}, + FPtr: &zFloat, + Fs: []float32{vFloat0, zFloat}, + FPtrs: []*float32{&vFloat0, &zFloat}, + BPtr: &zBool, + Bs: []bool{vBool0, zBool}, + BPtrs: []*bool{&vBool0, &zBool}, + Bytes: []byte{}, + BytesPtr: &zBytes, + SPtr: &zStr, + Ss: []string{vStr0, zStr}, + SPtrs: []*string{&vStr0, &zStr}, + Child: Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value) + Children: []Child{{G: struct{ I int }{I: vInt0}}, {}}, + ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value) + } + if !reflect.DeepEqual(v, want) { + t.Fatalf("populated: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceValuesParent struct { + BFalse bool + BTrue bool + I int + INeg int + I8 int8 + I8Neg int8 + I16 int16 + I16Neg int16 + I32 int32 + I32Neg int32 + I64 int64 + I64Neg int64 + UI uint + UI8 uint8 + UI16 uint16 + UI32 uint32 + UI64 uint64 + F32 float32 + F32Neg float32 + F64 float64 + F64Neg float64 +} + +const whitespaceValuesXML = ` + + false + true + 266703 + -266703 + 112 + -112 + 6703 + -6703 + 266703 + -266703 + 266703 + -266703 + 266703 + 112 + 6703 + 266703 + 266703 + 266.703 + -266.703 + 266.703 + -266.703 + +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceValues(t *testing.T) { + v := WhitespaceValuesParent{} + if err := Unmarshal([]byte(whitespaceValuesXML), &v); err != nil { + t.Fatalf("whitespace values: Unmarshal failed: got %v", err) + } + + want := WhitespaceValuesParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace values: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceAttrsParent struct { + BFalse bool `xml:",attr"` + BTrue bool `xml:",attr"` + I int `xml:",attr"` + INeg int `xml:",attr"` + I8 int8 `xml:",attr"` + I8Neg int8 `xml:",attr"` + I16 int16 `xml:",attr"` + I16Neg int16 `xml:",attr"` + I32 int32 `xml:",attr"` + I32Neg int32 `xml:",attr"` + I64 int64 `xml:",attr"` + I64Neg int64 `xml:",attr"` + UI uint `xml:",attr"` + UI8 uint8 `xml:",attr"` + UI16 uint16 `xml:",attr"` + UI32 uint32 `xml:",attr"` + UI64 uint64 `xml:",attr"` + F32 float32 `xml:",attr"` + F32Neg float32 `xml:",attr"` + F64 float64 `xml:",attr"` + F64Neg float64 `xml:",attr"` +} + +const whitespaceAttrsXML = ` + + +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceAttrs(t *testing.T) { + v := WhitespaceAttrsParent{} + if err := Unmarshal([]byte(whitespaceAttrsXML), &v); err != nil { + t.Fatalf("whitespace attrs: Unmarshal failed: got %v", err) + } + + want := WhitespaceAttrsParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace attrs: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +// golang.org/issues/53350 +func TestUnmarshalIntoNil(t *testing.T) { + type T struct { + A int `xml:"A"` + } + + var nilPointer *T + err := Unmarshal([]byte("1"), nilPointer) + + if err == nil { + t.Fatalf("no error in unmarshaling") + } + +} + +func TestCVE202228131(t *testing.T) { + type nested struct { + Parent *nested `xml:",any"` + } + var n nested + err := Unmarshal(bytes.Repeat([]byte(""), maxUnmarshalDepth+1), &n) + if err == nil { + t.Fatal("Unmarshal did not fail") + } else if !errors.Is(err, errUnmarshalDepth) { + t.Fatalf("Unmarshal unexpected error: got %q, want %q", err, errUnmarshalDepth) + } +} + +func TestCVE202230633(t *testing.T) { + if testing.Short() || runtime.GOARCH == "wasm" { + t.Skip("test requires significant memory") + } + defer func() { + p := recover() + if p != nil { + t.Fatal("Unmarshal panicked") + } + }() + var example struct { + Things []string + } + Unmarshal(bytes.Repeat([]byte(""), 17_000_000), &example) +} diff --git a/typeinfo.go b/typeinfo.go new file mode 100644 index 0000000..7ca195f --- /dev/null +++ b/typeinfo.go @@ -0,0 +1,372 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "fmt" + "reflect" + "strings" + "sync" +) + +// typeInfo holds details for the xml representation of a type. +type typeInfo struct { + xmlname *fieldInfo + fields []fieldInfo +} + +// fieldInfo holds details for the xml representation of a single field. +type fieldInfo struct { + idx []int + name string + xmlns string + flags fieldFlags + parents []string +} + +type fieldFlags int + +const ( + fElement fieldFlags = 1 << iota + fAttr + fCDATA + fCharData + fInnerXML + fComment + fAny + + fOmitEmpty + + fMode = fElement | fAttr | fCDATA | fCharData | fInnerXML | fComment | fAny + + xmlName = "XMLName" +) + +var tinfoMap sync.Map // map[reflect.Type]*typeInfo + +var nameType = reflect.TypeFor[Name]() + +// getTypeInfo returns the typeInfo structure with details necessary +// for marshaling and unmarshaling typ. +func getTypeInfo(typ reflect.Type, inDecode bool) (*typeInfo, error) { + if ti, ok := tinfoMap.Load(typ); ok { + return ti.(*typeInfo), nil + } + + tinfo := &typeInfo{} + if typ.Kind() == reflect.Struct && typ != nameType { + n := typ.NumField() + for i := 0; i < n; i++ { + f := typ.Field(i) + if (!f.IsExported() && !f.Anonymous) || f.Tag.Get("xml") == "-" { + continue // Private field + } + + // For embedded structs, embed its fields. + if f.Anonymous { + t := f.Type + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if t.Kind() == reflect.Struct { + inner, err := getTypeInfo(t, inDecode) + if err != nil { + return nil, err + } + if tinfo.xmlname == nil { + tinfo.xmlname = inner.xmlname + } + for _, finfo := range inner.fields { + finfo.idx = append([]int{i}, finfo.idx...) + if err := addFieldInfo(typ, tinfo, &finfo); err != nil { + return nil, err + } + } + continue + } + } + + finfo, err := structFieldInfo(typ, &f, inDecode) + if err != nil { + return nil, err + } + + if f.Name == xmlName { + tinfo.xmlname = finfo + continue + } + + // Add the field if it doesn't conflict with other fields. + if err := addFieldInfo(typ, tinfo, finfo); err != nil { + return nil, err + } + } + } + + ti, _ := tinfoMap.LoadOrStore(typ, tinfo) + return ti.(*typeInfo), nil +} + +// structFieldInfo builds and returns a fieldInfo for f. +func structFieldInfo(typ reflect.Type, f *reflect.StructField, inDecode bool) (*fieldInfo, error) { + finfo := &fieldInfo{idx: f.Index} + + // Split the tag from the xml namespace if necessary. + tag := f.Tag.Get("xml") + if ns, t, ok := strings.Cut(tag, " "); ok { + finfo.xmlns, tag = ns, t + } + if inDecode { + if ns, t, ok := strings.Cut(tag, ":"); ok { + finfo.xmlns, tag = ns, t + } + } + + // Parse flags. + tokens := strings.Split(tag, ",") + if len(tokens) == 1 { + finfo.flags = fElement + } else { + tag = tokens[0] + for _, flag := range tokens[1:] { + switch flag { + case "attr": + finfo.flags |= fAttr + case "cdata": + finfo.flags |= fCDATA + case "chardata": + finfo.flags |= fCharData + case "innerxml": + finfo.flags |= fInnerXML + case "comment": + finfo.flags |= fComment + case "any": + finfo.flags |= fAny + case "omitempty": + finfo.flags |= fOmitEmpty + } + } + + // Validate the flags used. + valid := true + switch mode := finfo.flags & fMode; mode { + case 0: + finfo.flags |= fElement + case fAttr, fCDATA, fCharData, fInnerXML, fComment, fAny, fAny | fAttr: + if f.Name == xmlName || tag != "" && mode != fAttr { + valid = false + } + default: + // This will also catch multiple modes in a single field. + valid = false + } + if finfo.flags&fMode == fAny { + finfo.flags |= fElement + } + if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 { + valid = false + } + if !valid { + return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q", + f.Name, typ, f.Tag.Get("xml")) + } + } + + // Use of xmlns without a name is not allowed. + if finfo.xmlns != "" && tag == "" { + return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q", + f.Name, typ, f.Tag.Get("xml")) + } + + if f.Name == xmlName { + // The XMLName field records the XML element name. Don't + // process it as usual because its name should default to + // empty rather than to the field name. + finfo.name = tag + return finfo, nil + } + + if tag == "" { + // If the name part of the tag is completely empty, get + // default from XMLName of underlying struct if feasible, + // or field name otherwise. + if xmlname := lookupXMLName(f.Type, inDecode); xmlname != nil { + finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name + } else { + finfo.name = f.Name + } + return finfo, nil + } + + // Prepare field name and parents. + parents := strings.Split(tag, ">") + if parents[0] == "" { + parents[0] = f.Name + } + if parents[len(parents)-1] == "" { + return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ) + } + finfo.name = parents[len(parents)-1] + if len(parents) > 1 { + if (finfo.flags & fElement) == 0 { + return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ",")) + } + finfo.parents = parents[:len(parents)-1] + } + + // If the field type has an XMLName field, the names must match + // so that the behavior of both marshaling and unmarshaling + // is straightforward and unambiguous. + if finfo.flags&fElement != 0 { + ftyp := f.Type + xmlname := lookupXMLName(ftyp, inDecode) + if xmlname != nil && xmlname.name != finfo.name { + return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName", + finfo.name, typ, f.Name, xmlname.name, ftyp) + } + } + return finfo, nil +} + +// lookupXMLName returns the fieldInfo for typ's XMLName field +// in case it exists and has a valid xml field tag, otherwise +// it returns nil. +func lookupXMLName(typ reflect.Type, inDecode bool) (xmlname *fieldInfo) { + for typ.Kind() == reflect.Pointer { + typ = typ.Elem() + } + if typ.Kind() != reflect.Struct { + return nil + } + for i, n := 0, typ.NumField(); i < n; i++ { + f := typ.Field(i) + if f.Name != xmlName { + continue + } + finfo, err := structFieldInfo(typ, &f, inDecode) + if err == nil && finfo.name != "" { + return finfo + } + // Also consider errors as a non-existent field tag + // and let getTypeInfo itself report the error. + break + } + return nil +} + +// addFieldInfo adds finfo to tinfo.fields if there are no +// conflicts, or if conflicts arise from previous fields that were +// obtained from deeper embedded structures than finfo. In the latter +// case, the conflicting entries are dropped. +// A conflict occurs when the path (parent + name) to a field is +// itself a prefix of another path, or when two paths match exactly. +// It is okay for field paths to share a common, shorter prefix. +func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error { + var conflicts []int +Loop: + // First, figure all conflicts. Most working code will have none. + for i := range tinfo.fields { + oldf := &tinfo.fields[i] + if oldf.flags&fMode != newf.flags&fMode { + continue + } + if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns { + continue + } + minl := min(len(newf.parents), len(oldf.parents)) + for p := 0; p < minl; p++ { + if oldf.parents[p] != newf.parents[p] { + continue Loop + } + } + if len(oldf.parents) > len(newf.parents) { + if oldf.parents[len(newf.parents)] == newf.name { + conflicts = append(conflicts, i) + } + } else if len(oldf.parents) < len(newf.parents) { + if newf.parents[len(oldf.parents)] == oldf.name { + conflicts = append(conflicts, i) + } + } else { + if newf.name == oldf.name && newf.xmlns == oldf.xmlns { + conflicts = append(conflicts, i) + } + } + } + // Without conflicts, add the new field and return. + if conflicts == nil { + tinfo.fields = append(tinfo.fields, *newf) + return nil + } + + // If any conflict is shallower, ignore the new field. + // This matches the Go field resolution on embedding. + for _, i := range conflicts { + if len(tinfo.fields[i].idx) < len(newf.idx) { + return nil + } + } + + // Otherwise, if any of them is at the same depth level, it's an error. + for _, i := range conflicts { + oldf := &tinfo.fields[i] + if len(oldf.idx) == len(newf.idx) { + f1 := typ.FieldByIndex(oldf.idx) + f2 := typ.FieldByIndex(newf.idx) + return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")} + } + } + + // Otherwise, the new field is shallower, and thus takes precedence, + // so drop the conflicting fields from tinfo and append the new one. + for c := len(conflicts) - 1; c >= 0; c-- { + i := conflicts[c] + copy(tinfo.fields[i:], tinfo.fields[i+1:]) + tinfo.fields = tinfo.fields[:len(tinfo.fields)-1] + } + tinfo.fields = append(tinfo.fields, *newf) + return nil +} + +// A TagPathError represents an error in the unmarshaling process +// caused by the use of field tags with conflicting paths. +type TagPathError struct { + Struct reflect.Type + Field1, Tag1 string + Field2, Tag2 string +} + +func (e *TagPathError) Error() string { + return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) +} + +const ( + initNilPointers = true + dontInitNilPointers = false +) + +// value returns v's field value corresponding to finfo. +// It's equivalent to v.FieldByIndex(finfo.idx), but when passed +// initNilPointers, it initializes and dereferences pointers as necessary. +// When passed dontInitNilPointers and a nil pointer is reached, the function +// returns a zero reflect.Value. +func (finfo *fieldInfo) value(v reflect.Value, shouldInitNilPointers bool) reflect.Value { + for i, x := range finfo.idx { + if i > 0 { + t := v.Type() + if t.Kind() == reflect.Pointer && t.Elem().Kind() == reflect.Struct { + if v.IsNil() { + if !shouldInitNilPointers { + return reflect.Value{} + } + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + } + v = v.Field(x) + } + return v +} diff --git a/xml.go b/xml.go new file mode 100644 index 0000000..c9e9df8 --- /dev/null +++ b/xml.go @@ -0,0 +1,2075 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// package goxml implements a simple XML 1.0 parser that +// understands XML name spaces. +package goxml + +// References: +// Annotated XML spec: https://www.xml.com/axml/testaxml.htm +// XML name spaces: https://www.w3.org/TR/REC-xml-names/ + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A SyntaxError represents a syntax error in the XML input stream. +type SyntaxError struct { + Msg string + Line int +} + +func (e *SyntaxError) Error() string { + return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg +} + +// A Name represents an XML name (Local) annotated +// with a name space identifier (Space). +// In tokens returned by [Decoder.Token], the Space identifier +// is given as a canonical URL, not the short prefix used +// in the document being parsed. +type Name struct { + Space, Local string +} + +// An Attr represents an attribute in an XML element (Name=Value). +type Attr struct { + Name Name + Value string +} + +// A Token is an interface holding one of the token types: +// [StartElement], [EndElement], [CharData], [Comment], [ProcInst], or [Directive]. +type Token any + +// A StartElement represents an XML start element. +type StartElement struct { + Name Name + Attr []Attr +} + +// Copy creates a new copy of StartElement. +func (e StartElement) Copy() StartElement { + attrs := make([]Attr, len(e.Attr)) + copy(attrs, e.Attr) + e.Attr = attrs + return e +} + +// End returns the corresponding XML end element. +func (e StartElement) End() EndElement { + return EndElement{e.Name} +} + +// An EndElement represents an XML end element. +type EndElement struct { + Name Name +} + +// A CharData represents XML character data (raw text), +// in which XML escape sequences have been replaced by +// the characters they represent. +type CharData []byte + +// Copy creates a new copy of CharData. +func (c CharData) Copy() CharData { return CharData(bytes.Clone(c)) } + +// A Comment represents an XML comment of the form . +// The bytes do not include the comment markers. +type Comment []byte + +// Copy creates a new copy of Comment. +func (c Comment) Copy() Comment { return Comment(bytes.Clone(c)) } + +// A ProcInst represents an XML processing instruction of the form +type ProcInst struct { + Target string + Inst []byte +} + +// Copy creates a new copy of ProcInst. +func (p ProcInst) Copy() ProcInst { + p.Inst = bytes.Clone(p.Inst) + return p +} + +// A Directive represents an XML directive of the form . +// The bytes do not include the markers. +type Directive []byte + +// Copy creates a new copy of Directive. +func (d Directive) Copy() Directive { return Directive(bytes.Clone(d)) } + +// CopyToken returns a copy of a Token. +func CopyToken(t Token) Token { + switch v := t.(type) { + case CharData: + return v.Copy() + case Comment: + return v.Copy() + case Directive: + return v.Copy() + case ProcInst: + return v.Copy() + case StartElement: + return v.Copy() + } + return t +} + +// A TokenReader is anything that can decode a stream of XML tokens, including a +// [Decoder]. +// +// When Token encounters an error or end-of-file condition after successfully +// reading a token, it returns the token. It may return the (non-nil) error from +// the same call or return the error (and a nil token) from a subsequent call. +// An instance of this general case is that a TokenReader returning a non-nil +// token at the end of the token stream may return either io.EOF or a nil error. +// The next Read should return nil, [io.EOF]. +// +// Implementations of Token are discouraged from returning a nil token with a +// nil error. Callers should treat a return of nil, nil as indicating that +// nothing happened; in particular it does not indicate EOF. +type TokenReader interface { + Token() (Token, error) +} + +// A Decoder represents an XML parser reading a particular input stream. +// The parser assumes that its input is encoded in UTF-8. +type Decoder struct { + // Strict defaults to true, enforcing the requirements + // of the XML specification. + // If set to false, the parser allows input containing common + // mistakes: + // * If an element is missing an end tag, the parser invents + // end tags as necessary to keep the return values from Token + // properly balanced. + // * In attribute values and character data, unknown or malformed + // character entities (sequences beginning with &) are left alone. + // + // Setting: + // + // d.Strict = false + // d.AutoClose = xml.HTMLAutoClose + // d.Entity = xml.HTMLEntity + // + // creates a parser that can handle typical HTML. + // + // Strict mode does not enforce the requirements of the XML name spaces TR. + // In particular it does not reject name space tags using undefined prefixes. + // Such tags are recorded with the unknown prefix as the name space URL. + Strict bool + + // When Strict == false, AutoClose indicates a set of elements to + // consider closed immediately after they are opened, regardless + // of whether an end element is present. + AutoClose []string + + // Entity can be used to map non-standard entity names to string replacements. + // The parser behaves as if these standard mappings are present in the map, + // regardless of the actual map content: + // + // "lt": "<", + // "gt": ">", + // "amp": "&", + // "apos": "'", + // "quot": `"`, + Entity map[string]string + + // CharsetReader, if non-nil, defines a function to generate + // charset-conversion readers, converting from the provided + // non-UTF-8 charset into UTF-8. If CharsetReader is nil or + // returns an error, parsing stops with an error. One of the + // CharsetReader's result values must be non-nil. + CharsetReader func(charset string, input io.Reader) (io.Reader, error) + + // DefaultSpace sets the default name space used for unadorned tags, + // as if the entire XML stream were wrapped in an element containing + // the attribute xmlns="DefaultSpace". + DefaultSpace string + + r io.ByteReader + t TokenReader + buf bytes.Buffer + saved *bytes.Buffer + stk *stack + free *stack + needClose bool + toClose Name + nextToken Token + nextByte int + ns map[string]string + err error + line int + linestart int64 + offset int64 + unmarshalDepth int +} + +// NewDecoder creates a new XML parser reading from r. +// If r does not implement [io.ByteReader], NewDecoder will +// do its own buffering. +func NewDecoder(r io.Reader) *Decoder { + d := &Decoder{ + ns: make(map[string]string), + nextByte: -1, + line: 1, + Strict: true, + } + d.switchToReader(r) + return d +} + +// NewTokenDecoder creates a new XML parser using an underlying token stream. +func NewTokenDecoder(t TokenReader) *Decoder { + // Is it already a Decoder? + if d, ok := t.(*Decoder); ok { + return d + } + d := &Decoder{ + ns: make(map[string]string), + t: t, + nextByte: -1, + line: 1, + Strict: true, + } + return d +} + +// Token returns the next XML token in the input stream. +// At the end of the input stream, Token returns nil, [io.EOF]. +// +// Slices of bytes in the returned token data refer to the +// parser's internal buffer and remain valid only until the next +// call to Token. To acquire a copy of the bytes, call [CopyToken] +// or the token's Copy method. +// +// Token expands self-closing elements such as
+// into separate start and end elements returned by successive calls. +// +// Token guarantees that the [StartElement] and [EndElement] +// tokens it returns are properly nested and matched: +// if Token encounters an unexpected end element +// or EOF before all expected end elements, +// it will return an error. +// +// If [Decoder.CharsetReader] is called and returns an error, +// the error is wrapped and returned. +// +// Token implements XML name spaces as described by +// https://www.w3.org/TR/REC-xml-names/. Each of the +// [Name] structures contained in the Token has the Space +// set to the URL identifying its name space when known. +// If Token encounters an unrecognized name space prefix, +// it uses the prefix as the Space rather than report an error. +func (d *Decoder) Token() (Token, error) { + var t Token + var err error + if d.stk != nil && d.stk.kind == stkEOF { + return nil, io.EOF + } + if d.nextToken != nil { + t = d.nextToken + d.nextToken = nil + } else { + if t, err = d.rawToken(); t == nil && err != nil { + if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { + err = d.syntaxError("unexpected EOF") + } + return nil, err + } + // We still have a token to process, so clear any + // errors (e.g. EOF) and proceed. + err = nil + } + if !d.Strict { + if t1, ok := d.autoClose(t); ok { + d.nextToken = t + t = t1 + } + } + switch t1 := t.(type) { + case StartElement: + // In XML name spaces, the translations listed in the + // attributes apply to the element name and + // to the other attribute names, so process + // the translations first. + for _, a := range t1.Attr { + if a.Name.Space == xmlnsPrefix { + v, ok := d.ns[a.Name.Local] + d.pushNs(a.Name.Local, v, ok) + d.ns[a.Name.Local] = a.Value + } + if a.Name.Space == "" && a.Name.Local == xmlnsPrefix { + // Default space for untagged names + v, ok := d.ns[""] + d.pushNs("", v, ok) + d.ns[""] = a.Value + } + } + + d.pushElement(t1.Name) + d.translate(&t1.Name, true) + for i := range t1.Attr { + d.translate(&t1.Attr[i].Name, false) + } + t = t1 + + case EndElement: + if !d.popElement(&t1) { + return nil, d.err + } + t = t1 + } + return t, err +} + +const ( + xmlURL = "http://www.w3.org/XML/1998/namespace" + xmlnsPrefix = "xmlns" + xmlPrefix = "xml" +) + +// Apply name space translation to name n. +// The default name space (for Space=="") +// applies only to element names, not to attribute names. +func (d *Decoder) translate(n *Name, isElementName bool) { + switch { + case n.Space == xmlnsPrefix: + return + case n.Space == "" && !isElementName: + return + case n.Space == xmlPrefix: + n.Space = xmlURL + case n.Space == "" && n.Local == xmlnsPrefix: + return + } + if v, ok := d.ns[n.Space]; ok { + n.Space = v + } else if n.Space == "" { + n.Space = d.DefaultSpace + } +} + +func (d *Decoder) switchToReader(r io.Reader) { + // Get efficient byte at a time reader. + // Assume that if reader has its own + // ReadByte, it's efficient enough. + // Otherwise, use bufio. + if rb, ok := r.(io.ByteReader); ok { + d.r = rb + } else { + d.r = bufio.NewReader(r) + } +} + +// Parsing state - stack holds old name space translations +// and the current set of open elements. The translations to pop when +// ending a given tag are *below* it on the stack, which is +// more work but forced on us by XML. +type stack struct { + next *stack + kind int + name Name + ok bool +} + +const ( + stkStart = iota + stkNs + stkEOF +) + +func (d *Decoder) push(kind int) *stack { + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.next = d.stk + s.kind = kind + d.stk = s + return s +} + +func (d *Decoder) pop() *stack { + s := d.stk + if s != nil { + d.stk = s.next + s.next = d.free + d.free = s + } + return s +} + +// Record that after the current element is finished +// (that element is already pushed on the stack) +// Token should return EOF until popEOF is called. +func (d *Decoder) pushEOF() { + // Walk down stack to find Start. + // It might not be the top, because there might be stkNs + // entries above it. + start := d.stk + for start.kind != stkStart { + start = start.next + } + // The stkNs entries below a start are associated with that + // element too; skip over them. + for start.next != nil && start.next.kind == stkNs { + start = start.next + } + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.kind = stkEOF + s.next = start.next + start.next = s +} + +// Undo a pushEOF. +// The element must have been finished, so the EOF should be at the top of the stack. +func (d *Decoder) popEOF() bool { + if d.stk == nil || d.stk.kind != stkEOF { + return false + } + d.pop() + return true +} + +// Record that we are starting an element with the given name. +func (d *Decoder) pushElement(name Name) { + s := d.push(stkStart) + s.name = name +} + +// Record that we are changing the value of ns[local]. +// The old value is url, ok. +func (d *Decoder) pushNs(local string, url string, ok bool) { + s := d.push(stkNs) + s.name.Local = local + s.name.Space = url + s.ok = ok +} + +// Creates a SyntaxError with the current line number. +func (d *Decoder) syntaxError(msg string) error { + return &SyntaxError{Msg: msg, Line: d.line} +} + +// Record that we are ending an element with the given name. +// The name must match the record at the top of the stack, +// which must be a pushElement record. +// After popping the element, apply any undo records from +// the stack to restore the name translations that existed +// before we saw this element. +func (d *Decoder) popElement(t *EndElement) bool { + s := d.pop() + name := t.Name + switch { + case s == nil || s.kind != stkStart: + d.err = d.syntaxError("unexpected end element ") + return false + case s.name.Local != name.Local: + if !d.Strict { + d.needClose = true + d.toClose = t.Name + t.Name = s.name + return true + } + d.err = d.syntaxError("element <" + s.name.Local + "> closed by ") + return false + case s.name.Space != name.Space: + ns := name.Space + if name.Space == "" { + ns = `""` + } + d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + + " closed by in space " + ns) + return false + } + + d.translate(&t.Name, true) + + // Pop stack until a Start or EOF is on the top, undoing the + // translations that were associated with the element we just closed. + for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF { + s := d.pop() + if s.ok { + d.ns[s.name.Local] = s.name.Space + } else { + delete(d.ns, s.name.Local) + } + } + + return true +} + +// If the top element on the stack is autoclosing and +// t is not the end tag, invent the end tag. +func (d *Decoder) autoClose(t Token) (Token, bool) { + if d.stk == nil || d.stk.kind != stkStart { + return nil, false + } + for _, s := range d.AutoClose { + if strings.EqualFold(s, d.stk.name.Local) { + // This one should be auto closed if t doesn't close it. + et, ok := t.(EndElement) + if !ok || !strings.EqualFold(et.Name.Local, d.stk.name.Local) { + return EndElement{d.stk.name}, true + } + break + } + } + return nil, false +} + +var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method") + +// RawToken is like [Decoder.Token] but does not verify that +// start and end elements match and does not translate +// name space prefixes to their corresponding URLs. +func (d *Decoder) RawToken() (Token, error) { + if d.unmarshalDepth > 0 { + return nil, errRawToken + } + return d.rawToken() +} + +func (d *Decoder) rawToken() (Token, error) { + if d.t != nil { + return d.t.Token() + } + if d.err != nil { + return nil, d.err + } + if d.needClose { + // The last element we read was self-closing and + // we returned just the StartElement half. + // Return the EndElement half now. + d.needClose = false + return EndElement{d.toClose}, nil + } + + b, ok := d.getc() + if !ok { + return nil, d.err + } + + if b != '<' { + // Text section. + d.ungetc(b) + data := d.text(-1, false) + if data == nil { + return nil, d.err + } + return CharData(data), nil + } + + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + switch b { + case '/': + // ' { + d.err = d.syntaxError("invalid characters between ") + return nil, d.err + } + return EndElement{name}, nil + + case '?': + // ' { + break + } + b0 = b + } + data := d.buf.Bytes() + data = data[0 : len(data)-2] // chop ?> + + if target == "xml" { + content := string(data) + ver := procInst("version", content) + if ver != "" && ver != "1.0" { + d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver) + return nil, d.err + } + enc := procInst("encoding", content) + if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") { + if d.CharsetReader == nil { + d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc) + return nil, d.err + } + newr, err := d.CharsetReader(enc, d.r.(io.Reader)) + if err != nil { + d.err = fmt.Errorf("xml: opening charset %q: %w", enc, err) + return nil, d.err + } + if newr == nil { + panic("CharsetReader returned a nil Reader for charset " + enc) + } + d.switchToReader(newr) + } + } + return ProcInst{target, data}, nil + + case '!': + // ' { + d.err = d.syntaxError( + `invalid sequence "--" not allowed in comments`) + return nil, d.err + } + break + } + b0, b1 = b1, b + } + data := d.buf.Bytes() + data = data[0 : len(data)-3] // chop --> + return Comment(data), nil + + case '[': // . + data := d.text(-1, true) + if data == nil { + return nil, d.err + } + return CharData(data), nil + } + + // Probably a directive: , , etc. + // We don't care, but accumulate for caller. Quoted angle + // brackets do not count for nesting. + d.buf.Reset() + d.buf.WriteByte(b) + inquote := uint8(0) + depth := 0 + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if inquote == 0 && b == '>' && depth == 0 { + break + } + HandleB: + d.buf.WriteByte(b) + switch { + case b == inquote: + inquote = 0 + + case inquote != 0: + // in quotes, no special action + + case b == '\'' || b == '"': + inquote = b + + case b == '>' && inquote == 0: + depth-- + + case b == '<' && inquote == 0: + // Look for ` + +var testEntity = map[string]string{"何": "What", "is-it": "is it?"} + +var rawTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"", "hello"}}, + CharData("\n "), + StartElement{Name{"", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"", "query"}}, + CharData("\n "), + StartElement{Name{"", "goodbye"}, []Attr{}}, + EndElement{Name{"", "goodbye"}}, + CharData("\n "), + StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"", "inner"}, []Attr{}}, + EndElement{Name{"", "inner"}}, + CharData("\n "), + EndElement{Name{"", "outer"}}, + CharData("\n "), + StartElement{Name{"tag", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"tag", "name"}}, + CharData("\n"), + EndElement{Name{"", "body"}}, + Comment(" missing final newline "), +} + +var cookedTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"ns2", "hello"}}, + CharData("\n "), + StartElement{Name{"ns2", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"ns2", "query"}}, + CharData("\n "), + StartElement{Name{"ns2", "goodbye"}, []Attr{}}, + EndElement{Name{"ns2", "goodbye"}}, + CharData("\n "), + StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"ns2", "inner"}, []Attr{}}, + EndElement{Name{"ns2", "inner"}}, + CharData("\n "), + EndElement{Name{"ns2", "outer"}}, + CharData("\n "), + StartElement{Name{"ns3", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"ns3", "name"}}, + CharData("\n"), + EndElement{Name{"ns2", "body"}}, + Comment(" missing final newline "), +} + +const testInputAltEncoding = ` + +VALUE` + +var rawTokensAltEncoding = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("value"), + EndElement{Name{"", "tag"}}, +} + +var xmlInput = []string{ + // unexpected EOF cases + "<", + "", + "", + "", + // "", // let the Token() caller handle + "", + "", + "", + "", + " c;", + "", + "", + "", + // "", // let the Token() caller handle + "", + "", + "cdata]]>", +} + +func TestRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + testRawToken(t, d, testInput, rawTokens) +} + +const nonStrictInput = ` +non&entity +&unknown;entity +{ +&#zzz; +&なまえ3; +<-gt; +&; +&0a; +` + +var nonStrictTokens = []Token{ + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("non&entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&unknown;entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("{"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&#zzz;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&なまえ3;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("<-gt;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&0a;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), +} + +func TestNonStrictRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(nonStrictInput)) + d.Strict = false + testRawToken(t, d, nonStrictInput, nonStrictTokens) +} + +type downCaser struct { + t *testing.T + r io.ByteReader +} + +func (d *downCaser) ReadByte() (c byte, err error) { + c, err = d.r.ReadByte() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + return +} + +func (d *downCaser) Read(p []byte) (int, error) { + d.t.Fatalf("unexpected Read call on downCaser reader") + panic("unreachable") +} + +func TestRawTokenAltEncoding(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + if charset != "x-testing-uppercase" { + t.Fatalf("unexpected charset %q", charset) + } + return &downCaser{t, input.(io.ByteReader)}, nil + } + testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) +} + +func TestRawTokenAltEncodingNoConverter(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + token, err := d.RawToken() + if token == nil { + t.Fatalf("expected a token on first RawToken call") + } + if err != nil { + t.Fatal(err) + } + token, err = d.RawToken() + if token != nil { + t.Errorf("expected a nil token; got %#v", token) + } + if err == nil { + t.Fatalf("expected an error on second RawToken call") + } + const encoding = "x-testing-uppercase" + if !strings.Contains(err.Error(), encoding) { + t.Errorf("expected error to contain %q; got error: %v", + encoding, err) + } +} + +func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { + lastEnd := int64(0) + for i, want := range rawTokens { + start := d.InputOffset() + have, err := d.RawToken() + end := d.InputOffset() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + var shave, swant string + if _, ok := have.(CharData); ok { + shave = fmt.Sprintf("CharData(%q)", have) + } else { + shave = fmt.Sprintf("%#v", have) + } + if _, ok := want.(CharData); ok { + swant = fmt.Sprintf("CharData(%q)", want) + } else { + swant = fmt.Sprintf("%#v", want) + } + t.Errorf("token %d = %s, want %s", i, shave, swant) + } + + // Check that InputOffset returned actual token. + switch { + case start < lastEnd: + t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) + case start >= end: + // Special case: EndElement can be synthesized. + if start == end && end == lastEnd { + break + } + t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) + case end > int64(len(raw)): + t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) + default: + text := raw[start:end] + if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { + t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) + } + } + lastEnd = end + } +} + +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +]> +">]> +]> +'>]> +]> +'>]> +]> +` + +var nestedDirectivesTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE []`), + CharData("\n"), + Directive(`DOCTYPE [">]`), + CharData("\n"), + Directive(`DOCTYPE []`), + CharData("\n"), + Directive(`DOCTYPE ['>]`), + CharData("\n"), + Directive(`DOCTYPE []`), + CharData("\n"), + Directive(`DOCTYPE ['>]`), + CharData("\n"), + Directive(`DOCTYPE []`), + CharData("\n"), +} + +func TestNestedDirectives(t *testing.T) { + d := NewDecoder(strings.NewReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + + for i, want := range cookedTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + d := NewDecoder(strings.NewReader(xmlInput[i])) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} + +func TestInputLinePos(t *testing.T) { + testInput := ` + + +` + linePos := [][]int{ + {1, 7}, + {2, 1}, + {3, 4}, + {3, 6}, + {6, 7}, + {7, 1}, + {8, 4}, + {10, 4}, + {10, 10}, + {11, 1}, + {11, 8}, + } + dec := NewDecoder(strings.NewReader(testInput)) + for _, want := range linePos { + if _, err := dec.Token(); err != nil { + t.Errorf("Unexpected error: %v", err) + continue + } + + gotLine, gotCol := dec.InputPos() + if gotLine != want[0] || gotCol != want[1] { + t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) + } + } +} + +type allScalars struct { + True1 bool + True2 bool + False1 bool + False2 bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint int + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + String string + PtrString *string +} + +var all = allScalars{ + True1: true, + True2: true, + False1: false, + False2: false, + Int: 1, + Int8: -2, + Int16: 3, + Int32: -4, + Int64: 5, + Uint: 6, + Uint8: 7, + Uint16: 8, + Uint32: 9, + Uint64: 10, + Uintptr: 11, + Float32: 13.0, + Float64: 14.0, + String: "15", + PtrString: &sixteen, +} + +var sixteen = "16" + +const testScalarsInput = ` + true + 1 + false + 0 + 1 + -2 + 3 + -4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12.0 + 13.0 + 14.0 + 15 + 16 +` + +func TestAllScalars(t *testing.T) { + var a allScalars + err := Unmarshal([]byte(testScalarsInput), &a) + + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, all) { + t.Errorf("have %+v want %+v", a, all) + } +} + +type item struct { + FieldA string +} + +func TestIssue569(t *testing.T) { + data := `abcd` + var i item + err := Unmarshal([]byte(data), &i) + + if err != nil || i.FieldA != "abcd" { + t.Fatal("Expecting abcd") + } +} + +func TestUnquotedAttrs(t *testing.T) { + data := "" + d := NewDecoder(strings.NewReader(data)) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != "tag" { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != "azAZ09:-_" { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != "attr" { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } +} + +func TestValuelessAttrs(t *testing.T) { + tests := [][3]string{ + {"

", "p", "nowrap"}, + {"

", "p", "nowrap"}, + {"", "input", "checked"}, + {"", "input", "checked"}, + } + for _, test := range tests { + d := NewDecoder(strings.NewReader(test[0])) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if tok1.(StartElement).Attr[0].Value != "en" { + t.Error("CopyToken overwrote Attr[0]") + } + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenComment(t *testing.T) { + data := []byte("") + var tok1 Token = Comment(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) != Comment") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "

Foo

\n\n

Bar\n" + d := NewDecoder(strings.NewReader(testInput)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := ` ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { + } + if err != io.EOF { + t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := ` ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +var characterTests = []struct { + in string + err string +}{ + {"\x12", "illegal character code U+0012"}, + {"\x0b", "illegal character code U+000B"}, + {"\xef\xbf\xbe", "illegal character code U+FFFE"}, + {"\r\n\x07", "illegal character code U+0007"}, + {"what's up", "expected attribute name in element"}, + {"&abc\x01;", "invalid character entity &abc (no semicolon)"}, + {"&\x01;", "invalid character entity & (no semicolon)"}, + {"&\xef\xbf\xbe;", "invalid character entity &\uFFFE;"}, + {"&hello;", "invalid character entity &hello;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + d := NewDecoder(strings.NewReader(tt.in)) + var err error + + for err == nil { + _, err = d.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) + } + } +} + +func TestIsInCharacterRange(t *testing.T) { + invalid := []rune{ + utf8.MaxRune + 1, + 0xD800, // surrogate min + 0xDFFF, // surrogate max + -1, + } + for _, r := range invalid { + if isInCharacterRange(r) { + t.Errorf("rune %U considered valid", r) + } + } +} + +var procInstTests = []struct { + input string + expect [2]string +}{ + {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, + {`encoding="FOO" `, [2]string{"", "FOO"}}, + {`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version= encoding=`, [2]string{"", ""}}, + {`encoding="version=1.0"`, [2]string{"", "version=1.0"}}, + {``, [2]string{"", ""}}, + // TODO: what's the right approach to handle these nested cases? + {`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}}, + {`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + if got := procInst("version", test.input); got != test.expect[0] { + t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) + } + if got := procInst("encoding", test.input); got != test.expect[1] { + t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) + } + } +} + +// Ensure that directives with comments include the complete +// text of any nested directives. + +var directivesWithCommentsInput = ` +]> +]> + --> --> []> +` + +var directivesWithCommentsTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [ ]`), + CharData("\n"), + Directive(`DOCTYPE [ ]`), + CharData("\n"), + Directive(`DOCTYPE [ ]`), + CharData("\n"), +} + +func TestDirectivesWithComments(t *testing.T) { + d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) + + for i, want := range directivesWithCommentsTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Writer whose Write method always returns an error. +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } + +func TestEscapeTextIOErrors(t *testing.T) { + expectErr := "unwritable" + err := EscapeText(errWriter{}, []byte{'A'}) + + if err == nil || err.Error() != expectErr { + t.Errorf("have %v, want %v", err, expectErr) + } +} + +func TestEscapeTextInvalidChar(t *testing.T) { + input := []byte("A \x00 terminated string.") + expected := "A \uFFFD terminated string." + + buff := new(strings.Builder) + if err := EscapeText(buff, input); err != nil { + t.Fatalf("have %v, want nil", err) + } + text := buff.String() + + if text != expected { + t.Errorf("have %v, want %v", text, expected) + } +} + +func TestIssue5880(t *testing.T) { + type T []byte + data, err := Marshal(T{192, 168, 0, 1}) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + if !utf8.Valid(data) { + t.Errorf("Marshal generated invalid UTF-8: %x", data) + } +} + +func TestIssue8535(t *testing.T) { + + type ExampleConflict struct { + XMLName Name `xml:"example"` + Link string `xml:"link"` + AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space + } + testCase := ` + Example + http://example.com/default + http://example.com/home + http://example.com/ns + ` + + var dest ExampleConflict + d := NewDecoder(strings.NewReader(testCase)) + if err := d.Decode(&dest); err != nil { + t.Fatal(err) + } +} + +func TestEncodeXMLNS(t *testing.T) { + testCases := []struct { + f func() ([]byte, error) + want string + ok bool + }{ + {encodeXMLNS1, `hello world`, true}, + {encodeXMLNS2, `hello world`, true}, + {encodeXMLNS3, `hello world`, true}, + {encodeXMLNS4, `hello world`, false}, + } + + for i, tc := range testCases { + if b, err := tc.f(); err == nil { + if got, want := string(b), tc.want; got != want { + t.Errorf("%d: got %s, want %s \n", i, got, want) + } + } else { + t.Errorf("%d: marshal failed with %s", i, err) + } + } +} + +func encodeXMLNS1() ([]byte, error) { + + type T struct { + XMLName Name `xml:"Test"` + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &T{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS2() ([]byte, error) { + + type Test struct { + Body string `xml:"http://example.com/ns body"` + } + + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS3() ([]byte, error) { + + type Test struct { + XMLName Name `xml:"http://example.com/ns Test"` + Body string + } + + //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing + // as documentation states + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS4() ([]byte, error) { + + type Test struct { + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &Test{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func TestIssue11405(t *testing.T) { + testCases := []string{ + "", + "", + "", + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) + } + } +} + +func TestIssue12417(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {``, true}, + {``, true}, + {``, true}, + {``, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) + } + } +} + +func TestIssue7113(t *testing.T) { + type C struct { + XMLName Name `xml:""` // Sets empty namespace + } + + type D struct { + XMLName Name `xml:"d"` + } + + type A struct { + XMLName Name `xml:""` + C C `xml:""` + D D + } + + var a A + structSpace := "b" + xmlTest := `` + t.Log(xmlTest) + err := Unmarshal([]byte(xmlTest), &a) + if err != nil { + t.Fatal(err) + } + + if a.XMLName.Space != structSpace { + t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(a.C.XMLName.Space) != 0 { + t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space) + } + + var b []byte + b, err = Marshal(&a) + if err != nil { + t.Fatal(err) + } + if len(a.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space) + } + if string(b) != xmlTest { + t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest) + } + var c A + err = Unmarshal(b, &c) + if err != nil { + t.Fatalf("second Unmarshal failed: %s", err) + } + if c.XMLName.Space != "b" { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(c.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space) + } +} + +func TestIssue20396(t *testing.T) { + + var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element") + + testCases := []struct { + s string + wantErr error + }{ + {``, // Issue 20396 + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {``, attrError}, + {``, attrError}, + {``, nil}, + {`1`, + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`1`, attrError}, + {`1`, attrError}, + {`1`, nil}, + } + + var dest string + for _, tc := range testCases { + if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want { + if got == nil { + t.Errorf("%s: Unexpected success, want %v", tc.s, want) + } else if want == nil { + t.Errorf("%s: Unexpected error, got %v", tc.s, got) + } else if got.Error() != want.Error() { + t.Errorf("%s: got %v, want %v", tc.s, got, want) + } + } + } +} + +func TestIssue20685(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`one`, false}, + {`one`, true}, + {`one`, false}, + {`one`, false}, + {`one`, false}, + {`one`, false}, + {`one`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s) + } + } +} + +func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { + return func(src TokenReader) TokenReader { + return mapper{ + t: src, + f: mapping, + } + } +} + +type mapper struct { + t TokenReader + f func(Token) Token +} + +func (m mapper) Token() (Token, error) { + tok, err := m.t.Token() + if err != nil { + return nil, err + } + return m.f(tok), nil +} + +func TestNewTokenDecoderIdempotent(t *testing.T) { + d := NewDecoder(strings.NewReader(`
`)) + d2 := NewTokenDecoder(d) + if d != d2 { + t.Error("NewTokenDecoder did not detect underlying Decoder") + } +} + +func TestWrapDecoder(t *testing.T) { + d := NewDecoder(strings.NewReader(`[Re-enter Clown with a letter, and FABIAN]`)) + m := tokenMap(func(t Token) Token { + switch tok := t.(type) { + case StartElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + case EndElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + } + return t + }) + + d = NewTokenDecoder(m(d)) + + o := struct { + XMLName Name `xml:"blocking"` + Chardata string `xml:",chardata"` + }{} + + if err := d.Decode(&o); err != nil { + t.Fatal("Got unexpected error while decoding:", err) + } + + if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { + t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) + } +} + +type tokReader struct{} + +func (tokReader) Token() (Token, error) { + return StartElement{}, nil +} + +type Failure struct{} + +func (Failure) UnmarshalXML(*Decoder, StartElement) error { + return nil +} + +func TestTokenUnmarshaler(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Error("Unexpected panic using custom token unmarshaler") + } + }() + + d := NewTokenDecoder(tokReader{}) + d.Decode(&Failure{}) +} + +func testRoundTrip(t *testing.T, input string) { + d := NewDecoder(strings.NewReader(input)) + var tokens []Token + var buf bytes.Buffer + e := NewEncoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("invalid input: %v", err) + } + if err := e.EncodeToken(tok); err != nil { + t.Fatalf("failed to re-encode input: %v", err) + } + tokens = append(tokens, CopyToken(tok)) + } + if err := e.Flush(); err != nil { + t.Fatal(err) + } + + d = NewDecoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("failed to decode output: %v", err) + } + if len(tokens) == 0 { + t.Fatalf("unexpected token: %#v", tok) + } + a, b := tokens[0], tok + if !reflect.DeepEqual(a, b) { + t.Fatalf("token mismatch: %#v vs %#v", a, b) + } + tokens = tokens[1:] + } + if len(tokens) > 0 { + t.Fatalf("lost tokens: %#v", tokens) + } +} + +func TestRoundTrip(t *testing.T) { + tests := map[string]string{ + "trailing colon": ``, + "comments in directives": `--x --> > --x ]>`, + } + for name, input := range tests { + t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) + } +} + +func TestParseErrors(t *testing.T) { + withDefaultHeader := func(s string) string { + return `` + s + } + tests := []struct { + src string + err string + }{ + {withDefaultHeader(``), `unexpected end element `}, + {withDefaultHeader(``), `element in space x closed by in space y`}, + {withDefaultHeader(``), `expected target name after `), `invalid sequence `), `invalid sequence `), `invalid baz`), + `element in space zzz closed by in space ""`}, + {withDefaultHeader("\xf1"), `invalid UTF-8`}, + + // Header-related errors. + {``, `unsupported version "1.1"; only version 1.0 is supported`}, + + // Cases below are for "no errors". + {withDefaultHeader(``), ``}, + {withDefaultHeader(``), ``}, + } + + for _, test := range tests { + d := NewDecoder(strings.NewReader(test.src)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if test.err == "" { + if err != io.EOF { + t.Errorf("parse %s: have %q error, expected none", test.src, err) + } + continue + } + // Inv: err != nil + if err == io.EOF { + t.Errorf("parse %s: unexpected EOF", test.src) + continue + } + if !strings.Contains(err.Error(), test.err) { + t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err) + continue + } + } +} + +const testInputHTMLAutoClose = ` +
+

+

+

+
+

+

+
abc

` + +func BenchmarkHTMLAutoClose(b *testing.B) { + b.RunParallel(func(p *testing.PB) { + for p.Next() { + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + b.Fatalf("unexpected error: %v", err) + } + } + } + }) +} + +func TestHTMLAutoClose(t *testing.T) { + wantTokens := []Token{ + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "Br"}, []Attr{}}, + EndElement{Name{"", "Br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, + CharData("abc"), + EndElement{Name{"", "span"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + } + + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + var haveTokens []Token + for { + tok, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + t.Fatalf("unexpected error: %v", err) + } + haveTokens = append(haveTokens, CopyToken(tok)) + } + if len(haveTokens) != len(wantTokens) { + t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) + } + for i, want := range wantTokens { + if i >= len(haveTokens) { + t.Errorf("token[%d] expected %#v, have no token", i, want) + } else { + have := haveTokens[i] + if !reflect.DeepEqual(have, want) { + t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) + } + } + } +}