Example Feed

commit 95b872aa6173815c86d04d2714f734be7123956c Author: 程广 Date: Thu Nov 7 22:48:55 2024 +0800 add code diff --git a/atom_test.go b/atom_test.go new file mode 100644 index 0000000..d43c531 --- /dev/null +++ b/atom_test.go @@ -0,0 +1,56 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import "time" + +var atomValue = &Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Example Feed", + Link: []Link{{Href: "http://example.org/"}}, + Updated: ParseTime("2003-12-13T18:30:02Z"), + Author: Person{Name: "John Doe"}, + ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + + Entry: []Entry{ + { + Title: "Atom-Powered Robots Run Amok", + Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, + ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + Updated: ParseTime("2003-12-13T18:30:02Z"), + Summary: NewText("Some text."), + }, + }, +} + +var atomXML = `` + + `` + + `Example Feed` + + `urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6` + + `` + + `John Doe` + + `` + + `Atom-Powered Robots Run Amok` + + `urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a` + + `` + + `2003-12-13T18:30:02Z` + + `` + + `

Some text.

` + + `` + + `` + +func ParseTime(str string) time.Time { + t, err := time.Parse(time.RFC3339, str) + if err != nil { + panic(err) + } + return t +} + +func NewText(text string) Text { + return Text{ + Body: text, + } +} diff --git a/example_marshaling_test.go b/example_marshaling_test.go new file mode 100644 index 0000000..28bb1e2 --- /dev/null +++ b/example_marshaling_test.go @@ -0,0 +1,85 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml_test + +import ( + "fmt" + "log" + "strings" + + "git.pyer.club/kingecg/goxml" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalXML(d *goxml.Decoder, start goxml.StartElement) error { + var s string + if err := d.DecodeElement(&s, &start); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalXML(e *goxml.Encoder, start goxml.StartElement) error { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + return e.EncodeElement(s, start) +} + +func Example_customMarshalXML() { + blob := ` + + gopher + armadillo + zebra + unknown + gopher + bee + gopher + zebra + ` + var zoo struct { + Animals []Animal `xml:"animal"` + } + if err := goxml.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo.Animals { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/example_test.go b/example_test.go new file mode 100644 index 0000000..b964e35 --- /dev/null +++ b/example_test.go @@ -0,0 +1,152 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml_test + +import ( + "fmt" + "os" + + "git.pyer.club/kingecg/goxml" +) + +func ExampleMarshalIndent() { + type Address struct { + City, State string + } + type Person struct { + XMLName goxml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + output, err := goxml.MarshalIndent(v, " ", " ") + if err != nil { + fmt.Printf("error: %v\n", err) + } + + os.Stdout.Write(output) + // Output: + // + // + // John + // Doe + // + // 42 + // false + // Hanga Roa + // Easter Island + // + // +} + +func ExampleEncoder() { + type Address struct { + City, State string + } + type Person struct { + XMLName goxml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + enc := goxml.NewEncoder(os.Stdout) + enc.Indent(" ", " ") + if err := enc.Encode(v); err != nil { + fmt.Printf("error: %v\n", err) + } + + // Output: + // + // + // John + // Doe + // + // 42 + // false + // Hanga Roa + // Easter Island + // + // +} + +// This example demonstrates unmarshaling an XML excerpt into a value with +// some preset fields. Note that the Phone field isn't modified and that +// the XML element is ignored. Also, the Groups field is assigned +// considering the element path provided in its tag. +func ExampleUnmarshal() { + type Email struct { + Where string `xml:"where,attr"` + Addr string + } + type Address struct { + City, State string + } + type Result struct { + XMLName goxml.Name `xml:"Person"` + Name string `xml:"FullName"` + Phone string + Email []Email + Groups []string `xml:"Group>Value"` + Address + } + v := Result{Name: "none", Phone: "none"} + + data := ` + + Grace R. Emlin + Example Inc. + + gre@example.com + + + gre@work.com + + + Friends + Squash + + Hanga Roa + Easter Island + + ` + err := goxml.Unmarshal([]byte(data), &v) + if err != nil { + fmt.Printf("error: %v", err) + return + } + fmt.Printf("XMLName: %#v\n", v.XMLName) + fmt.Printf("Name: %q\n", v.Name) + fmt.Printf("Phone: %q\n", v.Phone) + fmt.Printf("Email: %v\n", v.Email) + fmt.Printf("Groups: %v\n", v.Groups) + fmt.Printf("Address: %v\n", v.Address) + // Output: + // XMLName: goxml.Name{Space:"", Local:"Person"} + // Name: "Grace R. Emlin" + // Phone: "none" + // Email: [{home gre@example.com} {work gre@work.com}] + // Groups: [Friends Squash] + // Address: {Hanga Roa Easter Island} +} diff --git a/example_text_marshaling_test.go b/example_text_marshaling_test.go new file mode 100644 index 0000000..b013bca --- /dev/null +++ b/example_text_marshaling_test.go @@ -0,0 +1,80 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml_test + +import ( + "fmt" + "log" + "strings" + + "git.pyer.club/kingecg/goxml" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalXML() { + blob := ` + + small + regular + large + unrecognized + small + normal + small + large + ` + var inventory struct { + Sizes []Size `xml:"size"` + } + if err := goxml.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory.Sizes { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..c1fa4b2 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module git.pyer.club/kingecg/goxml + +go 1.23.1 diff --git a/marshal.go b/marshal.go new file mode 100644 index 0000000..69a0d22 --- /dev/null +++ b/marshal.go @@ -0,0 +1,1131 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bufio" + "bytes" + "encoding" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" +) + +const ( + // Header is a generic XML header suitable for use with the output of [Marshal]. + // This is not automatically added to any output of this package, + // it is provided as a convenience. + Header = `` + "\n" +) + +// Marshal returns the XML encoding of v. +// +// Marshal handles an array or slice by marshaling each of the elements. +// Marshal handles a pointer by marshaling the value it points at or, if the +// pointer is nil, by writing nothing. Marshal handles an interface value by +// marshaling the value it contains or, if the interface value is nil, by +// writing nothing. Marshal handles all other data by writing one or more XML +// elements containing the data. +// +// The name for the XML elements is taken from, in order of preference: +// - the tag on the XMLName field, if the data is a struct +// - the value of the XMLName field of type [Name] +// - the tag of the struct field used to obtain the data +// - the name of the struct field used to obtain the data +// - the name of the marshaled type +// +// The XML element for a struct contains marshaled elements for each of the +// exported fields of the struct, with these exceptions: +// - the XMLName field, described above, is omitted. +// - a field with tag "-" is omitted. +// - a field with tag "name,attr" becomes an attribute with +// the given name in the XML element. +// - a field with tag ",attr" becomes an attribute with the +// field name in the XML element. +// - a field with tag ",chardata" is written as character data, +// not as an XML element. +// - a field with tag ",cdata" is written as character data +// wrapped in one or more tags, not as an XML element. +// - a field with tag ",innerxml" is written verbatim, not subject +// to the usual marshaling procedure. +// - a field with tag ",comment" is written as an XML comment, not +// subject to the usual marshaling procedure. It must not contain +// the "--" string within it. +// - a field with a tag including the "omitempty" option is omitted +// if the field value is empty. The empty values are false, 0, any +// nil pointer or interface value, and any array, slice, map, or +// string of length zero. +// - an anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// - a field implementing [Marshaler] is written by calling its MarshalXML +// method. +// - a field implementing [encoding.TextMarshaler] is written by encoding the +// result of its MarshalText method as text. +// +// If a field uses a tag "a>b>c", then the element c will be nested inside +// parent elements a and b. Fields that appear next to each other that name +// the same parent will be enclosed in one XML element. +// +// If the XML name for a struct field is defined by both the field tag and the +// struct's XMLName field, the names must match. +// +// See [MarshalIndent] for an example. +// +// Marshal will return an error if asked to marshal a channel, function, or map. +func Marshal(v any) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// Marshaler is the interface implemented by objects that can marshal +// themselves into valid XML elements. +// +// MarshalXML encodes the receiver as zero or more XML elements. +// By convention, arrays or slices are typically encoded as a sequence +// of elements, one per entry. +// Using start as the element tag is not required, but doing so +// will enable [Unmarshal] to match the XML elements to the correct +// struct field. +// One common implementation strategy is to construct a separate +// value with a layout corresponding to the desired XML and then +// to encode it using e.EncodeElement. +// Another common strategy is to use repeated calls to e.EncodeToken +// to generate the XML output one token at a time. +// The sequence of encoded tokens must make up zero or more valid +// XML elements. +type Marshaler interface { + MarshalXML(e *Encoder, start StartElement) error +} + +// MarshalerAttr is the interface implemented by objects that can marshal +// themselves into valid XML attributes. +// +// MarshalXMLAttr returns an XML attribute with the encoded value of the receiver. +// Using name as the attribute name is not required, but doing so +// will enable [Unmarshal] to match the attribute to the correct +// struct field. +// If MarshalXMLAttr returns the zero attribute [Attr]{}, no attribute +// will be generated in the output. +// MarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type MarshalerAttr interface { + MarshalXMLAttr(name Name) (Attr, error) +} + +// MarshalIndent works like [Marshal], but each XML element begins on a new +// indented line that starts with prefix and is followed by one or more +// copies of indent according to the nesting depth. +func MarshalIndent(v any, prefix, indent string) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + enc.Indent(prefix, indent) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// An Encoder writes XML data to an output stream. +type Encoder struct { + p printer +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + e := &Encoder{printer{w: bufio.NewWriter(w)}} + e.p.encoder = e + return e +} + +// Indent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func (enc *Encoder) Indent(prefix, indent string) { + enc.p.prefix = prefix + enc.p.indent = indent +} + +// Encode writes the XML encoding of v to the stream. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// Encode calls [Encoder.Flush] before returning. +func (enc *Encoder) Encode(v any) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, nil) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +// EncodeElement writes the XML encoding of v to the stream, +// using start as the outermost tag in the encoding. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// EncodeElement calls [Encoder.Flush] before returning. +func (enc *Encoder) EncodeElement(v any, start StartElement) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, &start) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +var ( + begComment = []byte("") + endProcInst = []byte("?>") +) + +// EncodeToken writes the given XML token to the stream. +// It returns an error if [StartElement] and [EndElement] tokens are not properly matched. +// +// EncodeToken does not call [Encoder.Flush], because usually it is part of a larger operation +// such as [Encoder.Encode] or [Encoder.EncodeElement] (or a custom [Marshaler]'s MarshalXML invoked +// during those), and those will call Flush when finished. +// Callers that create an Encoder and then invoke EncodeToken directly, without +// using Encode or EncodeElement, need to call Flush when finished to ensure +// that the XML is written to the underlying writer. +// +// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token +// in the stream. +func (enc *Encoder) EncodeToken(t Token) error { + + p := &enc.p + switch t := t.(type) { + case StartElement: + if err := p.writeStart(&t); err != nil { + return err + } + case EndElement: + if err := p.writeEnd(t.Name); err != nil { + return err + } + case CharData: + escapeText(p, t, false) + case Comment: + if bytes.Contains(t, endComment) { + return fmt.Errorf("xml: EncodeToken of Comment containing --> marker") + } + p.WriteString("") + return p.cachedWriteError() + case ProcInst: + // First token to be encoded which is also a ProcInst with target of xml + // is the xml declaration. The only ProcInst where target of xml is allowed. + if t.Target == "xml" && p.w.Buffered() != 0 { + return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded") + } + if !isNameString(t.Target) { + return fmt.Errorf("xml: EncodeToken of ProcInst with invalid Target") + } + if bytes.Contains(t.Inst, endProcInst) { + return fmt.Errorf("xml: EncodeToken of ProcInst containing ?> marker") + } + p.WriteString(" 0 { + p.WriteByte(' ') + p.Write(t.Inst) + } + p.WriteString("?>") + case Directive: + if !isValidDirective(t) { + return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") + } + p.WriteString("") + default: + return fmt.Errorf("xml: EncodeToken of invalid token type") + + } + return p.cachedWriteError() +} + +// isValidDirective reports whether dir is a valid directive text, +// meaning angle brackets are matched, ignoring comments and strings. +func isValidDirective(dir Directive) bool { + var ( + depth int + inquote uint8 + incomment bool + ) + for i, c := range dir { + switch { + case incomment: + if c == '>' { + if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { + incomment = false + } + } + // Just ignore anything in comment + case inquote != 0: + if c == inquote { + inquote = 0 + } + // Just ignore anything within quotes + case c == '\'' || c == '"': + inquote = c + case c == '<': + if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { + incomment = true + } else { + depth++ + } + case c == '>': + if depth == 0 { + return false + } + depth-- + } + } + return depth == 0 && inquote == 0 && !incomment +} + +// Flush flushes any buffered XML to the underlying writer. +// See the [Encoder.EncodeToken] documentation for details about when it is necessary. +func (enc *Encoder) Flush() error { + return enc.p.w.Flush() +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (enc *Encoder) Close() error { + return enc.p.Close() +} + +type printer struct { + w *bufio.Writer + encoder *Encoder + seq int + indent string + prefix string + depth int + indentedIn bool + putNewline bool + attrNS map[string]string // map prefix -> name space + attrPrefix map[string]string // map name space -> prefix + prefixes []string + tags []Name + closed bool + err error +} + +// createAttrPrefix finds the name space prefix attribute to use for the given name space, +// defining a new prefix if necessary. It returns the prefix. +func (p *printer) createAttrPrefix(url string) string { + if prefix := p.attrPrefix[url]; prefix != "" { + return prefix + } + + // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" + // and must be referred to that way. + // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", + // but users should not be trying to use that one directly - that's our job.) + if url == xmlURL { + return xmlPrefix + } + + // Need to define a new name space. + if p.attrPrefix == nil { + p.attrPrefix = make(map[string]string) + p.attrNS = make(map[string]string) + } + + // Pick a name. We try to use the final element of the path + // but fall back to _. + prefix := strings.TrimRight(url, "/") + if i := strings.LastIndex(prefix, "/"); i >= 0 { + prefix = prefix[i+1:] + } + if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") { + prefix = "_" + } + // xmlanything is reserved and any variant of it regardless of + // case should be matched, so: + // (('X'|'x') ('M'|'m') ('L'|'l')) + // See Section 2.3 of https://www.w3.org/TR/REC-xml/ + if len(prefix) >= 3 && strings.EqualFold(prefix[:3], "xml") { + prefix = "_" + prefix + } + if p.attrNS[prefix] != "" { + // Name is taken. Find a better one. + for p.seq++; ; p.seq++ { + if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" { + prefix = id + break + } + } + } + + p.attrPrefix[url] = prefix + p.attrNS[prefix] = url + + p.WriteString(`xmlns:`) + p.WriteString(prefix) + p.WriteString(`="`) + EscapeText(p, []byte(url)) + p.WriteString(`" `) + + p.prefixes = append(p.prefixes, prefix) + + return prefix +} + +// deleteAttrPrefix removes an attribute name space prefix. +func (p *printer) deleteAttrPrefix(prefix string) { + delete(p.attrPrefix, p.attrNS[prefix]) + delete(p.attrNS, prefix) +} + +func (p *printer) markPrefix() { + p.prefixes = append(p.prefixes, "") +} + +func (p *printer) popPrefix() { + for len(p.prefixes) > 0 { + prefix := p.prefixes[len(p.prefixes)-1] + p.prefixes = p.prefixes[:len(p.prefixes)-1] + if prefix == "" { + break + } + p.deleteAttrPrefix(prefix) + } +} + +var ( + marshalerType = reflect.TypeFor[Marshaler]() + marshalerAttrType = reflect.TypeFor[MarshalerAttr]() + textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() +) + +// marshalValue writes one or more XML elements representing val. +// If val was obtained from a struct field, finfo must have its details. +func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplate *StartElement) error { + if startTemplate != nil && startTemplate.Name.Local == "" { + return fmt.Errorf("xml: EncodeElement of StartElement with missing name") + } + + if !val.IsValid() { + return nil + } + if finfo != nil && finfo.flags&fOmitEmpty != 0 && isEmptyValue(val) { + return nil + } + + // Drill into interfaces and pointers. + // This can turn into an infinite loop given a cyclic chain, + // but it matches the Go 1 behavior. + for val.Kind() == reflect.Interface || val.Kind() == reflect.Pointer { + if val.IsNil() { + return nil + } + val = val.Elem() + } + + kind := val.Kind() + typ := val.Type() + + // Check for marshaler. + if val.CanInterface() && typ.Implements(marshalerType) { + return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerType) { + return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Check for text marshaler. + if val.CanInterface() && typ.Implements(textMarshalerType) { + return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Slices and arrays iterate over the elements. They do not have an enclosing tag. + if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { + for i, n := 0, val.Len(); i < n; i++ { + if err := p.marshalValue(val.Index(i), finfo, startTemplate); err != nil { + return err + } + } + return nil + } + + tinfo, err := getTypeInfo(typ, false) + if err != nil { + return err + } + + // Create start element. + // Precedence for the XML element name is: + // 0. startTemplate + // 1. XMLName field in underlying struct; + // 2. field name/tag in the struct field; and + // 3. type name + var start StartElement + + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if tinfo.xmlname != nil { + xmlname := tinfo.xmlname + if xmlname.name != "" { + start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name + } else { + fv := xmlname.value(val, dontInitNilPointers) + if v, ok := fv.Interface().(Name); ok && v.Local != "" { + start.Name = v + } + } + } + if start.Name.Local == "" && finfo != nil { + start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name + } + if start.Name.Local == "" { + name := typ.Name() + if i := strings.IndexByte(name, '['); i >= 0 { + // Truncate generic instantiation name. See issue 48318. + name = name[:i] + } + if name == "" { + return &UnsupportedTypeError{typ} + } + start.Name.Local = name + } + + // Attributes + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr == 0 { + continue + } + fv := finfo.value(val, dontInitNilPointers) + + if finfo.flags&fOmitEmpty != 0 && (!fv.IsValid() || isEmptyValue(fv)) { + continue + } + + if fv.Kind() == reflect.Interface && fv.IsNil() { + continue + } + + name := Name{Space: finfo.xmlns, Local: finfo.name} + if err := p.marshalAttr(&start, name, fv); err != nil { + return err + } + } + + // If an empty name was found, namespace is overridden with an empty space + if tinfo.xmlname != nil && start.Name.Space == "" && + tinfo.xmlname.xmlns == "" && tinfo.xmlname.name == "" && + len(p.tags) != 0 && p.tags[len(p.tags)-1].Space != "" { + start.Attr = append(start.Attr, Attr{Name{"", xmlnsPrefix}, ""}) + } + if err := p.writeStart(&start); err != nil { + return err + } + + if val.Kind() == reflect.Struct { + err = p.marshalStruct(tinfo, val) + } else { + s, b, err1 := p.marshalSimple(typ, val) + if err1 != nil { + err = err1 + } else if b != nil { + EscapeText(p, b) + } else { + p.EscapeString(s) + } + } + if err != nil { + return err + } + + if err := p.writeEnd(start.Name); err != nil { + return err + } + + return p.cachedWriteError() +} + +// marshalAttr marshals an attribute with the given name and value, adding to start.Attr. +func (p *printer) marshalAttr(start *StartElement, name Name, val reflect.Value) error { + if val.CanInterface() && val.Type().Implements(marshalerAttrType) { + attr, err := val.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerAttrType) { + attr, err := pv.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + } + + if val.CanInterface() && val.Type().Implements(textMarshalerType) { + text, err := val.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + text, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + } + + // Dereference or skip nil pointer, interface values. + switch val.Kind() { + case reflect.Pointer, reflect.Interface: + if val.IsNil() { + return nil + } + val = val.Elem() + } + + // Walk slices. + if val.Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + n := val.Len() + for i := 0; i < n; i++ { + if err := p.marshalAttr(start, name, val.Index(i)); err != nil { + return err + } + } + return nil + } + + if val.Type() == attrType { + start.Attr = append(start.Attr, val.Interface().(Attr)) + return nil + } + + s, b, err := p.marshalSimple(val.Type(), val) + if err != nil { + return err + } + if b != nil { + s = string(b) + } + start.Attr = append(start.Attr, Attr{name, s}) + return nil +} + +// defaultStart returns the default start element to use, +// given the reflect type, field info, and start template. +func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { + var start StartElement + // Precedence for the XML element name is as above, + // except that we do not look inside structs for the first field. + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if finfo != nil && finfo.name != "" { + start.Name.Local = finfo.name + start.Name.Space = finfo.xmlns + } else if typ.Name() != "" { + start.Name.Local = typ.Name() + } else { + // Must be a pointer to a named type, + // since it has the Marshaler methods. + start.Name.Local = typ.Elem().Name() + } + return start +} + +// marshalInterface marshals a Marshaler interface value. +func (p *printer) marshalInterface(val Marshaler, start StartElement) error { + // Push a marker onto the tag stack so that MarshalXML + // cannot close the XML tags that it did not open. + p.tags = append(p.tags, Name{}) + n := len(p.tags) + + err := val.MarshalXML(p.encoder, start) + if err != nil { + return err + } + + // Make sure MarshalXML closed all its tags. p.tags[n-1] is the mark. + if len(p.tags) > n { + return fmt.Errorf("xml: %s.MarshalXML wrote invalid XML: <%s> not closed", receiverType(val), p.tags[len(p.tags)-1].Local) + } + p.tags = p.tags[:n-1] + return nil +} + +// marshalTextInterface marshals a TextMarshaler interface value. +func (p *printer) marshalTextInterface(val encoding.TextMarshaler, start StartElement) error { + if err := p.writeStart(&start); err != nil { + return err + } + text, err := val.MarshalText() + if err != nil { + return err + } + EscapeText(p, text) + return p.writeEnd(start.Name) +} + +// writeStart writes the given start element. +func (p *printer) writeStart(start *StartElement) error { + if start.Name.Local == "" { + return fmt.Errorf("xml: start tag with no name") + } + + p.tags = append(p.tags, start.Name) + p.markPrefix() + + p.writeIndent(1) + p.WriteByte('<') + p.WriteString(start.Name.Local) + + if start.Name.Space != "" { + p.WriteString(` xmlns="`) + p.EscapeString(start.Name.Space) + p.WriteByte('"') + } + + // Attributes + for _, attr := range start.Attr { + name := attr.Name + if name.Local == "" { + continue + } + p.WriteByte(' ') + if name.Space != "" { + p.WriteString(p.createAttrPrefix(name.Space)) + p.WriteByte(':') + } + p.WriteString(name.Local) + p.WriteString(`="`) + p.EscapeString(attr.Value) + p.WriteByte('"') + } + p.WriteByte('>') + return nil +} + +func (p *printer) writeEnd(name Name) error { + if name.Local == "" { + return fmt.Errorf("xml: end tag with no name") + } + if len(p.tags) == 0 || p.tags[len(p.tags)-1].Local == "" { + return fmt.Errorf("xml: end tag without start tag", name.Local) + } + if top := p.tags[len(p.tags)-1]; top != name { + if top.Local != name.Local { + return fmt.Errorf("xml: end tag does not match start tag <%s>", name.Local, top.Local) + } + return fmt.Errorf("xml: end tag in namespace %s does not match start tag <%s> in namespace %s", name.Local, name.Space, top.Local, top.Space) + } + p.tags = p.tags[:len(p.tags)-1] + + p.writeIndent(-1) + p.WriteByte('<') + p.WriteByte('/') + p.WriteString(name.Local) + p.WriteByte('>') + p.popPrefix() + return nil +} + +func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) (string, []byte, error) { + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return strconv.FormatInt(val.Int(), 10), nil, nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return strconv.FormatUint(val.Uint(), 10), nil, nil + case reflect.Float32, reflect.Float64: + return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()), nil, nil + case reflect.String: + return val.String(), nil, nil + case reflect.Bool: + return strconv.FormatBool(val.Bool()), nil, nil + case reflect.Array: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // [...]byte + var bytes []byte + if val.CanAddr() { + bytes = val.Bytes() + } else { + bytes = make([]byte, val.Len()) + reflect.Copy(reflect.ValueOf(bytes), val) + } + return "", bytes, nil + case reflect.Slice: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // []byte + return "", val.Bytes(), nil + } + return "", nil, &UnsupportedTypeError{typ} +} + +var ddBytes = []byte("--") + +// indirect drills into interfaces and pointers, returning the pointed-at value. +// If it encounters a nil interface or pointer, indirect returns that nil value. +// This can turn into an infinite loop given a cyclic chain, +// but it matches the Go 1 behavior. +func indirect(vf reflect.Value) reflect.Value { + for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Pointer { + if vf.IsNil() { + return vf + } + vf = vf.Elem() + } + return vf +} + +func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { + s := parentStack{p: p} + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr != 0 { + continue + } + vf := finfo.value(val, dontInitNilPointers) + if !vf.IsValid() { + // The field is behind an anonymous struct field that's + // nil. Skip it. + continue + } + + switch finfo.flags & fMode { + case fCDATA, fCharData: + emit := EscapeText + if finfo.flags&fMode == fCDATA { + emit = emitCDATA + } + if err := s.trim(finfo.parents); err != nil { + return err + } + if vf.CanInterface() && vf.Type().Implements(textMarshalerType) { + data, err := vf.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + if vf.CanAddr() { + pv := vf.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + data, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + } + + var scratch [64]byte + vf = indirect(vf) + switch vf.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil { + return err + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if err := emit(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)); err != nil { + return err + } + case reflect.Float32, reflect.Float64: + if err := emit(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())); err != nil { + return err + } + case reflect.Bool: + if err := emit(p, strconv.AppendBool(scratch[:0], vf.Bool())); err != nil { + return err + } + case reflect.String: + if err := emit(p, []byte(vf.String())); err != nil { + return err + } + case reflect.Slice: + if elem, ok := vf.Interface().([]byte); ok { + if err := emit(p, elem); err != nil { + return err + } + } + } + continue + + case fComment: + if err := s.trim(finfo.parents); err != nil { + return err + } + vf = indirect(vf) + k := vf.Kind() + if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) { + return fmt.Errorf("xml: bad type for comment field of %s", val.Type()) + } + if vf.Len() == 0 { + continue + } + p.writeIndent(0) + p.WriteString("" is invalid grammar. Make it "- -->" + p.WriteByte(' ') + } + p.WriteString("-->") + continue + + case fInnerXML: + vf = indirect(vf) + iface := vf.Interface() + switch raw := iface.(type) { + case []byte: + p.Write(raw) + continue + case string: + p.WriteString(raw) + continue + } + + case fElement, fElement | fAny: + if err := s.trim(finfo.parents); err != nil { + return err + } + if len(finfo.parents) > len(s.stack) { + if vf.Kind() != reflect.Pointer && vf.Kind() != reflect.Interface || !vf.IsNil() { + if err := s.push(finfo.parents[len(s.stack):]); err != nil { + return err + } + } + } + } + if err := p.marshalValue(vf, finfo, nil); err != nil { + return err + } + } + s.trim(nil) + return p.cachedWriteError() +} + +// Write implements io.Writer +func (p *printer) Write(b []byte) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.Write(b) + } + return n, p.err +} + +// WriteString implements io.StringWriter +func (p *printer) WriteString(s string) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.WriteString(s) + } + return n, p.err +} + +// WriteByte implements io.ByteWriter +func (p *printer) WriteByte(c byte) error { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + p.err = p.w.WriteByte(c) + } + return p.err +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (p *printer) Close() error { + if p.closed { + return nil + } + p.closed = true + if err := p.w.Flush(); err != nil { + return err + } + if len(p.tags) > 0 { + return fmt.Errorf("unclosed tag <%s>", p.tags[len(p.tags)-1].Local) + } + return nil +} + +// return the bufio Writer's cached write error +func (p *printer) cachedWriteError() error { + _, err := p.Write(nil) + return err +} + +func (p *printer) writeIndent(depthDelta int) { + if len(p.prefix) == 0 && len(p.indent) == 0 { + return + } + if depthDelta < 0 { + p.depth-- + if p.indentedIn { + p.indentedIn = false + return + } + p.indentedIn = false + } + if p.putNewline { + p.WriteByte('\n') + } else { + p.putNewline = true + } + if len(p.prefix) > 0 { + p.WriteString(p.prefix) + } + if len(p.indent) > 0 { + for i := 0; i < p.depth; i++ { + p.WriteString(p.indent) + } + } + if depthDelta > 0 { + p.depth++ + p.indentedIn = true + } +} + +type parentStack struct { + p *printer + stack []string +} + +// trim updates the XML context to match the longest common prefix of the stack +// and the given parents. A closing tag will be written for every parent +// popped. Passing a zero slice or nil will close all the elements. +func (s *parentStack) trim(parents []string) error { + split := 0 + for ; split < len(parents) && split < len(s.stack); split++ { + if parents[split] != s.stack[split] { + break + } + } + for i := len(s.stack) - 1; i >= split; i-- { + if err := s.p.writeEnd(Name{Local: s.stack[i]}); err != nil { + return err + } + } + s.stack = s.stack[:split] + return nil +} + +// push adds parent elements to the stack and writes open tags. +func (s *parentStack) push(parents []string) error { + for i := 0; i < len(parents); i++ { + if err := s.p.writeStart(&StartElement{Name: Name{Local: parents[i]}}); err != nil { + return err + } + } + s.stack = append(s.stack, parents...) + return nil +} + +// UnsupportedTypeError is returned when [Marshal] encounters a type +// that cannot be converted into XML. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "xml: unsupported type: " + e.Type.String() +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.Interface, reflect.Pointer: + return v.IsZero() + } + return false +} diff --git a/marshal_test.go b/marshal_test.go new file mode 100644 index 0000000..1a8e31d --- /dev/null +++ b/marshal_test.go @@ -0,0 +1,2591 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +type DriveType int + +const ( + HyperDrive DriveType = iota + ImprobabilityDrive +) + +type Passenger struct { + Name []string `xml:"name"` + Weight float32 `xml:"weight"` +} + +type Ship struct { + XMLName struct{} `xml:"spaceship"` + + Name string `xml:"name,attr"` + Pilot string `xml:"pilot,attr"` + Drive DriveType `xml:"drive"` + Age uint `xml:"age"` + Passenger []*Passenger `xml:"passenger"` + secret string +} + +type NamedType string + +type Port struct { + XMLName struct{} `xml:"port"` + Type string `xml:"type,attr,omitempty"` + Comment string `xml:",comment"` + Number string `xml:",chardata"` +} + +type Domain struct { + XMLName struct{} `xml:"domain"` + Country string `xml:",attr,omitempty"` + Name []byte `xml:",chardata"` + Comment []byte `xml:",comment"` +} + +type Book struct { + XMLName struct{} `xml:"book"` + Title string `xml:",chardata"` +} + +type Event struct { + XMLName struct{} `xml:"event"` + Year int `xml:",chardata"` +} + +type Movie struct { + XMLName struct{} `xml:"movie"` + Length uint `xml:",chardata"` +} + +type Pi struct { + XMLName struct{} `xml:"pi"` + Approximation float32 `xml:",chardata"` +} + +type Universe struct { + XMLName struct{} `xml:"universe"` + Visible float64 `xml:",chardata"` +} + +type Particle struct { + XMLName struct{} `xml:"particle"` + HasMass bool `xml:",chardata"` +} + +type Departure struct { + XMLName struct{} `xml:"departure"` + When time.Time `xml:",chardata"` +} + +type SecretAgent struct { + XMLName struct{} `xml:"agent"` + Handle string `xml:"handle,attr"` + Identity string + Obfuscate string `xml:",innerxml"` +} + +type NestedItems struct { + XMLName struct{} `xml:"result"` + Items []string `xml:">item"` + Item1 []string `xml:"Items>item1"` +} + +type NestedOrder struct { + XMLName struct{} `xml:"result"` + Field1 string `xml:"parent>c"` + Field2 string `xml:"parent>b"` + Field3 string `xml:"parent>a"` +} + +type MixedNested struct { + XMLName struct{} `xml:"result"` + A string `xml:"parent1>a"` + B string `xml:"b"` + C string `xml:"parent1>parent2>c"` + D string `xml:"parent1>d"` +} + +type NilTest struct { + A any `xml:"parent1>parent2>a"` + B any `xml:"parent1>b"` + C any `xml:"parent1>parent2>c"` +} + +type Service struct { + XMLName struct{} `xml:"service"` + Domain *Domain `xml:"host>domain"` + Port *Port `xml:"host>port"` + Extra1 any + Extra2 any `xml:"host>extra2"` +} + +var nilStruct *Ship + +type EmbedA struct { + EmbedC + EmbedB EmbedB + FieldA string + embedD +} + +type EmbedB struct { + FieldB string + *EmbedC +} + +type EmbedC struct { + FieldA1 string `xml:"FieldA>A1"` + FieldA2 string `xml:"FieldA>A2"` + FieldB string + FieldC string +} + +type embedD struct { + fieldD string + FieldE string // Promoted and visible when embedD is embedded. +} + +type NameCasing struct { + XMLName struct{} `xml:"casing"` + Xy string + XY string + XyA string `xml:"Xy,attr"` + XYA string `xml:"XY,attr"` +} + +type NamePrecedence struct { + XMLName Name `xml:"Parent"` + FromTag XMLNameWithoutTag `xml:"InTag"` + FromNameVal XMLNameWithoutTag + FromNameTag XMLNameWithTag + InFieldName string +} + +type XMLNameWithTag struct { + XMLName Name `xml:"InXMLNameTag"` + Value string `xml:",chardata"` +} + +type XMLNameWithoutTag struct { + XMLName Name + Value string `xml:",chardata"` +} + +type NameInField struct { + Foo Name `xml:"ns foo"` +} + +type AttrTest struct { + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type AttrsTest struct { + Attrs []Attr `xml:",any,attr"` + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type OmitAttrTest struct { + Int int `xml:",attr,omitempty"` + Named int `xml:"int,attr,omitempty"` + Float float64 `xml:",attr,omitempty"` + Uint8 uint8 `xml:",attr,omitempty"` + Bool bool `xml:",attr,omitempty"` + Str string `xml:",attr,omitempty"` + Bytes []byte `xml:",attr,omitempty"` + PStr *string `xml:",attr,omitempty"` +} + +type OmitFieldTest struct { + Int int `xml:",omitempty"` + Named int `xml:"int,omitempty"` + Float float64 `xml:",omitempty"` + Uint8 uint8 `xml:",omitempty"` + Bool bool `xml:",omitempty"` + Str string `xml:",omitempty"` + Bytes []byte `xml:",omitempty"` + PStr *string `xml:",omitempty"` + Ptr *PresenceTest `xml:",omitempty"` +} + +type AnyTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField AnyHolder `xml:",any"` +} + +type AnyOmitTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField *AnyHolder `xml:",any,omitempty"` +} + +type AnySliceTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField []AnyHolder `xml:",any"` +} + +type AnyHolder struct { + XMLName Name + XML string `xml:",innerxml"` +} + +type RecurseA struct { + A string + B *RecurseB +} + +type RecurseB struct { + A *RecurseA + B string +} + +type PresenceTest struct { + Exists *struct{} +} + +type IgnoreTest struct { + PublicSecret string `xml:"-"` +} + +type MyBytes []byte + +type Data struct { + Bytes []byte + Attr []byte `xml:",attr"` + Custom MyBytes +} + +type Plain struct { + V any +} + +type MyInt int + +type EmbedInt struct { + MyInt +} + +type Strings struct { + X []string `xml:"A>B,omitempty"` +} + +type PointerFieldsTest struct { + XMLName Name `xml:"dummy"` + Name *string `xml:"name,attr"` + Age *uint `xml:"age,attr"` + Empty *string `xml:"empty,attr"` + Contents *string `xml:",chardata"` +} + +type ChardataEmptyTest struct { + XMLName Name `xml:"test"` + Contents *string `xml:",chardata"` +} + +type PointerAnonFields struct { + *MyInt + *NamedType +} + +type MyMarshalerTest struct { +} + +var _ Marshaler = (*MyMarshalerTest)(nil) + +func (m *MyMarshalerTest) MarshalXML(e *Encoder, start StartElement) error { + e.EncodeToken(start) + e.EncodeToken(CharData([]byte("hello world"))) + e.EncodeToken(EndElement{start.Name}) + return nil +} + +type MyMarshalerAttrTest struct { +} + +var _ MarshalerAttr = (*MyMarshalerAttrTest)(nil) + +func (m *MyMarshalerAttrTest) MarshalXMLAttr(name Name) (Attr, error) { + return Attr{name, "hello world"}, nil +} + +func (m *MyMarshalerAttrTest) UnmarshalXMLAttr(attr Attr) error { + return nil +} + +type MarshalerStruct struct { + Foo MyMarshalerAttrTest `xml:",attr"` +} + +type InnerStruct struct { + XMLName Name `xml:"testns outer"` +} + +type OuterStruct struct { + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterNamedStruct struct { + InnerStruct + XMLName Name `xml:"outerns test"` + IntAttr int `xml:"int,attr"` +} + +type OuterNamedOrderedStruct struct { + XMLName Name `xml:"outerns test"` + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterOuterStruct struct { + OuterStruct +} + +type NestedAndChardata struct { + AB []string `xml:"A>B"` + Chardata string `xml:",chardata"` +} + +type NestedAndComment struct { + AB []string `xml:"A>B"` + Comment string `xml:",comment"` +} + +type CDataTest struct { + Chardata string `xml:",cdata"` +} + +type NestedAndCData struct { + AB []string `xml:"A>B"` + CDATA string `xml:",cdata"` +} + +func ifaceptr(x any) any { + return &x +} + +func stringptr(x string) *string { + return &x +} + +type T1 struct{} +type T2 struct{} + +type IndirComment struct { + T1 T1 + Comment *string `xml:",comment"` + T2 T2 +} + +type DirectComment struct { + T1 T1 + Comment string `xml:",comment"` + T2 T2 +} + +type IfaceComment struct { + T1 T1 + Comment any `xml:",comment"` + T2 T2 +} + +type IndirChardata struct { + T1 T1 + Chardata *string `xml:",chardata"` + T2 T2 +} + +type DirectChardata struct { + T1 T1 + Chardata string `xml:",chardata"` + T2 T2 +} + +type IfaceChardata struct { + T1 T1 + Chardata any `xml:",chardata"` + T2 T2 +} + +type IndirCDATA struct { + T1 T1 + CDATA *string `xml:",cdata"` + T2 T2 +} + +type DirectCDATA struct { + T1 T1 + CDATA string `xml:",cdata"` + T2 T2 +} + +type IfaceCDATA struct { + T1 T1 + CDATA any `xml:",cdata"` + T2 T2 +} + +type IndirInnerXML struct { + T1 T1 + InnerXML *string `xml:",innerxml"` + T2 T2 +} + +type DirectInnerXML struct { + T1 T1 + InnerXML string `xml:",innerxml"` + T2 T2 +} + +type IfaceInnerXML struct { + T1 T1 + InnerXML any `xml:",innerxml"` + T2 T2 +} + +type IndirElement struct { + T1 T1 + Element *string + T2 T2 +} + +type DirectElement struct { + T1 T1 + Element string + T2 T2 +} + +type IfaceElement struct { + T1 T1 + Element any + T2 T2 +} + +type IndirOmitEmpty struct { + T1 T1 + OmitEmpty *string `xml:",omitempty"` + T2 T2 +} + +type DirectOmitEmpty struct { + T1 T1 + OmitEmpty string `xml:",omitempty"` + T2 T2 +} + +type IfaceOmitEmpty struct { + T1 T1 + OmitEmpty any `xml:",omitempty"` + T2 T2 +} + +type IndirAny struct { + T1 T1 + Any *string `xml:",any"` + T2 T2 +} + +type DirectAny struct { + T1 T1 + Any string `xml:",any"` + T2 T2 +} + +type IfaceAny struct { + T1 T1 + Any any `xml:",any"` + T2 T2 +} + +type Generic[T any] struct { + X T +} + +var ( + nameAttr = "Sarah" + ageAttr = uint(12) + contentsAttr = "lorem ipsum" + empty = "" +) + +// Unless explicitly stated as such (or *Plain), all of the +// tests below are two-way tests. When introducing new tests, +// please try to make them two-way as well to ensure that +// marshaling and unmarshaling are as symmetrical as feasible. +var marshalTests = []struct { + Value any + ExpectXML string + MarshalOnly bool + MarshalError string + UnmarshalOnly bool + UnmarshalError string +}{ + // Test nil marshals to nothing + {Value: nil, ExpectXML: ``, MarshalOnly: true}, + {Value: nilStruct, ExpectXML: ``, MarshalOnly: true}, + + // Test value types + {Value: &Plain{true}, ExpectXML: `true`}, + {Value: &Plain{false}, ExpectXML: `false`}, + {Value: &Plain{int(42)}, ExpectXML: `42`}, + {Value: &Plain{int8(42)}, ExpectXML: `42`}, + {Value: &Plain{int16(42)}, ExpectXML: `42`}, + {Value: &Plain{int32(42)}, ExpectXML: `42`}, + {Value: &Plain{uint(42)}, ExpectXML: `42`}, + {Value: &Plain{uint8(42)}, ExpectXML: `42`}, + {Value: &Plain{uint16(42)}, ExpectXML: `42`}, + {Value: &Plain{uint32(42)}, ExpectXML: `42`}, + {Value: &Plain{float32(1.25)}, ExpectXML: `1.25`}, + {Value: &Plain{float64(1.25)}, ExpectXML: `1.25`}, + {Value: &Plain{uintptr(0xFFDD)}, ExpectXML: `65501`}, + {Value: &Plain{"gopher"}, ExpectXML: `gopher`}, + {Value: &Plain{[]byte("gopher")}, ExpectXML: `gopher`}, + {Value: &Plain{""}, ExpectXML: `</>`}, + {Value: &Plain{[]byte("")}, ExpectXML: `</>`}, + {Value: &Plain{[3]byte{'<', '/', '>'}}, ExpectXML: `</>`}, + {Value: &Plain{NamedType("potato")}, ExpectXML: `potato`}, + {Value: &Plain{[]int{1, 2, 3}}, ExpectXML: `123`}, + {Value: &Plain{[3]int{1, 2, 3}}, ExpectXML: `123`}, + {Value: ifaceptr(true), MarshalOnly: true, ExpectXML: `true`}, + + // Test time. + { + Value: &Plain{time.Unix(1e9, 123456789).UTC()}, + ExpectXML: `2001-09-09T01:46:40.123456789Z`, + }, + + // A pointer to struct{} may be used to test for an element's presence. + { + Value: &PresenceTest{new(struct{})}, + ExpectXML: ``, + }, + { + Value: &PresenceTest{}, + ExpectXML: ``, + }, + + // A []byte field is only nil if the element was not found. + { + Value: &Data{}, + ExpectXML: ``, + UnmarshalOnly: true, + }, + { + Value: &Data{Bytes: []byte{}, Custom: MyBytes{}, Attr: []byte{}}, + ExpectXML: ``, + UnmarshalOnly: true, + }, + + // Check that []byte works, including named []byte types. + { + Value: &Data{Bytes: []byte("ab"), Custom: MyBytes("cd"), Attr: []byte{'v'}}, + ExpectXML: `abcd`, + }, + + // Test innerxml + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "", + }, + ExpectXML: `James Bond`, + MarshalOnly: true, + }, + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "James Bond", + }, + ExpectXML: `James Bond`, + UnmarshalOnly: true, + }, + + // Test structs + {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `443`}, + {Value: &Port{Number: "443"}, ExpectXML: `443`}, + {Value: &Port{Type: ""}, ExpectXML: ``}, + {Value: &Port{Number: "443", Comment: "https"}, ExpectXML: `443`}, + {Value: &Port{Number: "443", Comment: "add space-"}, ExpectXML: `443`, MarshalOnly: true}, + {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `google.com&friends`}, + {Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `google.com`}, + {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `Pride & Prejudice`}, + {Value: &Event{Year: -3114}, ExpectXML: `-3114`}, + {Value: &Movie{Length: 13440}, ExpectXML: `13440`}, + {Value: &Pi{Approximation: 3.14159265}, ExpectXML: `3.1415927`}, + {Value: &Universe{Visible: 9.3e13}, ExpectXML: `9.3e+13`}, + {Value: &Particle{HasMass: true}, ExpectXML: `true`}, + {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `2013-01-09T00:15:00-09:00`}, + {Value: atomValue, ExpectXML: atomXML}, + {Value: &Generic[int]{1}, ExpectXML: `1`}, + { + Value: &Ship{ + Name: "Heart of Gold", + Pilot: "Computer", + Age: 1, + Drive: ImprobabilityDrive, + Passenger: []*Passenger{ + { + Name: []string{"Zaphod", "Beeblebrox"}, + Weight: 7.25, + }, + { + Name: []string{"Trisha", "McMillen"}, + Weight: 5.5, + }, + { + Name: []string{"Ford", "Prefect"}, + Weight: 7, + }, + { + Name: []string{"Arthur", "Dent"}, + Weight: 6.75, + }, + }, + }, + ExpectXML: `` + + `` + strconv.Itoa(int(ImprobabilityDrive)) + `` + + `1` + + `` + + `Zaphod` + + `Beeblebrox` + + `7.25` + + `` + + `` + + `Trisha` + + `McMillen` + + `5.5` + + `` + + `` + + `Ford` + + `Prefect` + + `7` + + `` + + `` + + `Arthur` + + `Dent` + + `6.75` + + `` + + ``, + }, + + // Test a>b + { + Value: &NestedItems{Items: nil, Item1: nil}, + ExpectXML: `` + + `` + + `` + + ``, + }, + { + Value: &NestedItems{Items: []string{}, Item1: []string{}}, + ExpectXML: `` + + `` + + `` + + ``, + MarshalOnly: true, + }, + { + Value: &NestedItems{Items: nil, Item1: []string{"A"}}, + ExpectXML: `` + + `` + + `A` + + `` + + ``, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: nil}, + ExpectXML: `` + + `` + + `A` + + `B` + + `` + + ``, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}}, + ExpectXML: `` + + `` + + `A` + + `B` + + `C` + + `` + + ``, + }, + { + Value: &NestedOrder{Field1: "C", Field2: "B", Field3: "A"}, + ExpectXML: `` + + `` + + `C` + + `B` + + `A` + + `` + + ``, + }, + { + Value: &NilTest{A: "A", B: nil, C: "C"}, + ExpectXML: `` + + `` + + `A` + + `C` + + `` + + ``, + MarshalOnly: true, // Uses interface{} + }, + { + Value: &MixedNested{A: "A", B: "B", C: "C", D: "D"}, + ExpectXML: `` + + `A` + + `B` + + `` + + `C` + + `D` + + `` + + ``, + }, + { + Value: &Service{Port: &Port{Number: "80"}}, + ExpectXML: `80`, + }, + { + Value: &Service{}, + ExpectXML: ``, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"}, + ExpectXML: `` + + `80` + + `A` + + `B` + + ``, + MarshalOnly: true, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra2: "example"}, + ExpectXML: `` + + `80` + + `example` + + ``, + MarshalOnly: true, + }, + { + Value: &struct { + XMLName struct{} `xml:"space top"` + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `` + + `abc` + + `c1` + + `d1` + + `` + + ``, + }, + { + Value: &struct { + XMLName Name + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + XMLName: Name{ + Space: "space0", + Local: "top", + }, + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `` + + `ab` + + `c` + + `c1` + + `d1` + + `` + + ``, + }, + { + Value: &struct { + XMLName struct{} `xml:"top"` + B string `xml:"space x>b"` + B1 string `xml:"space1 x>b"` + }{ + B: "b", + B1: "b1", + }, + ExpectXML: `` + + `b` + + `b1` + + ``, + }, + + // Test struct embedding + { + Value: &EmbedA{ + EmbedC: EmbedC{ + FieldA1: "", // Shadowed by A.A + FieldA2: "", // Shadowed by A.A + FieldB: "A.C.B", + FieldC: "A.C.C", + }, + EmbedB: EmbedB{ + FieldB: "A.B.B", + EmbedC: &EmbedC{ + FieldA1: "A.B.C.A1", + FieldA2: "A.B.C.A2", + FieldB: "", // Shadowed by A.B.B + FieldC: "A.B.C.C", + }, + }, + FieldA: "A.A", + embedD: embedD{ + FieldE: "A.D.E", + }, + }, + ExpectXML: `` + + `A.C.B` + + `A.C.C` + + `` + + `A.B.B` + + `` + + `A.B.C.A1` + + `A.B.C.A2` + + `` + + `A.B.C.C` + + `` + + `A.A` + + `A.D.E` + + ``, + }, + + // Anonymous struct pointer field which is nil + { + Value: &EmbedB{}, + ExpectXML: ``, + }, + + // Other kinds of nil anonymous fields + { + Value: &PointerAnonFields{}, + ExpectXML: ``, + }, + + // Test that name casing matters + { + Value: &NameCasing{Xy: "mixed", XY: "upper", XyA: "mixedA", XYA: "upperA"}, + ExpectXML: `mixedupper`, + }, + + // Test the order in which the XML element name is chosen + { + Value: &NamePrecedence{ + FromTag: XMLNameWithoutTag{Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "InXMLName"}, Value: "B"}, + FromNameTag: XMLNameWithTag{Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `` + + `A` + + `B` + + `C` + + `D` + + ``, + MarshalOnly: true, + }, + { + Value: &NamePrecedence{ + XMLName: Name{Local: "Parent"}, + FromTag: XMLNameWithoutTag{XMLName: Name{Local: "InTag"}, Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "FromNameVal"}, Value: "B"}, + FromNameTag: XMLNameWithTag{XMLName: Name{Local: "InXMLNameTag"}, Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `` + + `A` + + `B` + + `C` + + `D` + + ``, + UnmarshalOnly: true, + }, + + // xml.Name works in a plain field as well. + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: ``, + }, + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: ``, + UnmarshalOnly: true, + }, + + // Marshaling zero xml.Name uses the tag or field name. + { + Value: &NameInField{}, + ExpectXML: ``, + MarshalOnly: true, + }, + + // Test attributes + { + Value: &AttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: ``, + }, + { + Value: &AttrTest{Bytes: []byte{}}, + ExpectXML: ``, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + {Name: Name{Local: "Int"}, Value: "8"}, + {Name: Name{Local: "int"}, Value: "9"}, + {Name: Name{Local: "Float"}, Value: "23.5"}, + {Name: Name{Local: "Uint8"}, Value: "255"}, + {Name: Name{Local: "Bool"}, Value: "true"}, + {Name: Name{Local: "Str"}, Value: "str"}, + {Name: Name{Local: "Bytes"}, Value: "byt"}, + }, + }, + ExpectXML: ``, + MarshalOnly: true, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + }, + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: ``, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Int"}, Value: "0"}, + {Name: Name{Local: "int"}, Value: "0"}, + {Name: Name{Local: "Float"}, Value: "0"}, + {Name: Name{Local: "Uint8"}, Value: "0"}, + {Name: Name{Local: "Bool"}, Value: "false"}, + {Name: Name{Local: "Str"}}, + {Name: Name{Local: "Bytes"}}, + }, + Bytes: []byte{}, + }, + ExpectXML: ``, + MarshalOnly: true, + }, + { + Value: &OmitAttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + }, + ExpectXML: ``, + }, + { + Value: &OmitAttrTest{}, + ExpectXML: ``, + }, + + // pointer fields + { + Value: &PointerFieldsTest{Name: &nameAttr, Age: &ageAttr, Contents: &contentsAttr}, + ExpectXML: `lorem ipsum`, + MarshalOnly: true, + }, + + // empty chardata pointer field + { + Value: &ChardataEmptyTest{}, + ExpectXML: ``, + MarshalOnly: true, + }, + + // omitempty on fields + { + Value: &OmitFieldTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + Ptr: &PresenceTest{}, + }, + ExpectXML: `` + + `8` + + `9` + + `23.5` + + `255` + + `true` + + `str` + + `byt` + + `` + + `` + + ``, + }, + { + Value: &OmitFieldTest{}, + ExpectXML: ``, + }, + + // Test ",any" + { + ExpectXML: `known_unknown`, + Value: &AnyTest{ + Nested: "known", + AnyField: AnyHolder{ + XMLName: Name{Local: "other"}, + XML: "_unknown", + }, + }, + }, + { + Value: &AnyTest{Nested: "known", + AnyField: AnyHolder{ + XML: "", + XMLName: Name{Local: "AnyField"}, + }, + }, + ExpectXML: `known`, + }, + { + ExpectXML: `b`, + Value: &AnyOmitTest{ + Nested: "b", + }, + }, + { + ExpectXML: `bei`, + Value: &AnySliceTest{ + Nested: "b", + AnyField: []AnyHolder{ + { + XMLName: Name{Local: "c"}, + XML: "e", + }, + { + XMLName: Name{Space: "f", Local: "g"}, + XML: "i", + }, + }, + }, + }, + { + ExpectXML: `b`, + Value: &AnySliceTest{ + Nested: "b", + }, + }, + + // Test recursive types. + { + Value: &RecurseA{ + A: "a1", + B: &RecurseB{ + A: &RecurseA{"a2", nil}, + B: "b1", + }, + }, + ExpectXML: `a1 a2b1`, + }, + + // Test ignoring fields via "-" tag + { + ExpectXML: ``, + Value: &IgnoreTest{}, + }, + { + ExpectXML: ``, + Value: &IgnoreTest{PublicSecret: "can't tell"}, + MarshalOnly: true, + }, + { + ExpectXML: `ignore me`, + Value: &IgnoreTest{}, + UnmarshalOnly: true, + }, + + // Test escaping. + { + ExpectXML: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + Value: &AnyTest{ + Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + AnyField: AnyHolder{XMLName: Name{Local: "empty"}}, + }, + }, + { + ExpectXML: `newline: ; cr: ; tab: ;`, + Value: &AnyTest{ + Nested: "newline: \n; cr: \r; tab: \t;", + AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}}, + }, + }, + { + ExpectXML: "1\r2\r\n3\n\r4\n5", + Value: &AnyTest{ + Nested: "1\n2\n3\n\n4\n5", + }, + UnmarshalOnly: true, + }, + { + ExpectXML: `42`, + Value: &EmbedInt{ + MyInt: 42, + }, + }, + // Test outputting CDATA-wrapped text. + { + ExpectXML: ``, + Value: &CDataTest{}, + }, + { + ExpectXML: ``, + Value: &CDataTest{ + Chardata: "http://example.com/tests/1?foo=1&bar=baz", + }, + }, + { + ExpectXML: `!]]>`, + Value: &CDataTest{ + Chardata: "Literal !", + }, + }, + { + ExpectXML: ` Literal!]]>`, + Value: &CDataTest{ + Chardata: " Literal!", + }, + }, + { + ExpectXML: ` Literal! Literal!]]>`, + Value: &CDataTest{ + Chardata: " Literal! Literal!", + }, + }, + { + ExpectXML: `]]]]>]]>`, + Value: &CDataTest{ + Chardata: "]]>", + }, + }, + + // Test omitempty with parent chain; see golang.org/issue/4168. + { + ExpectXML: ``, + Value: &Strings{}, + }, + // Custom marshalers. + { + ExpectXML: `hello world`, + Value: &MyMarshalerTest{}, + }, + { + ExpectXML: ``, + Value: &MarshalerStruct{}, + }, + { + ExpectXML: ``, + Value: &OuterStruct{IntAttr: 10}, + }, + { + ExpectXML: ``, + Value: &OuterNamedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: ``, + Value: &OuterNamedOrderedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: ``, + Value: &OuterOuterStruct{OuterStruct{IntAttr: 10}}, + }, + { + ExpectXML: `test`, + Value: &NestedAndChardata{AB: make([]string, 2), Chardata: "test"}, + }, + { + ExpectXML: ``, + Value: &NestedAndComment{AB: make([]string, 2), Comment: "test"}, + }, + { + ExpectXML: ``, + Value: &NestedAndCData{AB: make([]string, 2), CDATA: "test"}, + }, + // Test pointer indirection in various kinds of fields. + // https://golang.org/issue/19063 + { + ExpectXML: ``, + Value: &IndirComment{Comment: stringptr("hi")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirComment{Comment: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IndirComment", + }, + { + ExpectXML: ``, + Value: &IndirComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IfaceComment", + }, + { + ExpectXML: ``, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectComment{Comment: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectComment{Comment: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirChardata{Chardata: stringptr("hi")}, + }, + { + ExpectXML: ``, + Value: &IndirChardata{Chardata: stringptr("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: ``, + Value: &IndirChardata{Chardata: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirChardata{Chardata: nil}, + MarshalOnly: true, // unmarshal leaves Chardata=stringptr("") + }, + { + ExpectXML: `hi`, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceChardata{Chardata: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceChardata{Chardata: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `hi`, + Value: &DirectChardata{Chardata: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: ``, + Value: &DirectChardata{Chardata: string("")}, + }, + { + ExpectXML: ``, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + }, + { + ExpectXML: `hi`, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: ``, + Value: &IndirCDATA{CDATA: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirCDATA{CDATA: nil}, + MarshalOnly: true, // unmarshal leaves CDATA=stringptr("") + }, + { + ExpectXML: ``, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `hi`, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceCDATA{CDATA: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &IfaceCDATA{CDATA: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: ``, + Value: &DirectCDATA{CDATA: string("hi")}, + }, + { + ExpectXML: `hi`, + Value: &DirectCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: ``, + Value: &DirectCDATA{CDATA: string("")}, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: nil}, + }, + { + ExpectXML: ``, + Value: &IndirInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: ""}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectInnerXML{InnerXML: string("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IndirElement{Element: stringptr("hi")}, + }, + { + ExpectXML: ``, + Value: &IndirElement{Element: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirElement{Element: nil}, + }, + { + ExpectXML: `hi`, + Value: &IfaceElement{Element: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceElement{Element: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectElement{Element: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectElement{Element: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("hi")}, + }, + { + // Note: Changed in Go 1.8 to include element (because x.OmitEmpty != nil). + ExpectXML: ``, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: `hi`, + Value: &IfaceOmitEmpty{OmitEmpty: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectOmitEmpty{OmitEmpty: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectOmitEmpty{OmitEmpty: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirAny{Any: stringptr("hi")}, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: stringptr("")}, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: nil}, + }, + { + ExpectXML: `hi`, + Value: &IfaceAny{Any: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectAny{Any: string("hi")}, + }, + { + ExpectXML: ``, + Value: &DirectAny{Any: string("")}, + }, + { + ExpectXML: `hi`, + Value: &IndirAny{Any: stringptr("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IndirAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `hi`, + Value: &DirectAny{Any: string("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: ``, + Value: &DirectAny{Any: string("")}, + UnmarshalOnly: true, + }, +} + +func TestMarshal(t *testing.T) { + for idx, test := range marshalTests { + if test.UnmarshalOnly { + continue + } + + t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) { + data, err := Marshal(test.Value) + if err != nil { + if test.MarshalError == "" { + t.Errorf("marshal(%#v): %s", test.Value, err) + return + } + if !strings.Contains(err.Error(), test.MarshalError) { + t.Errorf("marshal(%#v): %s, want %q", test.Value, err, test.MarshalError) + } + return + } + if test.MarshalError != "" { + t.Errorf("Marshal succeeded, want error %q", test.MarshalError) + return + } + if got, want := string(data), test.ExpectXML; got != want { + if strings.Contains(want, "\n") { + t.Errorf("marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", test.Value, got, want) + } else { + t.Errorf("marshal(%#v):\nhave %#q\nwant %#q", test.Value, got, want) + } + } + }) + } +} + +type AttrParent struct { + X string `xml:"X>Y,attr"` +} + +type BadAttr struct { + Name map[string]string `xml:"name,attr"` +} + +var marshalErrorTests = []struct { + Value any + Err string + Kind reflect.Kind +}{ + { + Value: make(chan bool), + Err: "xml: unsupported type: chan bool", + Kind: reflect.Chan, + }, + { + Value: map[string]string{ + "question": "What do you get when you multiply six by nine?", + "answer": "42", + }, + Err: "xml: unsupported type: map[string]string", + Kind: reflect.Map, + }, + { + Value: map[*Ship]bool{nil: false}, + Err: "xml: unsupported type: map[*xml.Ship]bool", + Kind: reflect.Map, + }, + { + Value: &Domain{Comment: []byte("f--bar")}, + Err: `xml: comments must not contain "--"`, + }, + // Reject parent chain with attr, never worked; see golang.org/issue/5033. + { + Value: &AttrParent{}, + Err: `xml: X>Y chain not valid with attr flag`, + }, + { + Value: BadAttr{map[string]string{"X": "Y"}}, + Err: `xml: unsupported type: map[string]string`, + }, +} + +var marshalIndentTests = []struct { + Value any + Prefix string + Indent string + ExpectXML string +}{ + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "", + }, + Prefix: "", + Indent: "\t", + ExpectXML: "\n\tJames Bond\n", + }, +} + +func TestMarshalErrors(t *testing.T) { + for idx, test := range marshalErrorTests { + data, err := Marshal(test.Value) + if err == nil { + t.Errorf("#%d: marshal(%#v) = [success] %q, want error %v", idx, test.Value, data, test.Err) + continue + } + if err.Error() != test.Err { + t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err) + } + if test.Kind != reflect.Invalid { + if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind { + t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind) + } + } + } +} + +// Do invertibility testing on the various structures that we test +func TestUnmarshal(t *testing.T) { + for i, test := range marshalTests { + if test.MarshalOnly { + continue + } + if _, ok := test.Value.(*Plain); ok { + continue + } + if test.ExpectXML == ``+ + `b`+ + `b1`+ + `` { + // TODO(rogpeppe): re-enable this test in + // https://go-review.googlesource.com/#/c/5910/ + continue + } + + vt := reflect.TypeOf(test.Value) + dest := reflect.New(vt.Elem()).Interface() + err := Unmarshal([]byte(test.ExpectXML), dest) + + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + switch fix := dest.(type) { + case *Feed: + fix.Author.InnerXML = "" + for i := range fix.Entry { + fix.Entry[i].Author.InnerXML = "" + } + } + + if err != nil { + if test.UnmarshalError == "" { + t.Errorf("unmarshal(%#v): %s", test.ExpectXML, err) + return + } + if !strings.Contains(err.Error(), test.UnmarshalError) { + t.Errorf("unmarshal(%#v): %s, want %q", test.ExpectXML, err, test.UnmarshalError) + } + return + } + if got, want := dest, test.Value; !reflect.DeepEqual(got, want) { + t.Errorf("unmarshal(%q):\nhave %#v\nwant %#v", test.ExpectXML, got, want) + } + }) + } +} + +func TestMarshalIndent(t *testing.T) { + for i, test := range marshalIndentTests { + data, err := MarshalIndent(test.Value, test.Prefix, test.Indent) + if err != nil { + t.Errorf("#%d: Error: %s", i, err) + continue + } + if got, want := string(data), test.ExpectXML; got != want { + t.Errorf("#%d: MarshalIndent:\nGot:%s\nWant:\n%s", i, got, want) + } + } +} + +type limitedBytesWriter struct { + w io.Writer + remain int // until writes fail +} + +func (lw *limitedBytesWriter) Write(p []byte) (n int, err error) { + if lw.remain <= 0 { + println("error") + return 0, errors.New("write limit hit") + } + if len(p) > lw.remain { + p = p[:lw.remain] + n, _ = lw.w.Write(p) + lw.remain = 0 + return n, errors.New("write limit hit") + } + n, err = lw.w.Write(p) + lw.remain -= n + return n, err +} + +func TestMarshalWriteErrors(t *testing.T) { + var buf bytes.Buffer + const writeCap = 1024 + w := &limitedBytesWriter{&buf, writeCap} + enc := NewEncoder(w) + var err error + var i int + const n = 4000 + for i = 1; i <= n; i++ { + err = enc.Encode(&Passenger{ + Name: []string{"Alice", "Bob"}, + Weight: 5, + }) + if err != nil { + break + } + } + if err == nil { + t.Error("expected an error") + } + if i == n { + t.Errorf("expected to fail before the end") + } + if buf.Len() != writeCap { + t.Errorf("buf.Len() = %d; want %d", buf.Len(), writeCap) + } +} + +func TestMarshalWriteIOErrors(t *testing.T) { + enc := NewEncoder(errWriter{}) + + expectErr := "unwritable" + err := enc.Encode(&Passenger{}) + if err == nil || err.Error() != expectErr { + t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + } +} + +func TestMarshalFlush(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(CharData("hello world")); err != nil { + t.Fatalf("enc.EncodeToken: %v", err) + } + if buf.Len() > 0 { + t.Fatalf("enc.EncodeToken caused actual write: %q", buf.String()) + } + if err := enc.Flush(); err != nil { + t.Fatalf("enc.Flush: %v", err) + } + if buf.String() != "hello world" { + t.Fatalf("after enc.Flush, buf.String() = %q, want %q", buf.String(), "hello world") + } +} + +func BenchmarkMarshal(b *testing.B) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Marshal(atomValue) + } + }) +} + +func BenchmarkUnmarshal(b *testing.B) { + b.ReportAllocs() + xml := []byte(atomXML) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Unmarshal(xml, &Feed{}) + } + }) +} + +// golang.org/issue/6556 +func TestStructPointerMarshal(t *testing.T) { + type A struct { + XMLName string `xml:"a"` + B []any + } + type C struct { + XMLName Name + Value string `xml:"value"` + } + + a := new(A) + a.B = append(a.B, &C{ + XMLName: Name{Local: "c"}, + Value: "x", + }) + + b, err := Marshal(a) + if err != nil { + t.Fatal(err) + } + if x := string(b); x != "x" { + t.Fatal(x) + } + var v A + err = Unmarshal(b, &v) + if err != nil { + t.Fatal(err) + } +} + +var encodeTokenTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "start element with name space", + toks: []Token{ + StartElement{Name{"space", "local"}, nil}, + }, + want: ``, +}, { + desc: "start element with no name", + toks: []Token{ + StartElement{Name{"space", ""}, nil}, + }, + err: "xml: start tag with no name", +}, { + desc: "end element with no name", + toks: []Token{ + EndElement{Name{"space", ""}}, + }, + err: "xml: end tag with no name", +}, { + desc: "char data", + toks: []Token{ + CharData("foo"), + }, + want: `foo`, +}, { + desc: "char data with escaped chars", + toks: []Token{ + CharData(" \t\n"), + }, + want: " \n", +}, { + desc: "comment", + toks: []Token{ + Comment("foo"), + }, + want: ``, +}, { + desc: "comment with invalid content", + toks: []Token{ + Comment("foo-->"), + }, + err: "xml: EncodeToken of Comment containing --> marker", +}, { + desc: "proc instruction", + toks: []Token{ + ProcInst{"Target", []byte("Instruction")}, + }, + want: ``, +}, { + desc: "proc instruction with empty target", + toks: []Token{ + ProcInst{"", []byte("Instruction")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "proc instruction with bad content", + toks: []Token{ + ProcInst{"", []byte("Instruction?>")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: ``, +}, { + desc: "more complex directive", + toks: []Token{ + Directive("DOCTYPE doc [ '> ]"), + }, + want: `'> ]>`, +}, { + desc: "directive instruction with bad name", + toks: []Token{ + Directive("foo>"), + }, + err: "xml: EncodeToken of Directive containing wrong < or > markers", +}, { + desc: "end tag without start tag", + toks: []Token{ + EndElement{Name{"foo", "bar"}}, + }, + err: "xml: end tag without start tag", +}, { + desc: "mismatching end tag local name", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "bar"}}, + }, + err: "xml: end tag does not match start tag ", + want: ``, +}, { + desc: "mismatching end tag namespace", + toks: []Token{ + StartElement{Name{"space", "foo"}, nil}, + EndElement{Name{"another", "foo"}}, + }, + err: "xml: end tag in namespace another does not match start tag in namespace space", + want: ``, +}, { + desc: "start element with explicit namespace", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "start element with explicit namespace and colliding prefix", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + {Name{"x", "bar"}, "other"}, + }}, + }, + want: ``, +}, { + desc: "start element using previously defined namespace", + toks: []Token{ + StartElement{Name{"", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"space", "x"}, "y"}, + }}, + }, + want: ``, +}, { + desc: "nested name space with same prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space1"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space2"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + EndElement{Name{"", "foo"}}, + EndElement{Name{"", "foo"}}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + }, + want: ``, +}, { + desc: "start element defining several prefixes for the same name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "a"}, "space"}, + {Name{"xmlns", "b"}, "space"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element redefines name space", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element creates alias for default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element defines default name space with existing prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element uses empty attribute name space when default ns defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "redefine xmlns", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"foo", "xmlns"}, "space"}, + }}, + }, + want: ``, +}, { + desc: "xmlns with explicit name space #1", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xml", "xmlns"}, "space"}, + }}, + }, + want: ``, +}, { + desc: "xmlns with explicit name space #2", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{xmlURL, "xmlns"}, "space"}, + }}, + }, + want: ``, +}, { + desc: "empty name space declaration is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "foo"}, ""}, + }}, + }, + want: ``, +}, { + desc: "attribute with no name is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", ""}, "value"}, + }}, + }, + want: ``, +}, { + desc: "namespace URL with non-valid name", + toks: []Token{ + StartElement{Name{"/34", "foo"}, []Attr{ + {Name{"/34", "x"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element resets default namespace to empty", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, ""}, + {Name{"", "x"}, "value"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "nested element requires empty default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, nil}, + }, + want: ``, +}, { + desc: "attribute uses name space from xmlns", + toks: []Token{ + StartElement{Name{"some/space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + {Name{"some/space", "other"}, "other value"}, + }}, + }, + want: ``, +}, { + desc: "default name space should not be used by attributes", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"xmlns", "bar"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: ``, +}, { + desc: "default name space not used by attributes, not explicitly defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: ``, +}, { + desc: "impossible xmlns declaration", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "bar"}, []Attr{ + {Name{"space", "attr"}, "value"}, + }}, + }, + want: ``, +}, { + desc: "reserved namespace prefix -- all lower case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/xmlSchema-instance", "nil"}, "true"}, + }}, + }, + want: ``, +}, { + desc: "reserved namespace prefix -- all upper case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XMLSchema-instance", "nil"}, "true"}, + }}, + }, + want: ``, +}, { + desc: "reserved namespace prefix -- all mixed case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XmLSchema-instance", "nil"}, "true"}, + }}, + }, + want: ``, +}} + +func TestEncodeToken(t *testing.T) { +loop: + for i, tt := range encodeTokenTests { + var buf strings.Builder + enc := NewEncoder(&buf) + var err error + for j, tok := range tt.toks { + err = enc.EncodeToken(tok) + if err != nil && j < len(tt.toks)-1 { + t.Errorf("#%d %s token #%d: %v", i, tt.desc, j, err) + continue loop + } + } + errorf := func(f string, a ...any) { + t.Errorf("#%d %s token #%d:%s", i, tt.desc, len(tt.toks)-1, fmt.Sprintf(f, a...)) + } + switch { + case tt.err != "" && err == nil: + errorf(" expected error; got none") + continue + case tt.err == "" && err != nil: + errorf(" got error: %v", err) + continue + case tt.err != "" && err != nil && tt.err != err.Error(): + errorf(" error mismatch; got %v, want %v", err, tt.err) + continue + } + if err := enc.Flush(); err != nil { + errorf(" %v", err) + continue + } + if got := buf.String(); got != tt.want { + errorf("\ngot %v\nwant %v", got, tt.want) + continue + } + } +} + +func TestProcInstEncodeToken(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err) + } + + if err := enc.EncodeToken(ProcInst{"Target", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to add non-xml target ProcInst") + } + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err == nil { + t.Fatalf("enc.EncodeToken: expected to not be allowed to encode xml target ProcInst when not first token") + } +} + +func TestDecodeEncode(t *testing.T) { + var in, out bytes.Buffer + in.WriteString(` + + + +`) + dec := NewDecoder(&in) + enc := NewEncoder(&out) + for tok, err := dec.Token(); err == nil; tok, err = dec.Token() { + err = enc.EncodeToken(tok) + if err != nil { + t.Fatalf("enc.EncodeToken: Unable to encode token (%#v), %v", tok, err) + } + } +} + +// Issue 9796. Used to fail with GORACE="halt_on_error=1" -race. +func TestRace9796(t *testing.T) { + type A struct{} + type B struct { + C []A `xml:"X>Y"` + } + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + Marshal(B{[]A{{}}}) + wg.Done() + }() + } + wg.Wait() +} + +func TestIsValidDirective(t *testing.T) { + testOK := []string{ + "<>", + "< < > >", + "' '>' >", + " ]>", + " '<' ' doc ANY> ]>", + ">>> a < comment --> [ ] >", + } + testKO := []string{ + "<", + ">", + "", + "< > > < < >", + " -->", + "", + "'", + "", + } + for _, s := range testOK { + if !isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be valid", s) + } + } + for _, s := range testKO { + if isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be invalid", s) + } + } +} + +// Issue 11719. EncodeToken used to silently eat tokens with an invalid type. +func TestSimpleUseOfEncodeToken(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(&StartElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(&EndElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(StartElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: StartElement %s", err) + } + if err := enc.EncodeToken(EndElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: EndElement %s", err) + } + if err := enc.EncodeToken(Universe{}); err == nil { + t.Errorf("enc.EncodeToken: invalid type not caught") + } + if err := enc.Flush(); err != nil { + t.Errorf("enc.Flush: %s", err) + } + if buf.Len() == 0 { + t.Errorf("enc.EncodeToken: empty buffer") + } + want := "" + if buf.String() != want { + t.Errorf("enc.EncodeToken: expected %q; got %q", want, buf.String()) + } +} + +// Issue 16158. Decoder.unmarshalAttr ignores the return value of copyValue. +func TestIssue16158(t *testing.T) { + const data = `` + err := Unmarshal([]byte(data), &struct { + B byte `xml:"b,attr,omitempty"` + }{}) + if err == nil { + t.Errorf("Unmarshal: expected error, got nil") + } +} + +// Issue 20953. Crash on invalid XMLName attribute. + +type InvalidXMLName struct { + XMLName Name `xml:"error"` + Type struct { + XMLName Name `xml:"type,attr"` + } +} + +func TestInvalidXMLName(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(InvalidXMLName{}); err == nil { + t.Error("unexpected success") + } else if want := "invalid tag"; !strings.Contains(err.Error(), want) { + t.Errorf("error %q does not contain %q", err, want) + } +} + +// Issue 50164. Crash on zero value XML attribute. +type LayerOne struct { + XMLName Name `xml:"l1"` + + Value *float64 `xml:"value,omitempty"` + *LayerTwo `xml:",omitempty"` +} + +type LayerTwo struct { + ValueTwo *int `xml:"value_two,attr,omitempty"` +} + +func TestMarshalZeroValue(t *testing.T) { + proofXml := `1.2345` + var l1 LayerOne + err := Unmarshal([]byte(proofXml), &l1) + if err != nil { + t.Fatalf("unmarshal XML error: %v", err) + } + want := float64(1.2345) + got := *l1.Value + if got != want { + t.Fatalf("unexpected unmarshal result, want %f but got %f", want, got) + } + + // Marshal again (or Encode again) + // In issue 50164, here `Marshal(l1)` will panic because of the zero value of xml attribute ValueTwo `value_two`. + anotherXML, err := Marshal(l1) + if err != nil { + t.Fatalf("marshal XML error: %v", err) + } + if string(anotherXML) != proofXml { + t.Fatalf("unexpected unmarshal result, want %q but got %q", proofXml, anotherXML) + } +} + +var closeTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "unclosed start element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + }, + want: ``, + err: "unclosed tag ", +}, { + desc: "closed element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "foo"}}, + }, + want: ``, +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: ``, +}} + +func TestClose(t *testing.T) { + for _, tt := range closeTests { + tt := tt + t.Run(tt.desc, func(t *testing.T) { + var out strings.Builder + enc := NewEncoder(&out) + for j, tok := range tt.toks { + if err := enc.EncodeToken(tok); err != nil { + t.Fatalf("token #%d: %v", j, err) + } + } + err := enc.Close() + switch { + case tt.err != "" && err == nil: + t.Error(" expected error; got none") + case tt.err == "" && err != nil: + t.Errorf(" got error: %v", err) + case tt.err != "" && err != nil && tt.err != err.Error(): + t.Errorf(" error mismatch; got %v, want %v", err, tt.err) + } + if got := out.String(); got != tt.want { + t.Errorf("\ngot %v\nwant %v", got, tt.want) + } + t.Log(enc.p.closed) + if err := enc.EncodeToken(Directive("foo")); err == nil { + t.Errorf("unexpected success when encoding after Close") + } + }) + } +} diff --git a/prefix_test.go b/prefix_test.go new file mode 100644 index 0000000..3090910 --- /dev/null +++ b/prefix_test.go @@ -0,0 +1,44 @@ +package goxml_test + +import ( + "testing" + + "git.pyer.club/kingecg/goxml" +) + +type Person struct { + Name string `xml:"p:name"` + Age int `xml:"p:age"` +} + +type Employee struct { + Person + Salary int `xml:"p:salary"` +} + +func TestUnmarshal(t *testing.T) { + xml := ` + + John Doe + 42 + 100000 + + ` + e := Employee{} + err := goxml.Unmarshal([]byte(xml), &e) + if err != nil { + t.Fatal(err) + } +} + +func TestMashal(t *testing.T) { + e := Employee{ + Person{ + Name: "John", + Age: 30, + }, + 50000, + } + b, _ := goxml.Marshal(e) + t.Log(string(b)) +} diff --git a/read.go b/read.go new file mode 100644 index 0000000..6416896 --- /dev/null +++ b/read.go @@ -0,0 +1,777 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "reflect" + "runtime" + "strconv" + "strings" +) + +// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: +// an XML element is an order-dependent collection of anonymous +// values, while a data structure is an order-independent collection +// of named values. +// See [encoding/json] for a textual representation more suitable +// to data structures. + +// Unmarshal parses the XML-encoded data and stores the result in +// the value pointed to by v, which must be an arbitrary struct, +// slice, or string. Well-formed data that does not fit into v is +// discarded. +// +// Because Unmarshal uses the reflect package, it can only assign +// to exported (upper case) fields. Unmarshal uses a case-sensitive +// comparison to match XML element names to tag values and struct +// field names. +// +// Unmarshal maps an XML element to a struct using the following rules. +// In the rules, the tag of a field refers to the value associated with the +// key 'xml' in the struct field's tag (see the example above). +// +// - If the struct has a field of type []byte or string with tag +// ",innerxml", Unmarshal accumulates the raw XML nested inside the +// element in that field. The rest of the rules still apply. +// +// - If the struct has a field named XMLName of type Name, +// Unmarshal records the element name in that field. +// +// - If the XMLName field has an associated tag of the form +// "name" or "namespace-URL name", the XML element must have +// the given name (and, optionally, name space) or else Unmarshal +// returns an error. +// +// - If the XML element has an attribute whose name matches a +// struct field name with an associated tag containing ",attr" or +// the explicit name in a struct field tag of the form "name,attr", +// Unmarshal records the attribute value in that field. +// +// - If the XML element has an attribute not handled by the previous +// rule and the struct has a field with an associated tag containing +// ",any,attr", Unmarshal records the attribute value in the first +// such field. +// +// - If the XML element contains character data, that data is +// accumulated in the first struct field that has tag ",chardata". +// The struct field may have type []byte or string. +// If there is no such field, the character data is discarded. +// +// - If the XML element contains comments, they are accumulated in +// the first struct field that has tag ",comment". The struct +// field may have type []byte or string. If there is no such +// field, the comments are discarded. +// +// - If the XML element contains a sub-element whose name matches +// the prefix of a tag formatted as "a" or "a>b>c", unmarshal +// will descend into the XML structure looking for elements with the +// given names, and will map the innermost elements to that struct +// field. A tag starting with ">" is equivalent to one starting +// with the field name followed by ">". +// +// - If the XML element contains a sub-element whose name matches +// a struct field's XMLName tag and the struct field has no +// explicit name tag as per the previous rule, unmarshal maps +// the sub-element to that struct field. +// +// - If the XML element contains a sub-element whose name matches a +// field without any mode flags (",attr", ",chardata", etc), Unmarshal +// maps the sub-element to that struct field. +// +// - If the XML element contains a sub-element that hasn't matched any +// of the above rules and the struct has a field with tag ",any", +// unmarshal maps the sub-element to that struct field. +// +// - An anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// +// - A struct field with tag "-" is never unmarshaled into. +// +// If Unmarshal encounters a field type that implements the Unmarshaler +// interface, Unmarshal calls its UnmarshalXML method to produce the value from +// the XML element. Otherwise, if the value implements +// [encoding.TextUnmarshaler], Unmarshal calls that value's UnmarshalText method. +// +// Unmarshal maps an XML element to a string or []byte by saving the +// concatenation of that element's character data in the string or +// []byte. The saved []byte is never nil. +// +// Unmarshal maps an attribute value to a string or []byte by saving +// the value in the string or slice. +// +// Unmarshal maps an attribute value to an [Attr] by saving the attribute, +// including its name, in the Attr. +// +// Unmarshal maps an XML element or attribute value to a slice by +// extending the length of the slice and mapping the element or attribute +// to the newly created value. +// +// Unmarshal maps an XML element or attribute value to a bool by +// setting it to the boolean value represented by the string. Whitespace +// is trimmed and ignored. +// +// Unmarshal maps an XML element or attribute value to an integer or +// floating-point field by setting the field to the result of +// interpreting the string value in decimal. There is no check for +// overflow. Whitespace is trimmed and ignored. +// +// Unmarshal maps an XML element to a Name by recording the element +// name. +// +// Unmarshal maps an XML element to a pointer by setting the pointer +// to a freshly allocated value and then mapping the element to that value. +// +// A missing element or empty attribute value will be unmarshaled as a zero value. +// If the field is a slice, a zero value will be appended to the field. Otherwise, the +// field will be set to its zero value. +func Unmarshal(data []byte, v any) error { + return NewDecoder(bytes.NewReader(data)).Decode(v) +} + +// Decode works like [Unmarshal], except it reads the decoder +// stream to find the start element. +func (d *Decoder) Decode(v any) error { + return d.DecodeElement(v, nil) +} + +// DecodeElement works like [Unmarshal] except that it takes +// a pointer to the start XML element to decode into v. +// It is useful when a client reads some raw XML tokens itself +// but also wants to defer to [Unmarshal] for some elements. +func (d *Decoder) DecodeElement(v any, start *StartElement) error { + val := reflect.ValueOf(v) + if val.Kind() != reflect.Pointer { + return errors.New("non-pointer passed to Unmarshal") + } + + if val.IsNil() { + return errors.New("nil pointer passed to Unmarshal") + } + return d.unmarshal(val.Elem(), start, 0) +} + +// An UnmarshalError represents an error in the unmarshaling process. +type UnmarshalError string + +func (e UnmarshalError) Error() string { return string(e) } + +// Unmarshaler is the interface implemented by objects that can unmarshal +// an XML element description of themselves. +// +// UnmarshalXML decodes a single XML element +// beginning with the given start element. +// If it returns an error, the outer call to Unmarshal stops and +// returns that error. +// UnmarshalXML must consume exactly one XML element. +// One common implementation strategy is to unmarshal into +// a separate value with a layout matching the expected XML +// using d.DecodeElement, and then to copy the data from +// that value into the receiver. +// Another common strategy is to use d.Token to process the +// XML object one token at a time. +// UnmarshalXML may not use d.RawToken. +type Unmarshaler interface { + UnmarshalXML(d *Decoder, start StartElement) error +} + +// UnmarshalerAttr is the interface implemented by objects that can unmarshal +// an XML attribute description of themselves. +// +// UnmarshalXMLAttr decodes a single XML attribute. +// If it returns an error, the outer call to [Unmarshal] stops and +// returns that error. +// UnmarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type UnmarshalerAttr interface { + UnmarshalXMLAttr(attr Attr) error +} + +// receiverType returns the receiver type to use in an expression like "%s.MethodName". +func receiverType(val any) string { + t := reflect.TypeOf(val) + if t.Name() != "" { + return t.String() + } + return "(" + t.String() + ")" +} + +// unmarshalInterface unmarshals a single XML element into val. +// start is the opening tag of the element. +func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { + // Record that decoder must stop at end tag corresponding to start. + d.pushEOF() + + d.unmarshalDepth++ + err := val.UnmarshalXML(d, *start) + d.unmarshalDepth-- + if err != nil { + d.popEOF() + return err + } + + if !d.popEOF() { + return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) + } + + return nil +} + +// unmarshalTextInterface unmarshals a single XML element into val. +// The chardata contained in the element (but not its children) +// is passed to the text unmarshaler. +func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { + var buf []byte + depth := 1 + for depth > 0 { + t, err := d.Token() + if err != nil { + return err + } + switch t := t.(type) { + case CharData: + if depth == 1 { + buf = append(buf, t...) + } + case StartElement: + depth++ + case EndElement: + depth-- + } + } + return val.UnmarshalText(buf) +} + +// unmarshalAttr unmarshals a single XML attribute into val. +func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) { + return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + } + + // Not an UnmarshalerAttr; try encoding.TextUnmarshaler. + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + } + + if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + // Slice of element values. + // Grow slice. + n := val.Len() + val.Grow(1) + val.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshalAttr(val.Index(n), attr); err != nil { + val.SetLen(n) + return err + } + return nil + } + + if val.Type() == attrType { + val.Set(reflect.ValueOf(attr)) + return nil + } + + return copyValue(val, []byte(attr.Value)) +} + +var ( + attrType = reflect.TypeFor[Attr]() + unmarshalerType = reflect.TypeFor[Unmarshaler]() + unmarshalerAttrType = reflect.TypeFor[UnmarshalerAttr]() + textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() +) + +const ( + maxUnmarshalDepth = 10000 + maxUnmarshalDepthWasm = 5000 // go.dev/issue/56498 +) + +var errUnmarshalDepth = errors.New("exceeded max depth") + +// Unmarshal a single XML element into val. +func (d *Decoder) unmarshal(val reflect.Value, start *StartElement, depth int) error { + if depth >= maxUnmarshalDepth || runtime.GOARCH == "wasm" && depth >= maxUnmarshalDepthWasm { + return errUnmarshalDepth + } + // Find start element if we need it. + if start == nil { + for { + tok, err := d.Token() + if err != nil { + return err + } + if t, ok := tok.(StartElement); ok { + start = &t + break + } + } + } + + // Load value from interface, but only if the result will be + // usefully addressable. + if val.Kind() == reflect.Interface && !val.IsNil() { + e := val.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() { + val = e + } + } + + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + + if val.CanInterface() && val.Type().Implements(unmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return d.unmarshalInterface(val.Interface().(Unmarshaler), start) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { + return d.unmarshalInterface(pv.Interface().(Unmarshaler), start) + } + } + + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) + } + } + + var ( + data []byte + saveData reflect.Value + comment []byte + saveComment reflect.Value + saveXML reflect.Value + saveXMLIndex int + saveXMLData []byte + saveAny reflect.Value + sv reflect.Value + tinfo *typeInfo + err error + ) + + switch v := val; v.Kind() { + default: + return errors.New("unknown type " + v.Type().String()) + + case reflect.Interface: + // TODO: For now, simply ignore the field. In the near + // future we may choose to unmarshal the start + // element on it, if not nil. + return d.Skip() + + case reflect.Slice: + typ := v.Type() + if typ.Elem().Kind() == reflect.Uint8 { + // []byte + saveData = v + break + } + + // Slice of element values. + // Grow slice. + n := v.Len() + v.Grow(1) + v.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshal(v.Index(n), start, depth+1); err != nil { + v.SetLen(n) + return err + } + return nil + + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: + saveData = v + + case reflect.Struct: + typ := v.Type() + if typ == nameType { + v.Set(reflect.ValueOf(start.Name)) + break + } + + sv = v + tinfo, err = getTypeInfo(typ, true) + if err != nil { + return err + } + + // Validate and assign element name. + if tinfo.xmlname != nil { + finfo := tinfo.xmlname + if finfo.name != "" && finfo.name != start.Name.Local { + return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") + } + if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " + if start.Name.Space == "" { + e += "no name space" + } else { + e += start.Name.Space + } + return UnmarshalError(e) + } + fv := finfo.value(sv, initNilPointers) + if _, ok := fv.Interface().(Name); ok { + fv.Set(reflect.ValueOf(start.Name)) + } + } + + // Assign attributes. + for _, a := range start.Attr { + handled := false + any := -1 + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fAttr: + strv := finfo.value(sv, initNilPointers) + if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + handled = true + } + + case fAny | fAttr: + if any == -1 { + any = i + } + } + } + if !handled && any >= 0 { + finfo := &tinfo.fields[any] + strv := finfo.value(sv, initNilPointers) + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + } + } + + // Determine whether we need to save character data or comments. + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fCDATA, fCharData: + if !saveData.IsValid() { + saveData = finfo.value(sv, initNilPointers) + } + + case fComment: + if !saveComment.IsValid() { + saveComment = finfo.value(sv, initNilPointers) + } + + case fAny, fAny | fElement: + if !saveAny.IsValid() { + saveAny = finfo.value(sv, initNilPointers) + } + + case fInnerXML: + if !saveXML.IsValid() { + saveXML = finfo.value(sv, initNilPointers) + if d.saved == nil { + saveXMLIndex = 0 + d.saved = new(bytes.Buffer) + } else { + saveXMLIndex = d.savedOffset() + } + } + } + } + } + + // Find end element. + // Process sub-elements along the way. +Loop: + for { + var savedOffset int + if saveXML.IsValid() { + savedOffset = d.savedOffset() + } + tok, err := d.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + consumed := false + if sv.IsValid() { + // unmarshalPath can call unmarshal, so we need to pass the depth through so that + // we can continue to enforce the maximum recursion limit. + consumed, err = d.unmarshalPath(tinfo, sv, nil, &t, depth) + if err != nil { + return err + } + if !consumed && saveAny.IsValid() { + consumed = true + if err := d.unmarshal(saveAny, &t, depth+1); err != nil { + return err + } + } + } + if !consumed { + if err := d.Skip(); err != nil { + return err + } + } + + case EndElement: + if saveXML.IsValid() { + saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset] + if saveXMLIndex == 0 { + d.saved = nil + } + } + break Loop + + case CharData: + if saveData.IsValid() { + data = append(data, t...) + } + + case Comment: + if saveComment.IsValid() { + comment = append(comment, t...) + } + } + } + + if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) { + if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + + if saveData.IsValid() && saveData.CanAddr() { + pv := saveData.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + } + + if err := copyValue(saveData, data); err != nil { + return err + } + + switch t := saveComment; t.Kind() { + case reflect.String: + t.SetString(string(comment)) + case reflect.Slice: + t.Set(reflect.ValueOf(comment)) + } + + switch t := saveXML; t.Kind() { + case reflect.String: + t.SetString(string(saveXMLData)) + case reflect.Slice: + if t.Type().Elem().Kind() == reflect.Uint8 { + t.Set(reflect.ValueOf(saveXMLData)) + } + } + + return nil +} + +func copyValue(dst reflect.Value, src []byte) (err error) { + dst0 := dst + + if dst.Kind() == reflect.Pointer { + if dst.IsNil() { + dst.Set(reflect.New(dst.Type().Elem())) + } + dst = dst.Elem() + } + + // Save accumulated data. + switch dst.Kind() { + case reflect.Invalid: + // Probably a comment. + default: + return errors.New("cannot unmarshal into " + dst0.Type().String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if len(src) == 0 { + dst.SetInt(0) + return nil + } + itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetInt(itmp) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if len(src) == 0 { + dst.SetUint(0) + return nil + } + utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetUint(utmp) + case reflect.Float32, reflect.Float64: + if len(src) == 0 { + dst.SetFloat(0) + return nil + } + ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits()) + if err != nil { + return err + } + dst.SetFloat(ftmp) + case reflect.Bool: + if len(src) == 0 { + dst.SetBool(false) + return nil + } + value, err := strconv.ParseBool(strings.TrimSpace(string(src))) + if err != nil { + return err + } + dst.SetBool(value) + case reflect.String: + dst.SetString(string(src)) + case reflect.Slice: + if len(src) == 0 { + // non-nil to flag presence + src = []byte{} + } + dst.SetBytes(src) + } + return nil +} + +// unmarshalPath walks down an XML structure looking for wanted +// paths, and calls unmarshal on them. +// The consumed result tells whether XML elements have been consumed +// from the Decoder until start's matching end element, or if it's +// still untouched because start is uninteresting for sv's fields. +func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement, depth int) (consumed bool, err error) { + recurse := false +Loop: + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + continue + } + for j := range parents { + if parents[j] != finfo.parents[j] { + continue Loop + } + } + if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { + // It's a perfect match, unmarshal the field. + return true, d.unmarshal(finfo.value(sv, initNilPointers), start, depth+1) + } + if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { + // It's a prefix for the field. Break and recurse + // since it's not ok for one field path to be itself + // the prefix for another field path. + recurse = true + + // We can reuse the same slice as long as we + // don't try to append to it. + parents = finfo.parents[:len(parents)+1] + break + } + } + if !recurse { + // We have no business with this element. + return false, nil + } + // The element is not a perfect match for any field, but one + // or more fields have the path to this element as a parent + // prefix. Recurse and attempt to match these. + for { + var tok Token + tok, err = d.Token() + if err != nil { + return true, err + } + switch t := tok.(type) { + case StartElement: + // the recursion depth of unmarshalPath is limited to the path length specified + // by the struct field tag, so we don't increment the depth here. + consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t, depth) + if err != nil { + return true, err + } + if !consumed2 { + if err := d.Skip(); err != nil { + return true, err + } + } + case EndElement: + return true, nil + } + } +} + +// Skip reads tokens until it has consumed the end element +// matching the most recent start element already consumed, +// skipping nested structures. +// It returns nil if it finds an end element matching the start +// element; otherwise it returns an error describing the problem. +func (d *Decoder) Skip() error { + var depth int64 + for { + tok, err := d.Token() + if err != nil { + return err + } + switch tok.(type) { + case StartElement: + depth++ + case EndElement: + if depth == 0 { + return nil + } + depth-- + } + } +} diff --git a/read_test.go b/read_test.go new file mode 100644 index 0000000..6799f13 --- /dev/null +++ b/read_test.go @@ -0,0 +1,1128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goxml + +import ( + "bytes" + "errors" + "io" + "reflect" + "runtime" + "strings" + "testing" + "time" +) + +// Stripped down Atom feed data structures. + +func TestUnmarshalFeed(t *testing.T) { + var f Feed + if err := Unmarshal([]byte(atomFeedString), &f); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(f, atomFeed) { + t.Fatalf("have %#v\nwant %#v", f, atomFeed) + } +} + +// hget http://codereview.appspot.com/rss/mine/rsc +const atomFeedString = ` + +Code Review - My issueshttp://codereview.appspot.com/rietveld<>rietveld: an attempt at pubsubhubbub +2009-10-04T01:35:58+00:00email-address-removedurn:md5:134d9179c41f806be79b3a5f7877d19a

+ An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a <link rel="hub" href="hub-server"> tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can't quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed's actual URL in +the link rel="self", but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +

rietveld: correct tab handling +2009-10-03T23:02:17+00:00email-address-removedurn:md5:0a2a4f19bb815101f0ba2904aed7c35a

+ This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn't know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +

` + +type Feed struct { + XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated,attr"` + Author Person `xml:"author"` + Entry []Entry `xml:"entry"` +} + +type Entry struct { + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated"` + Author Person `xml:"author"` + Summary Text `xml:"summary"` +} + +type Link struct { + Rel string `xml:"rel,attr,omitempty"` + Href string `xml:"href,attr"` +} + +type Person struct { + Name string `xml:"name"` + URI string `xml:"uri"` + Email string `xml:"email"` + InnerXML string `xml:",innerxml"` +} + +type Text struct { + Type string `xml:"type,attr,omitempty"` + Body string `xml:",chardata"` +} + +var atomFeed = Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Code Review - My issues", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/"}, + {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, + }, + ID: "http://codereview.appspot.com/", + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "rietveld<>", + InnerXML: "rietveld<>", + }, + Entry: []Entry{ + { + Title: "rietveld: an attempt at pubsubhubbub\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/126085"}, + }, + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "email-address-removed", + }, + ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a", + Summary: Text{ + Type: "html", + Body: ` + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a <link rel="hub" href="hub-server"> tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can't quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed's actual URL in +the link rel="self", but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +`, + }, + }, + { + Title: "rietveld: correct tab handling\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/124106"}, + }, + Updated: ParseTime("2009-10-03T23:02:17+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "email-address-removed", + }, + ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", + Summary: Text{ + Type: "html", + Body: ` + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn't know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +`, + }, + }, + }, +} + +const pathTestString = ` + + 1 + + + A + + + B + + + C + D + + <_> + E + + + 2 + +` + +type PathTestItem struct { + Value string +} + +type PathTestA struct { + Items []PathTestItem `xml:">Item1"` + Before, After string +} + +type PathTestB struct { + Other []PathTestItem `xml:"Items>Item1"` + Before, After string +} + +type PathTestC struct { + Values1 []string `xml:"Items>Item1>Value"` + Values2 []string `xml:"Items>Item2>Value"` + Before, After string +} + +type PathTestSet struct { + Item1 []PathTestItem +} + +type PathTestD struct { + Other PathTestSet `xml:"Items"` + Before, After string +} + +type PathTestE struct { + Underline string `xml:"Items>_>Value"` + Before, After string +} + +var pathTests = []any{ + &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, + &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, + &PathTestE{Underline: "E", Before: "1", After: "2"}, +} + +func TestUnmarshalPaths(t *testing.T) { + for _, pt := range pathTests { + v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() + if err := Unmarshal([]byte(pathTestString), v); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(v, pt) { + t.Fatalf("have %#v\nwant %#v", v, pt) + } + } +} + +type BadPathTestA struct { + First string `xml:"items>item1"` + Other string `xml:"items>item2"` + Second string `xml:"items"` +} + +type BadPathTestB struct { + Other string `xml:"items>item2>value"` + First string `xml:"items>item1"` + Second string `xml:"items>item1>value"` +} + +type BadPathTestC struct { + First string + Second string `xml:"First"` +} + +type BadPathTestD struct { + BadPathEmbeddedA + BadPathEmbeddedB +} + +type BadPathEmbeddedA struct { + First string +} + +type BadPathEmbeddedB struct { + Second string `xml:"First"` +} + +var badPathTests = []struct { + v, e any +}{ + {&BadPathTestA{}, &TagPathError{reflect.TypeFor[BadPathTestA](), "First", "items>item1", "Second", "items"}}, + {&BadPathTestB{}, &TagPathError{reflect.TypeFor[BadPathTestB](), "First", "items>item1", "Second", "items>item1>value"}}, + {&BadPathTestC{}, &TagPathError{reflect.TypeFor[BadPathTestC](), "First", "", "Second", "First"}}, + {&BadPathTestD{}, &TagPathError{reflect.TypeFor[BadPathTestD](), "First", "", "Second", "First"}}, +} + +func TestUnmarshalBadPaths(t *testing.T) { + for _, tt := range badPathTests { + err := Unmarshal([]byte(pathTestString), tt.v) + if !reflect.DeepEqual(err, tt.e) { + t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e) + } + } +} + +const OK = "OK" +const withoutNameTypeData = ` + +` + +type TestThree struct { + XMLName Name `xml:"Test3"` + Attr string `xml:",attr"` +} + +func TestUnmarshalWithoutNameType(t *testing.T) { + var x TestThree + if err := Unmarshal([]byte(withoutNameTypeData), &x); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if x.Attr != OK { + t.Fatalf("have %v\nwant %v", x.Attr, OK) + } +} + +func TestUnmarshalAttr(t *testing.T) { + type ParamVal struct { + Int int `xml:"int,attr"` + } + + type ParamPtr struct { + Int *int `xml:"int,attr"` + } + + type ParamStringPtr struct { + Int *string `xml:"int,attr"` + } + + x := []byte(``) + + p1 := &ParamPtr{} + if err := Unmarshal(x, p1); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p1.Int == nil { + t.Fatalf("Unmarshal failed in to *int field") + } else if *p1.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1) + } + + p2 := &ParamVal{} + if err := Unmarshal(x, p2); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p2.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1) + } + + p3 := &ParamStringPtr{} + if err := Unmarshal(x, p3); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p3.Int == nil { + t.Fatalf("Unmarshal failed in to *string field") + } else if *p3.Int != "1" { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1) + } +} + +type Tables struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table"` + FTable string `xml:"http://www.w3schools.com/furniture table"` +} + +var tables = []struct { + xml string + tab Tables + ns string +}{ + { + xml: `` + + `hello

` + + `world

` + + ``, + tab: Tables{"hello", "world"}, + }, + { + xml: `` + + `world

` + + `hello

` + + ``, + tab: Tables{"hello", "world"}, + }, + { + xml: `` + + `world` + + `hello` + + ``, + tab: Tables{"hello", "world"}, + }, + { + xml: `` + + `bogus

` + + ``, + tab: Tables{}, + }, + { + xml: `` + + `only

` + + ``, + tab: Tables{HTable: "only"}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `` + + `only

` + + ``, + tab: Tables{FTable: "only"}, + ns: "http://www.w3schools.com/furniture", + }, + { + xml: `` + + `only

` + + ``, + tab: Tables{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNS(t *testing.T) { + for i, tt := range tables { + var dst Tables + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNS(t *testing.T) { + dst := Tables{"hello", "world"} + data, err := Marshal(&dst) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `hello

world

", "p", "nowrap"}, + {"

", "p", "nowrap"}, + {"", "input", "checked"}, + {"", "input", "checked"}, + } + for _, test := range tests { + d := NewDecoder(strings.NewReader(test[0])) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if tok1.(StartElement).Attr[0].Value != "en" { + t.Error("CopyToken overwrote Attr[0]") + } + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenComment(t *testing.T) { + data := []byte("") + var tok1 Token = Comment(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) != Comment") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "

Foo

\n\n

Bar\n" + d := NewDecoder(strings.NewReader(testInput)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := ` ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { + } + if err != io.EOF { + t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := ` ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +var characterTests = []struct { + in string + err string +}{ + {"\x12", "illegal character code U+0012"}, + {"\x0b", "illegal character code U+000B"}, + {"\xef\xbf\xbe", "illegal character code U+FFFE"}, + {"\r\n\x07", "illegal character code U+0007"}, + {"what's up", "expected attribute name in element"}, + {"&abc\x01;", "invalid character entity &abc (no semicolon)"}, + {"&\x01;", "invalid character entity & (no semicolon)"}, + {"&\xef\xbf\xbe;", "invalid character entity &\uFFFE;"}, + {"&hello;", "invalid character entity &hello;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + d := NewDecoder(strings.NewReader(tt.in)) + var err error + + for err == nil { + _, err = d.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) + } + } +} + +func TestIsInCharacterRange(t *testing.T) { + invalid := []rune{ + utf8.MaxRune + 1, + 0xD800, // surrogate min + 0xDFFF, // surrogate max + -1, + } + for _, r := range invalid { + if isInCharacterRange(r) { + t.Errorf("rune %U considered valid", r) + } + } +} + +var procInstTests = []struct { + input string + expect [2]string +}{ + {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, + {`encoding="FOO" `, [2]string{"", "FOO"}}, + {`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version= encoding=`, [2]string{"", ""}}, + {`encoding="version=1.0"`, [2]string{"", "version=1.0"}}, + {``, [2]string{"", ""}}, + // TODO: what's the right approach to handle these nested cases? + {`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}}, + {`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + if got := procInst("version", test.input); got != test.expect[0] { + t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) + } + if got := procInst("encoding", test.input); got != test.expect[1] { + t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) + } + } +} + +// Ensure that directives with comments include the complete +// text of any nested directives. + +var directivesWithCommentsInput = ` +]> +]> + --> --> []> +` + +var directivesWithCommentsTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [ ]`), + CharData("\n"), + Directive(`DOCTYPE [ ]`), + CharData("\n"), + Directive(`DOCTYPE [ ]`), + CharData("\n"), +} + +func TestDirectivesWithComments(t *testing.T) { + d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) + + for i, want := range directivesWithCommentsTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Writer whose Write method always returns an error. +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } + +func TestEscapeTextIOErrors(t *testing.T) { + expectErr := "unwritable" + err := EscapeText(errWriter{}, []byte{'A'}) + + if err == nil || err.Error() != expectErr { + t.Errorf("have %v, want %v", err, expectErr) + } +} + +func TestEscapeTextInvalidChar(t *testing.T) { + input := []byte("A \x00 terminated string.") + expected := "A \uFFFD terminated string." + + buff := new(strings.Builder) + if err := EscapeText(buff, input); err != nil { + t.Fatalf("have %v, want nil", err) + } + text := buff.String() + + if text != expected { + t.Errorf("have %v, want %v", text, expected) + } +} + +func TestIssue5880(t *testing.T) { + type T []byte + data, err := Marshal(T{192, 168, 0, 1}) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + if !utf8.Valid(data) { + t.Errorf("Marshal generated invalid UTF-8: %x", data) + } +} + +func TestIssue8535(t *testing.T) { + + type ExampleConflict struct { + XMLName Name `xml:"example"` + Link string `xml:"link"` + AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space + } + testCase := ` + Example + http://example.com/default + http://example.com/home + http://example.com/ns + ` + + var dest ExampleConflict + d := NewDecoder(strings.NewReader(testCase)) + if err := d.Decode(&dest); err != nil { + t.Fatal(err) + } +} + +func TestEncodeXMLNS(t *testing.T) { + testCases := []struct { + f func() ([]byte, error) + want string + ok bool + }{ + {encodeXMLNS1, `hello world`, true}, + {encodeXMLNS2, `hello world`, true}, + {encodeXMLNS3, `hello world`, true}, + {encodeXMLNS4, `hello world`, false}, + } + + for i, tc := range testCases { + if b, err := tc.f(); err == nil { + if got, want := string(b), tc.want; got != want { + t.Errorf("%d: got %s, want %s \n", i, got, want) + } + } else { + t.Errorf("%d: marshal failed with %s", i, err) + } + } +} + +func encodeXMLNS1() ([]byte, error) { + + type T struct { + XMLName Name `xml:"Test"` + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &T{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS2() ([]byte, error) { + + type Test struct { + Body string `xml:"http://example.com/ns body"` + } + + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS3() ([]byte, error) { + + type Test struct { + XMLName Name `xml:"http://example.com/ns Test"` + Body string + } + + //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing + // as documentation states + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS4() ([]byte, error) { + + type Test struct { + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &Test{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func TestIssue11405(t *testing.T) { + testCases := []string{ + "", + "", + "", + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) + } + } +} + +func TestIssue12417(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {``, true}, + {``, true}, + {``, true}, + {``, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) + } + } +} + +func TestIssue7113(t *testing.T) { + type C struct { + XMLName Name `xml:""` // Sets empty namespace + } + + type D struct { + XMLName Name `xml:"d"` + } + + type A struct { + XMLName Name `xml:""` + C C `xml:""` + D D + } + + var a A + structSpace := "b" + xmlTest := `` + t.Log(xmlTest) + err := Unmarshal([]byte(xmlTest), &a) + if err != nil { + t.Fatal(err) + } + + if a.XMLName.Space != structSpace { + t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(a.C.XMLName.Space) != 0 { + t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space) + } + + var b []byte + b, err = Marshal(&a) + if err != nil { + t.Fatal(err) + } + if len(a.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space) + } + if string(b) != xmlTest { + t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest) + } + var c A + err = Unmarshal(b, &c) + if err != nil { + t.Fatalf("second Unmarshal failed: %s", err) + } + if c.XMLName.Space != "b" { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(c.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space) + } +} + +func TestIssue20396(t *testing.T) { + + var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element") + + testCases := []struct { + s string + wantErr error + }{ + {``, // Issue 20396 + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {``, attrError}, + {``, attrError}, + {``, nil}, + {`1`, + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`1`, attrError}, + {`1`, attrError}, + {`1`, nil}, + } + + var dest string + for _, tc := range testCases { + if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want { + if got == nil { + t.Errorf("%s: Unexpected success, want %v", tc.s, want) + } else if want == nil { + t.Errorf("%s: Unexpected error, got %v", tc.s, got) + } else if got.Error() != want.Error() { + t.Errorf("%s: got %v, want %v", tc.s, got, want) + } + } + } +} + +func TestIssue20685(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`one`, false}, + {`one`, true}, + {`one`, false}, + {`one`, false}, + {`one`, false}, + {`one`, false}, + {`one`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s) + } + } +} + +func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { + return func(src TokenReader) TokenReader { + return mapper{ + t: src, + f: mapping, + } + } +} + +type mapper struct { + t TokenReader + f func(Token) Token +} + +func (m mapper) Token() (Token, error) { + tok, err := m.t.Token() + if err != nil { + return nil, err + } + return m.f(tok), nil +} + +func TestNewTokenDecoderIdempotent(t *testing.T) { + d := NewDecoder(strings.NewReader(`
`)) + d2 := NewTokenDecoder(d) + if d != d2 { + t.Error("NewTokenDecoder did not detect underlying Decoder") + } +} + +func TestWrapDecoder(t *testing.T) { + d := NewDecoder(strings.NewReader(`[Re-enter Clown with a letter, and FABIAN]`)) + m := tokenMap(func(t Token) Token { + switch tok := t.(type) { + case StartElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + case EndElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + } + return t + }) + + d = NewTokenDecoder(m(d)) + + o := struct { + XMLName Name `xml:"blocking"` + Chardata string `xml:",chardata"` + }{} + + if err := d.Decode(&o); err != nil { + t.Fatal("Got unexpected error while decoding:", err) + } + + if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { + t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) + } +} + +type tokReader struct{} + +func (tokReader) Token() (Token, error) { + return StartElement{}, nil +} + +type Failure struct{} + +func (Failure) UnmarshalXML(*Decoder, StartElement) error { + return nil +} + +func TestTokenUnmarshaler(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Error("Unexpected panic using custom token unmarshaler") + } + }() + + d := NewTokenDecoder(tokReader{}) + d.Decode(&Failure{}) +} + +func testRoundTrip(t *testing.T, input string) { + d := NewDecoder(strings.NewReader(input)) + var tokens []Token + var buf bytes.Buffer + e := NewEncoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("invalid input: %v", err) + } + if err := e.EncodeToken(tok); err != nil { + t.Fatalf("failed to re-encode input: %v", err) + } + tokens = append(tokens, CopyToken(tok)) + } + if err := e.Flush(); err != nil { + t.Fatal(err) + } + + d = NewDecoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("failed to decode output: %v", err) + } + if len(tokens) == 0 { + t.Fatalf("unexpected token: %#v", tok) + } + a, b := tokens[0], tok + if !reflect.DeepEqual(a, b) { + t.Fatalf("token mismatch: %#v vs %#v", a, b) + } + tokens = tokens[1:] + } + if len(tokens) > 0 { + t.Fatalf("lost tokens: %#v", tokens) + } +} + +func TestRoundTrip(t *testing.T) { + tests := map[string]string{ + "trailing colon": ``, + "comments in directives": `--x --> > --x ]>`, + } + for name, input := range tests { + t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) + } +} + +func TestParseErrors(t *testing.T) { + withDefaultHeader := func(s string) string { + return `` + s + } + tests := []struct { + src string + err string + }{ + {withDefaultHeader(``), `unexpected end element `}, + {withDefaultHeader(``), `element in space x closed by in space y`}, + {withDefaultHeader(``), `expected target name after `), `invalid sequence `), `invalid sequence `), `invalid baz`), + `element in space zzz closed by in space ""`}, + {withDefaultHeader("\xf1"), `invalid UTF-8`}, + + // Header-related errors. + {``, `unsupported version "1.1"; only version 1.0 is supported`}, + + // Cases below are for "no errors". + {withDefaultHeader(``), ``}, + {withDefaultHeader(``), ``}, + } + + for _, test := range tests { + d := NewDecoder(strings.NewReader(test.src)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if test.err == "" { + if err != io.EOF { + t.Errorf("parse %s: have %q error, expected none", test.src, err) + } + continue + } + // Inv: err != nil + if err == io.EOF { + t.Errorf("parse %s: unexpected EOF", test.src) + continue + } + if !strings.Contains(err.Error(), test.err) { + t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err) + continue + } + } +} + +const testInputHTMLAutoClose = ` +
+

+

+

+
+

+

+
abc

` + +func BenchmarkHTMLAutoClose(b *testing.B) { + b.RunParallel(func(p *testing.PB) { + for p.Next() { + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + b.Fatalf("unexpected error: %v", err) + } + } + } + }) +} + +func TestHTMLAutoClose(t *testing.T) { + wantTokens := []Token{ + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "Br"}, []Attr{}}, + EndElement{Name{"", "Br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, + CharData("abc"), + EndElement{Name{"", "span"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + } + + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + var haveTokens []Token + for { + tok, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + t.Fatalf("unexpected error: %v", err) + } + haveTokens = append(haveTokens, CopyToken(tok)) + } + if len(haveTokens) != len(wantTokens) { + t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) + } + for i, want := range wantTokens { + if i >= len(haveTokens) { + t.Errorf("token[%d] expected %#v, have no token", i, want) + } else { + have := haveTokens[i] + if !reflect.DeepEqual(have, want) { + t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) + } + } + } +}