Source file src/encoding/xml/xml_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type toks struct {
    18  	earlyEOF bool
    19  	t        []Token
    20  }
    21  
    22  func (t *toks) Token() (Token, error) {
    23  	if len(t.t) == 0 {
    24  		return nil, io.EOF
    25  	}
    26  	var tok Token
    27  	tok, t.t = t.t[0], t.t[1:]
    28  	if t.earlyEOF && len(t.t) == 0 {
    29  		return tok, io.EOF
    30  	}
    31  	return tok, nil
    32  }
    33  
    34  func TestDecodeEOF(t *testing.T) {
    35  	start := StartElement{Name: Name{Local: "test"}}
    36  	tests := []struct {
    37  		name   string
    38  		tokens []Token
    39  		ok     bool
    40  	}{
    41  		{
    42  			name: "OK",
    43  			tokens: []Token{
    44  				start,
    45  				start.End(),
    46  			},
    47  			ok: true,
    48  		},
    49  		{
    50  			name: "Malformed",
    51  			tokens: []Token{
    52  				start,
    53  				StartElement{Name: Name{Local: "bad"}},
    54  				start.End(),
    55  			},
    56  			ok: false,
    57  		},
    58  	}
    59  	for _, tc := range tests {
    60  		for _, eof := range []bool{true, false} {
    61  			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
    62  			t.Run(name, func(t *testing.T) {
    63  				d := NewTokenDecoder(&toks{
    64  					earlyEOF: eof,
    65  					t:        tc.tokens,
    66  				})
    67  				err := d.Decode(&struct {
    68  					XMLName Name `xml:"test"`
    69  				}{})
    70  				if tc.ok && err != nil {
    71  					t.Fatalf("d.Decode: expected nil error, got %v", err)
    72  				}
    73  				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
    74  					t.Errorf("d.Decode: expected syntax error, got %v", err)
    75  				}
    76  			})
    77  		}
    78  	}
    79  }
    80  
    81  type toksNil struct {
    82  	returnEOF bool
    83  	t         []Token
    84  }
    85  
    86  func (t *toksNil) Token() (Token, error) {
    87  	if len(t.t) == 0 {
    88  		if !t.returnEOF {
    89  			// Return nil, nil before returning an EOF. It's legal, but
    90  			// discouraged.
    91  			t.returnEOF = true
    92  			return nil, nil
    93  		}
    94  		return nil, io.EOF
    95  	}
    96  	var tok Token
    97  	tok, t.t = t.t[0], t.t[1:]
    98  	return tok, nil
    99  }
   100  
   101  func TestDecodeNilToken(t *testing.T) {
   102  	for _, strict := range []bool{true, false} {
   103  		name := fmt.Sprintf("Strict=%v", strict)
   104  		t.Run(name, func(t *testing.T) {
   105  			start := StartElement{Name: Name{Local: "test"}}
   106  			bad := StartElement{Name: Name{Local: "bad"}}
   107  			d := NewTokenDecoder(&toksNil{
   108  				// Malformed
   109  				t: []Token{start, bad, start.End()},
   110  			})
   111  			d.Strict = strict
   112  			err := d.Decode(&struct {
   113  				XMLName Name `xml:"test"`
   114  			}{})
   115  			if _, ok := err.(*SyntaxError); !ok {
   116  				t.Errorf("d.Decode: expected syntax error, got %v", err)
   117  			}
   118  		})
   119  	}
   120  }
   121  
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined, numeric and custom entities, a self-closing
// element, a CDATA section, and a trailing comment with no final newline.
// The "\r\n\t" inside the <body> start tag is spliced in as an interpreted
// string so that those exact bytes appear in the input.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
   138  
// testEntity maps the custom entity names used in testInput to their
// replacement text; the tests assign it to Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
   140  
// rawTokens is the token sequence TestRawToken expects from RawToken for
// testInput (with testEntity installed). Element and attribute names keep
// their written prefixes in Name.Space; compare with cookedTokens.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
   177  
// cookedTokens is the token sequence TestToken expects from Token for
// testInput (with testEntity installed). Unlike rawTokens, element and
// attribute names carry the namespace values ("ns1", "ns2", "ns3") declared
// in the document instead of the written prefixes.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   214  
// testInputAltEncoding is a small document that declares the made-up
// encoding "x-testing-uppercase"; it is used by the alternate-encoding
// RawToken tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   218  
// rawTokensAltEncoding is the RawToken output TestRawTokenAltEncoding expects
// for testInputAltEncoding once the downCaser charset reader is installed:
// the element name and text after the XML declaration appear in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   227  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and requires that error to be a *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   274  
   275  func TestRawToken(t *testing.T) {
   276  	d := NewDecoder(strings.NewReader(testInput))
   277  	d.Entity = testEntity
   278  	testRawToken(t, d, testInput, rawTokens)
   279  }
   280  
// nonStrictInput holds elements whose text contains malformed or unknown
// entity references. TestNonStrictRawToken parses it with Strict disabled
// and expects the text to come through verbatim (see nonStrictTokens).
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   291  
// nonStrictTokens is the RawToken output expected for nonStrictInput when
// the decoder's Strict flag is false: every malformed entity reference is
// kept as literal character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   327  
   328  func TestNonStrictRawToken(t *testing.T) {
   329  	d := NewDecoder(strings.NewReader(nonStrictInput))
   330  	d.Strict = false
   331  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   332  }
   333  
   334  type downCaser struct {
   335  	t *testing.T
   336  	r io.ByteReader
   337  }
   338  
   339  func (d *downCaser) ReadByte() (c byte, err error) {
   340  	c, err = d.r.ReadByte()
   341  	if c >= 'A' && c <= 'Z' {
   342  		c += 'a' - 'A'
   343  	}
   344  	return
   345  }
   346  
   347  func (d *downCaser) Read(p []byte) (int, error) {
   348  	d.t.Fatalf("unexpected Read call on downCaser reader")
   349  	panic("unreachable")
   350  }
   351  
   352  func TestRawTokenAltEncoding(t *testing.T) {
   353  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   354  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   355  		if charset != "x-testing-uppercase" {
   356  			t.Fatalf("unexpected charset %q", charset)
   357  		}
   358  		return &downCaser{t, input.(io.ByteReader)}, nil
   359  	}
   360  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   361  }
   362  
   363  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   364  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   365  	token, err := d.RawToken()
   366  	if token == nil {
   367  		t.Fatalf("expected a token on first RawToken call")
   368  	}
   369  	if err != nil {
   370  		t.Fatal(err)
   371  	}
   372  	token, err = d.RawToken()
   373  	if token != nil {
   374  		t.Errorf("expected a nil token; got %#v", token)
   375  	}
   376  	if err == nil {
   377  		t.Fatalf("expected an error on second RawToken call")
   378  	}
   379  	const encoding = "x-testing-uppercase"
   380  	if !strings.Contains(err.Error(), encoding) {
   381  		t.Errorf("expected error to contain %q; got error: %v",
   382  			encoding, err)
   383  	}
   384  }
   385  
   386  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   387  	lastEnd := int64(0)
   388  	for i, want := range rawTokens {
   389  		start := d.InputOffset()
   390  		have, err := d.RawToken()
   391  		end := d.InputOffset()
   392  		if err != nil {
   393  			t.Fatalf("token %d: unexpected error: %s", i, err)
   394  		}
   395  		if !reflect.DeepEqual(have, want) {
   396  			var shave, swant string
   397  			if _, ok := have.(CharData); ok {
   398  				shave = fmt.Sprintf("CharData(%q)", have)
   399  			} else {
   400  				shave = fmt.Sprintf("%#v", have)
   401  			}
   402  			if _, ok := want.(CharData); ok {
   403  				swant = fmt.Sprintf("CharData(%q)", want)
   404  			} else {
   405  				swant = fmt.Sprintf("%#v", want)
   406  			}
   407  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   408  		}
   409  
   410  		// Check that InputOffset returned actual token.
   411  		switch {
   412  		case start < lastEnd:
   413  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   414  		case start >= end:
   415  			// Special case: EndElement can be synthesized.
   416  			if start == end && end == lastEnd {
   417  				break
   418  			}
   419  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   420  		case end > int64(len(raw)):
   421  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   422  		default:
   423  			text := raw[start:end]
   424  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   425  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   426  			}
   427  		}
   428  		lastEnd = end
   429  	}
   430  }
   431  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput is the input for TestNestedDirectives: one DOCTYPE
// per line, each with a nested ENTITY declaration whose quoted value
// contains '<' or '>' in various quote combinations.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   445  
// nestedDirectivesTokens is the Token output TestNestedDirectives expects
// for nestedDirectivesInput: each DOCTYPE becomes a single Directive that
// holds the full nested text, separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   463  
   464  func TestNestedDirectives(t *testing.T) {
   465  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   466  
   467  	for i, want := range nestedDirectivesTokens {
   468  		have, err := d.Token()
   469  		if err != nil {
   470  			t.Fatalf("token %d: unexpected error: %s", i, err)
   471  		}
   472  		if !reflect.DeepEqual(have, want) {
   473  			t.Errorf("token %d = %#v want %#v", i, have, want)
   474  		}
   475  	}
   476  }
   477  
   478  func TestToken(t *testing.T) {
   479  	d := NewDecoder(strings.NewReader(testInput))
   480  	d.Entity = testEntity
   481  
   482  	for i, want := range cookedTokens {
   483  		have, err := d.Token()
   484  		if err != nil {
   485  			t.Fatalf("token %d: unexpected error: %s", i, err)
   486  		}
   487  		if !reflect.DeepEqual(have, want) {
   488  			t.Errorf("token %d = %#v want %#v", i, have, want)
   489  		}
   490  	}
   491  }
   492  
   493  func TestSyntax(t *testing.T) {
   494  	for i := range xmlInput {
   495  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   496  		var err error
   497  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   498  		}
   499  		if _, ok := err.(*SyntaxError); !ok {
   500  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   501  		}
   502  	}
   503  }
   504  
   505  type allScalars struct {
   506  	True1     bool
   507  	True2     bool
   508  	False1    bool
   509  	False2    bool
   510  	Int       int
   511  	Int8      int8
   512  	Int16     int16
   513  	Int32     int32
   514  	Int64     int64
   515  	Uint      int
   516  	Uint8     uint8
   517  	Uint16    uint16
   518  	Uint32    uint32
   519  	Uint64    uint64
   520  	Uintptr   uintptr
   521  	Float32   float32
   522  	Float64   float64
   523  	String    string
   524  	PtrString *string
   525  }
   526  
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars. PtrString points at the
// package-level variable sixteen.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}
   548  
// sixteen is an addressable string so that all.PtrString can point to it.
var sixteen = "16"
   550  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. Booleans appear both as true/false and as 1/0. The <Float>
// element has no corresponding field in allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   573  
   574  func TestAllScalars(t *testing.T) {
   575  	var a allScalars
   576  	err := Unmarshal([]byte(testScalarsInput), &a)
   577  
   578  	if err != nil {
   579  		t.Fatal(err)
   580  	}
   581  	if !reflect.DeepEqual(a, all) {
   582  		t.Errorf("have %+v want %+v", a, all)
   583  	}
   584  }
   585  
// item is the decode target for TestIssue569; its single field matches the
// <FieldA> child element by name.
type item struct {
	FieldA string
}
   589  
   590  func TestIssue569(t *testing.T) {
   591  	data := `<item><FieldA>abcd</FieldA></item>`
   592  	var i item
   593  	err := Unmarshal([]byte(data), &i)
   594  
   595  	if err != nil || i.FieldA != "abcd" {
   596  		t.Fatal("Expecting abcd")
   597  	}
   598  }
   599  
   600  func TestUnquotedAttrs(t *testing.T) {
   601  	data := "<tag attr=azAZ09:-_\t>"
   602  	d := NewDecoder(strings.NewReader(data))
   603  	d.Strict = false
   604  	token, err := d.Token()
   605  	if _, ok := err.(*SyntaxError); ok {
   606  		t.Errorf("Unexpected error: %v", err)
   607  	}
   608  	if token.(StartElement).Name.Local != "tag" {
   609  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   610  	}
   611  	attr := token.(StartElement).Attr[0]
   612  	if attr.Value != "azAZ09:-_" {
   613  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   614  	}
   615  	if attr.Name.Local != "attr" {
   616  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   617  	}
   618  }
   619  
   620  func TestValuelessAttrs(t *testing.T) {
   621  	tests := [][3]string{
   622  		{"<p nowrap>", "p", "nowrap"},
   623  		{"<p nowrap >", "p", "nowrap"},
   624  		{"<input checked/>", "input", "checked"},
   625  		{"<input checked />", "input", "checked"},
   626  	}
   627  	for _, test := range tests {
   628  		d := NewDecoder(strings.NewReader(test[0]))
   629  		d.Strict = false
   630  		token, err := d.Token()
   631  		if _, ok := err.(*SyntaxError); ok {
   632  			t.Errorf("Unexpected error: %v", err)
   633  		}
   634  		if token.(StartElement).Name.Local != test[1] {
   635  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   636  		}
   637  		attr := token.(StartElement).Attr[0]
   638  		if attr.Value != test[2] {
   639  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   640  		}
   641  		if attr.Name.Local != test[2] {
   642  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   643  		}
   644  	}
   645  }
   646  
   647  func TestCopyTokenCharData(t *testing.T) {
   648  	data := []byte("same data")
   649  	var tok1 Token = CharData(data)
   650  	tok2 := CopyToken(tok1)
   651  	if !reflect.DeepEqual(tok1, tok2) {
   652  		t.Error("CopyToken(CharData) != CharData")
   653  	}
   654  	data[1] = 'o'
   655  	if reflect.DeepEqual(tok1, tok2) {
   656  		t.Error("CopyToken(CharData) uses same buffer.")
   657  	}
   658  }
   659  
   660  func TestCopyTokenStartElement(t *testing.T) {
   661  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   662  	var tok1 Token = elt
   663  	tok2 := CopyToken(tok1)
   664  	if tok1.(StartElement).Attr[0].Value != "en" {
   665  		t.Error("CopyToken overwrote Attr[0]")
   666  	}
   667  	if !reflect.DeepEqual(tok1, tok2) {
   668  		t.Error("CopyToken(StartElement) != StartElement")
   669  	}
   670  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   671  	if reflect.DeepEqual(tok1, tok2) {
   672  		t.Error("CopyToken(CharData) uses same buffer.")
   673  	}
   674  }
   675  
   676  func TestSyntaxErrorLineNum(t *testing.T) {
   677  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   678  	d := NewDecoder(strings.NewReader(testInput))
   679  	var err error
   680  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   681  	}
   682  	synerr, ok := err.(*SyntaxError)
   683  	if !ok {
   684  		t.Error("Expected SyntaxError.")
   685  	}
   686  	if synerr.Line != 3 {
   687  		t.Error("SyntaxError didn't have correct line number.")
   688  	}
   689  }
   690  
   691  func TestTrailingRawToken(t *testing.T) {
   692  	input := `<FOO></FOO>  `
   693  	d := NewDecoder(strings.NewReader(input))
   694  	var err error
   695  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   696  	}
   697  	if err != io.EOF {
   698  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   699  	}
   700  }
   701  
   702  func TestTrailingToken(t *testing.T) {
   703  	input := `<FOO></FOO>  `
   704  	d := NewDecoder(strings.NewReader(input))
   705  	var err error
   706  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   707  	}
   708  	if err != io.EOF {
   709  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   710  	}
   711  }
   712  
   713  func TestEntityInsideCDATA(t *testing.T) {
   714  	input := `<test><![CDATA[ &val=foo ]]></test>`
   715  	d := NewDecoder(strings.NewReader(input))
   716  	var err error
   717  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   718  	}
   719  	if err != io.EOF {
   720  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   721  	}
   722  }
   723  
// characterTests pairs inputs containing disallowed characters or invalid
// entity references (in) with the SyntaxError.Msg that
// TestDisallowedCharacters expects (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   738  
   739  func TestDisallowedCharacters(t *testing.T) {
   740  
   741  	for i, tt := range characterTests {
   742  		d := NewDecoder(strings.NewReader(tt.in))
   743  		var err error
   744  
   745  		for err == nil {
   746  			_, err = d.Token()
   747  		}
   748  		synerr, ok := err.(*SyntaxError)
   749  		if !ok {
   750  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   751  		}
   752  		if synerr.Msg != tt.err {
   753  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   754  		}
   755  	}
   756  }
   757  
   758  func TestIsInCharacterRange(t *testing.T) {
   759  	invalid := []rune{
   760  		utf8.MaxRune + 1,
   761  		0xD800, // surrogate min
   762  		0xDFFF, // surrogate max
   763  		-1,
   764  	}
   765  	for _, r := range invalid {
   766  		if isInCharacterRange(r) {
   767  			t.Errorf("rune %U considered valid", r)
   768  		}
   769  	}
   770  }
   771  
// procInstTests pairs processing-instruction content (input) with the
// values TestProcInstEncoding expects procInst to extract: expect[0] is the
// "version" value and expect[1] the "encoding" value. An empty string is
// expected where the parameter is absent or, as in the fourth case, unquoted.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   782  
   783  func TestProcInstEncoding(t *testing.T) {
   784  	for _, test := range procInstTests {
   785  		if got := procInst("version", test.input); got != test.expect[0] {
   786  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   787  		}
   788  		if got := procInst("encoding", test.input); got != test.expect[1] {
   789  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   790  		}
   791  	}
   792  }
   793  
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput is the input for TestDirectivesWithComments:
// DOCTYPE directives with comments placed before, after, and between nested
// content, including near-comment sequences such as <!-> and <!>.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   802  
// directivesWithCommentsTokens is the Token output expected for
// directivesWithCommentsInput. In the expected directives every comment from
// the input appears as a single space, while <!-> and <!> are kept as
// written.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
}
   812  
   813  func TestDirectivesWithComments(t *testing.T) {
   814  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   815  
   816  	for i, want := range directivesWithCommentsTokens {
   817  		have, err := d.Token()
   818  		if err != nil {
   819  			t.Fatalf("token %d: unexpected error: %s", i, err)
   820  		}
   821  		if !reflect.DeepEqual(have, want) {
   822  			t.Errorf("token %d = %#v want %#v", i, have, want)
   823  		}
   824  	}
   825  }
   826  
   827  // Writer whose Write method always returns an error.
   828  type errWriter struct{}
   829  
   830  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   831  
   832  func TestEscapeTextIOErrors(t *testing.T) {
   833  	expectErr := "unwritable"
   834  	err := EscapeText(errWriter{}, []byte{'A'})
   835  
   836  	if err == nil || err.Error() != expectErr {
   837  		t.Errorf("have %v, want %v", err, expectErr)
   838  	}
   839  }
   840  
   841  func TestEscapeTextInvalidChar(t *testing.T) {
   842  	input := []byte("A \x00 terminated string.")
   843  	expected := "A \uFFFD terminated string."
   844  
   845  	buff := new(bytes.Buffer)
   846  	if err := EscapeText(buff, input); err != nil {
   847  		t.Fatalf("have %v, want nil", err)
   848  	}
   849  	text := buff.String()
   850  
   851  	if text != expected {
   852  		t.Errorf("have %v, want %v", text, expected)
   853  	}
   854  }
   855  
   856  func TestIssue5880(t *testing.T) {
   857  	type T []byte
   858  	data, err := Marshal(T{192, 168, 0, 1})
   859  	if err != nil {
   860  		t.Errorf("Marshal error: %v", err)
   861  	}
   862  	if !utf8.Valid(data) {
   863  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   864  	}
   865  }
   866  
   867  func TestIssue11405(t *testing.T) {
   868  	testCases := []string{
   869  		"<root>",
   870  		"<root><foo>",
   871  		"<root><foo></foo>",
   872  	}
   873  	for _, tc := range testCases {
   874  		d := NewDecoder(strings.NewReader(tc))
   875  		var err error
   876  		for {
   877  			_, err = d.Token()
   878  			if err != nil {
   879  				break
   880  			}
   881  		}
   882  		if _, ok := err.(*SyntaxError); !ok {
   883  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
   884  		}
   885  	}
   886  }
   887  
   888  func TestIssue12417(t *testing.T) {
   889  	testCases := []struct {
   890  		s  string
   891  		ok bool
   892  	}{
   893  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
   894  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
   895  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
   896  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
   897  	}
   898  	for _, tc := range testCases {
   899  		d := NewDecoder(strings.NewReader(tc.s))
   900  		var err error
   901  		for {
   902  			_, err = d.Token()
   903  			if err != nil {
   904  				if err == io.EOF {
   905  					err = nil
   906  				}
   907  				break
   908  			}
   909  		}
   910  		if err != nil && tc.ok {
   911  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
   912  			continue
   913  		}
   914  		if err == nil && !tc.ok {
   915  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
   916  		}
   917  	}
   918  }
   919  
   920  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
   921  	return func(src TokenReader) TokenReader {
   922  		return mapper{
   923  			t: src,
   924  			f: mapping,
   925  		}
   926  	}
   927  }
   928  
   929  type mapper struct {
   930  	t TokenReader
   931  	f func(Token) Token
   932  }
   933  
   934  func (m mapper) Token() (Token, error) {
   935  	tok, err := m.t.Token()
   936  	if err != nil {
   937  		return nil, err
   938  	}
   939  	return m.f(tok), nil
   940  }
   941  
   942  func TestNewTokenDecoderIdempotent(t *testing.T) {
   943  	d := NewDecoder(strings.NewReader(`<br>`))
   944  	d2 := NewTokenDecoder(d)
   945  	if d != d2 {
   946  		t.Error("NewTokenDecoder did not detect underlying Decoder")
   947  	}
   948  }
   949  
   950  func TestWrapDecoder(t *testing.T) {
   951  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
   952  	m := tokenMap(func(t Token) Token {
   953  		switch tok := t.(type) {
   954  		case StartElement:
   955  			if tok.Name.Local == "quote" {
   956  				tok.Name.Local = "blocking"
   957  				return tok
   958  			}
   959  		case EndElement:
   960  			if tok.Name.Local == "quote" {
   961  				tok.Name.Local = "blocking"
   962  				return tok
   963  			}
   964  		}
   965  		return t
   966  	})
   967  
   968  	d = NewTokenDecoder(m(d))
   969  
   970  	o := struct {
   971  		XMLName  Name   `xml:"blocking"`
   972  		Chardata string `xml:",chardata"`
   973  	}{}
   974  
   975  	if err := d.Decode(&o); err != nil {
   976  		t.Fatal("Got unexpected error while decoding:", err)
   977  	}
   978  
   979  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
   980  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
   981  	}
   982  }
   983  
// tokReader is a token source that returns an empty StartElement with a nil
// error on every call; it never reports io.EOF.
type tokReader struct{}

// Token always returns a zero-value StartElement and a nil error.
func (tokReader) Token() (Token, error) {
	return StartElement{}, nil
}
   989  
// Failure is a decode target with a custom UnmarshalXML method that reads
// nothing and always succeeds. TestTokenUnmarshaler decodes into it.
type Failure struct{}

// UnmarshalXML ignores both arguments and returns nil.
func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	return nil
}
   995  
   996  func TestTokenUnmarshaler(t *testing.T) {
   997  	defer func() {
   998  		if r := recover(); r != nil {
   999  			t.Error("Unexpected panic using custom token unmarshaler")
  1000  		}
  1001  	}()
  1002  
  1003  	d := NewTokenDecoder(tokReader{})
  1004  	d.Decode(&Failure{})
  1005  }
  1006  
  1007  func testRoundTrip(t *testing.T, input string) {
  1008  	d := NewDecoder(strings.NewReader(input))
  1009  	var tokens []Token
  1010  	var buf bytes.Buffer
  1011  	e := NewEncoder(&buf)
  1012  	for {
  1013  		tok, err := d.Token()
  1014  		if err == io.EOF {
  1015  			break
  1016  		}
  1017  		if err != nil {
  1018  			t.Fatalf("invalid input: %v", err)
  1019  		}
  1020  		if err := e.EncodeToken(tok); err != nil {
  1021  			t.Fatalf("failed to re-encode input: %v", err)
  1022  		}
  1023  		tokens = append(tokens, CopyToken(tok))
  1024  	}
  1025  	if err := e.Flush(); err != nil {
  1026  		t.Fatal(err)
  1027  	}
  1028  
  1029  	d = NewDecoder(&buf)
  1030  	for {
  1031  		tok, err := d.Token()
  1032  		if err == io.EOF {
  1033  			break
  1034  		}
  1035  		if err != nil {
  1036  			t.Fatalf("failed to decode output: %v", err)
  1037  		}
  1038  		if len(tokens) == 0 {
  1039  			t.Fatalf("unexpected token: %#v", tok)
  1040  		}
  1041  		a, b := tokens[0], tok
  1042  		if !reflect.DeepEqual(a, b) {
  1043  			t.Fatalf("token mismatch: %#v vs %#v", a, b)
  1044  		}
  1045  		tokens = tokens[1:]
  1046  	}
  1047  	if len(tokens) > 0 {
  1048  		t.Fatalf("lost tokens: %#v", tokens)
  1049  	}
  1050  }
  1051  
  1052  func TestRoundTrip(t *testing.T) {
  1053  	tests := map[string]string{
  1054  		"leading colon":          `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
  1055  		"trailing colon":         `<foo abc:="x"></foo>`,
  1056  		"double colon":           `<x:y:foo></x:y:foo>`,
  1057  		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
  1058  	}
  1059  	for name, input := range tests {
  1060  		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
  1061  	}
  1062  }
  1063  

View as plain text