Source file src/vendor/golang.org/x/net/idna/idna9.0.0.go

     1  // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  //go:build !go1.10
     8  // +build !go1.10
     9  
    10  // Package idna implements IDNA2008 using the compatibility processing
    11  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
    12  // deal with the transition from IDNA2003.
    13  //
    14  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    15  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    16  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    17  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    18  // differences between these two standards.
    19  package idna // import "golang.org/x/net/idna"
    20  
    21  import (
    22  	"fmt"
    23  	"strings"
    24  	"unicode/utf8"
    25  
    26  	"golang.org/x/text/secure/bidirule"
    27  	"golang.org/x/text/unicode/norm"
    28  )
    29  
    30  // NOTE: Unlike common practice in Go APIs, the functions will return a
    31  // sanitized domain name in case of errors. Browsers sometimes use a partially
    32  // evaluated string as lookup.
    33  // TODO: the current error handling is, in my opinion, the least opinionated.
    34  // Other strategies are also viable, though:
    35  // Option 1) Return an empty string in case of error, but allow the user to
    36  //    specify explicitly which errors to ignore.
    37  // Option 2) Return the partially evaluated string if it is itself a valid
    38  //    string, otherwise return the empty string in case of error.
    39  // Option 3) Option 1 and 2.
    40  // Option 4) Always return an empty string for now and implement Option 1 as
    41  //    needed, and document that the return string may not be empty in case of
    42  //    error in the future.
    43  // I think Option 1 is best, but it is quite opinionated.
    44  
    45  // ToASCII is a wrapper for Punycode.ToASCII.
    46  func ToASCII(s string) (string, error) {
    47  	return Punycode.process(s, true)
    48  }
    49  
    50  // ToUnicode is a wrapper for Punycode.ToUnicode.
    51  func ToUnicode(s string) (string, error) {
    52  	return Punycode.process(s, false)
    53  }
    54  
// An Option configures a Profile at creation time. Each Option is a function
// that mutates the options value it is given; New applies the supplied Options
// in order.
type Option func(*options)
    57  
    58  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    59  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    60  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    61  // compatibility. It is used by some browsers when resolving domain names. This
    62  // option is only meaningful if combined with MapForLookup.
    63  func Transitional(transitional bool) Option {
    64  	return func(o *options) { o.transitional = transitional }
    65  }
    66  
    67  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    68  // are longer than allowed by the RFC.
    69  //
    70  // This option corresponds to the VerifyDnsLength flag in UTS #46.
    71  func VerifyDNSLength(verify bool) Option {
    72  	return func(o *options) { o.verifyDNSLength = verify }
    73  }
    74  
    75  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    76  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    77  func RemoveLeadingDots(remove bool) Option {
    78  	return func(o *options) { o.removeLeadingDots = remove }
    79  }
    80  
    81  // ValidateLabels sets whether to check the mandatory label validation criteria
    82  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    83  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    84  // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
    85  // in UTS #46.
    86  func ValidateLabels(enable bool) Option {
    87  	return func(o *options) {
    88  		// Don't override existing mappings, but set one that at least checks
    89  		// normalization if it is not set.
    90  		if o.mapping == nil && enable {
    91  			o.mapping = normalize
    92  		}
    93  		o.trie = trie
    94  		o.checkJoiners = enable
    95  		o.checkHyphens = enable
    96  		if enable {
    97  			o.fromPuny = validateFromPunycode
    98  		} else {
    99  			o.fromPuny = nil
   100  		}
   101  	}
   102  }
   103  
   104  // CheckHyphens sets whether to check for correct use of hyphens ('-') in
   105  // labels. Most web browsers do not have this option set, since labels such as
   106  // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
   107  //
   108  // This option corresponds to the CheckHyphens flag in UTS #46.
   109  func CheckHyphens(enable bool) Option {
   110  	return func(o *options) { o.checkHyphens = enable }
   111  }
   112  
   113  // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
   114  // A of RFC 5892, concerning the use of joiner runes.
   115  //
   116  // This option corresponds to the CheckJoiners flag in UTS #46.
   117  func CheckJoiners(enable bool) Option {
   118  	return func(o *options) {
   119  		o.trie = trie
   120  		o.checkJoiners = enable
   121  	}
   122  }
   123  
   124  // StrictDomainName limits the set of permissable ASCII characters to those
   125  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
   126  // hyphen). This is set by default for MapForLookup and ValidateForRegistration,
   127  // but is only useful if ValidateLabels is set.
   128  //
   129  // This option is useful, for instance, for browsers that allow characters
   130  // outside this range, for example a '_' (U+005F LOW LINE). See
   131  // http://www.rfc-editor.org/std/std3.txt for more details.
   132  //
   133  // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
   134  func StrictDomainName(use bool) Option {
   135  	return func(o *options) { o.useSTD3Rules = use }
   136  }
   137  
   138  // NOTE: the following options pull in tables. The tables should not be linked
   139  // in as long as the options are not used.
   140  
   141  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   142  // that relies on proper validation of labels should include this rule.
   143  //
   144  // This option corresponds to the CheckBidi flag in UTS #46.
   145  func BidiRule() Option {
   146  	return func(o *options) { o.bidirule = bidirule.ValidString }
   147  }
   148  
   149  // ValidateForRegistration sets validation options to verify that a given IDN is
   150  // properly formatted for registration as defined by Section 4 of RFC 5891.
   151  func ValidateForRegistration() Option {
   152  	return func(o *options) {
   153  		o.mapping = validateRegistration
   154  		StrictDomainName(true)(o)
   155  		ValidateLabels(true)(o)
   156  		VerifyDNSLength(true)(o)
   157  		BidiRule()(o)
   158  	}
   159  }
   160  
   161  // MapForLookup sets validation and mapping options such that a given IDN is
   162  // transformed for domain name lookup according to the requirements set out in
   163  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   164  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   165  // to add this check.
   166  //
   167  // The mappings include normalization and mapping case, width and other
   168  // compatibility mappings.
   169  func MapForLookup() Option {
   170  	return func(o *options) {
   171  		o.mapping = validateAndMap
   172  		StrictDomainName(true)(o)
   173  		ValidateLabels(true)(o)
   174  		RemoveLeadingDots(true)(o)
   175  	}
   176  }
   177  
// options holds the settings written by the Option functions and read by a
// Profile while it processes a domain name.
type options struct {
	// transitional is set by Transitional; simplify keeps the deviation
	// category only when it is true.
	transitional      bool
	// useSTD3Rules is set by StrictDomainName; simplify turns the
	// disallowedSTD3* categories into disallowed when it is true.
	useSTD3Rules      bool
	// checkHyphens enables the "V2"/"V3" hyphen checks in validateLabel.
	checkHyphens      bool
	// checkJoiners enables the joiner checks in validateLabel, which read trie.
	checkJoiners      bool
	// verifyDNSLength makes process and validateLabel report "A4" errors for
	// empty or overlong labels and names.
	verifyDNSLength   bool
	// removeLeadingDots makes process strip leading '.' bytes after mapping.
	removeLeadingDots bool

	// trie is the lookup table used by validateLabel; the options that can
	// enable checkJoiners also set it.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (string, error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   199  
// A Profile defines the configuration of an IDNA mapper. It embeds the options
// that New, or one of the predefined profiles, filled in.
type Profile struct {
	options
}
   204  
   205  func apply(o *options, opts []Option) {
   206  	for _, f := range opts {
   207  		f(o)
   208  	}
   209  }
   210  
   211  // New creates a new Profile.
   212  //
   213  // With no options, the returned Profile is the most permissive and equals the
   214  // Punycode Profile. Options can be passed to further restrict the Profile. The
   215  // MapForLookup and ValidateForRegistration options set a collection of options,
   216  // for lookup and registration purposes respectively, which can be tailored by
   217  // adding more fine-grained options, where later options override earlier
   218  // options.
   219  func New(o ...Option) *Profile {
   220  	p := &Profile{}
   221  	apply(&p.options, o)
   222  	return p
   223  }
   224  
   225  // ToASCII converts a domain or domain label to its ASCII form. For example,
   226  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   227  // ToASCII("golang") is "golang". If an error is encountered it will return
   228  // an error and a (partially) processed result.
   229  func (p *Profile) ToASCII(s string) (string, error) {
   230  	return p.process(s, true)
   231  }
   232  
   233  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   234  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   235  // ToUnicode("golang") is "golang". If an error is encountered it will return
   236  // an error and a (partially) processed result.
   237  func (p *Profile) ToUnicode(s string) (string, error) {
   238  	pp := *p
   239  	pp.transitional = false
   240  	return pp.process(s, false)
   241  }
   242  
   243  // String reports a string with a description of the profile for debugging
   244  // purposes. The string format may change with different versions.
   245  func (p *Profile) String() string {
   246  	s := ""
   247  	if p.transitional {
   248  		s = "Transitional"
   249  	} else {
   250  		s = "NonTransitional"
   251  	}
   252  	if p.useSTD3Rules {
   253  		s += ":UseSTD3Rules"
   254  	}
   255  	if p.checkHyphens {
   256  		s += ":CheckHyphens"
   257  	}
   258  	if p.checkJoiners {
   259  		s += ":CheckJoiners"
   260  	}
   261  	if p.verifyDNSLength {
   262  		s += ":VerifyDNSLength"
   263  	}
   264  	return s
   265  }
   266  
// Predefined profiles. The exported variables point at the unexported values
// declared further down in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation. It is the zero Profile: no option is set.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	punycode = &Profile{}
	// lookup maps with validateAndMap and is the only predefined profile
	// that sets transitional.
	lookup   = &Profile{options{
		transitional:      true,
		removeLeadingDots: true,
		useSTD3Rules:      true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// display has the same settings as lookup except that transitional is
	// left unset.
	display = &Profile{options{
		useSTD3Rules:      true,
		removeLeadingDots: true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// registration validates with validateRegistration instead of mapping,
	// verifies DNS length, and does not remove leading dots.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   322  
   323  type labelError struct{ label, code_ string }
   324  
   325  func (e labelError) code() string { return e.code_ }
   326  func (e labelError) Error() string {
   327  	return fmt.Sprintf("idna: invalid label %q", e.label)
   328  }
   329  
   330  type runeError rune
   331  
   332  func (e runeError) code() string { return "P1" }
   333  func (e runeError) Error() string {
   334  	return fmt.Sprintf("idna: disallowed rune %U", e)
   335  }
   336  
   337  // process implements the algorithm described in section 4 of UTS #46,
   338  // see https://www.unicode.org/reports/tr46.
   339  func (p *Profile) process(s string, toASCII bool) (string, error) {
   340  	var err error
   341  	if p.mapping != nil {
   342  		s, err = p.mapping(p, s)
   343  	}
   344  	// Remove leading empty labels.
   345  	if p.removeLeadingDots {
   346  		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
   347  		}
   348  	}
   349  	// It seems like we should only create this error on ToASCII, but the
   350  	// UTS 46 conformance tests suggests we should always check this.
   351  	if err == nil && p.verifyDNSLength && s == "" {
   352  		err = &labelError{s, "A4"}
   353  	}
   354  	labels := labelIter{orig: s}
   355  	for ; !labels.done(); labels.next() {
   356  		label := labels.label()
   357  		if label == "" {
   358  			// Empty labels are not okay. The label iterator skips the last
   359  			// label if it is empty.
   360  			if err == nil && p.verifyDNSLength {
   361  				err = &labelError{s, "A4"}
   362  			}
   363  			continue
   364  		}
   365  		if strings.HasPrefix(label, acePrefix) {
   366  			u, err2 := decode(label[len(acePrefix):])
   367  			if err2 != nil {
   368  				if err == nil {
   369  					err = err2
   370  				}
   371  				// Spec says keep the old label.
   372  				continue
   373  			}
   374  			labels.set(u)
   375  			if err == nil && p.fromPuny != nil {
   376  				err = p.fromPuny(p, u)
   377  			}
   378  			if err == nil {
   379  				// This should be called on NonTransitional, according to the
   380  				// spec, but that currently does not have any effect. Use the
   381  				// original profile to preserve options.
   382  				err = p.validateLabel(u)
   383  			}
   384  		} else if err == nil {
   385  			err = p.validateLabel(label)
   386  		}
   387  	}
   388  	if toASCII {
   389  		for labels.reset(); !labels.done(); labels.next() {
   390  			label := labels.label()
   391  			if !ascii(label) {
   392  				a, err2 := encode(acePrefix, label)
   393  				if err == nil {
   394  					err = err2
   395  				}
   396  				label = a
   397  				labels.set(a)
   398  			}
   399  			n := len(label)
   400  			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
   401  				err = &labelError{label, "A4"}
   402  			}
   403  		}
   404  	}
   405  	s = labels.result()
   406  	if toASCII && p.verifyDNSLength && err == nil {
   407  		// Compute the length of the domain name minus the root label and its dot.
   408  		n := len(s)
   409  		if n > 0 && s[n-1] == '.' {
   410  			n--
   411  		}
   412  		if len(s) < 1 || n > 253 {
   413  			err = &labelError{s, "A4"}
   414  		}
   415  	}
   416  	return s, err
   417  }
   418  
   419  func normalize(p *Profile, s string) (string, error) {
   420  	return norm.NFC.String(s), nil
   421  }
   422  
   423  func validateRegistration(p *Profile, s string) (string, error) {
   424  	if !norm.NFC.IsNormalString(s) {
   425  		return s, &labelError{s, "V1"}
   426  	}
   427  	for i := 0; i < len(s); {
   428  		v, sz := trie.lookupString(s[i:])
   429  		// Copy bytes not copied so far.
   430  		switch p.simplify(info(v).category()) {
   431  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   432  		// for strict conformance to IDNA2008.
   433  		case valid, deviation:
   434  		case disallowed, mapped, unknown, ignored:
   435  			r, _ := utf8.DecodeRuneInString(s[i:])
   436  			return s, runeError(r)
   437  		}
   438  		i += sz
   439  	}
   440  	return s, nil
   441  }
   442  
   443  func validateAndMap(p *Profile, s string) (string, error) {
   444  	var (
   445  		err error
   446  		b   []byte
   447  		k   int
   448  	)
   449  	for i := 0; i < len(s); {
   450  		v, sz := trie.lookupString(s[i:])
   451  		start := i
   452  		i += sz
   453  		// Copy bytes not copied so far.
   454  		switch p.simplify(info(v).category()) {
   455  		case valid:
   456  			continue
   457  		case disallowed:
   458  			if err == nil {
   459  				r, _ := utf8.DecodeRuneInString(s[start:])
   460  				err = runeError(r)
   461  			}
   462  			continue
   463  		case mapped, deviation:
   464  			b = append(b, s[k:start]...)
   465  			b = info(v).appendMapping(b, s[start:i])
   466  		case ignored:
   467  			b = append(b, s[k:start]...)
   468  			// drop the rune
   469  		case unknown:
   470  			b = append(b, s[k:start]...)
   471  			b = append(b, "\ufffd"...)
   472  		}
   473  		k = i
   474  	}
   475  	if k == 0 {
   476  		// No changes so far.
   477  		s = norm.NFC.String(s)
   478  	} else {
   479  		b = append(b, s[k:]...)
   480  		if norm.NFC.QuickSpan(b) != len(b) {
   481  			b = norm.NFC.Bytes(b)
   482  		}
   483  		// TODO: the punycode converters require strings as input.
   484  		s = string(b)
   485  	}
   486  	return s, err
   487  }
   488  
   489  // A labelIter allows iterating over domain name labels.
   490  type labelIter struct {
   491  	orig     string
   492  	slice    []string
   493  	curStart int
   494  	curEnd   int
   495  	i        int
   496  }
   497  
   498  func (l *labelIter) reset() {
   499  	l.curStart = 0
   500  	l.curEnd = 0
   501  	l.i = 0
   502  }
   503  
   504  func (l *labelIter) done() bool {
   505  	return l.curStart >= len(l.orig)
   506  }
   507  
   508  func (l *labelIter) result() string {
   509  	if l.slice != nil {
   510  		return strings.Join(l.slice, ".")
   511  	}
   512  	return l.orig
   513  }
   514  
   515  func (l *labelIter) label() string {
   516  	if l.slice != nil {
   517  		return l.slice[l.i]
   518  	}
   519  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   520  	l.curEnd = l.curStart + p
   521  	if p == -1 {
   522  		l.curEnd = len(l.orig)
   523  	}
   524  	return l.orig[l.curStart:l.curEnd]
   525  }
   526  
   527  // next sets the value to the next label. It skips the last label if it is empty.
   528  func (l *labelIter) next() {
   529  	l.i++
   530  	if l.slice != nil {
   531  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   532  			l.curStart = len(l.orig)
   533  		}
   534  	} else {
   535  		l.curStart = l.curEnd + 1
   536  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   537  			l.curStart = len(l.orig)
   538  		}
   539  	}
   540  }
   541  
   542  func (l *labelIter) set(s string) {
   543  	if l.slice == nil {
   544  		l.slice = strings.Split(l.orig, ".")
   545  	}
   546  	l.slice[l.i] = s
   547  }
   548  
// acePrefix is the ASCII Compatible Encoding prefix. process treats labels
// starting with it as punycode to decode, and passes it to encode when
// converting non-ASCII labels to ASCII.
const acePrefix = "xn--"
   551  
   552  func (p *Profile) simplify(cat category) category {
   553  	switch cat {
   554  	case disallowedSTD3Mapped:
   555  		if p.useSTD3Rules {
   556  			cat = disallowed
   557  		} else {
   558  			cat = mapped
   559  		}
   560  	case disallowedSTD3Valid:
   561  		if p.useSTD3Rules {
   562  			cat = disallowed
   563  		} else {
   564  			cat = valid
   565  		}
   566  	case deviation:
   567  		if !p.transitional {
   568  			cat = valid
   569  		}
   570  	case validNV8, validXV8:
   571  		// TODO: handle V2008
   572  		cat = valid
   573  	}
   574  	return cat
   575  }
   576  
   577  func validateFromPunycode(p *Profile, s string) error {
   578  	if !norm.NFC.IsNormalString(s) {
   579  		return &labelError{s, "V1"}
   580  	}
   581  	for i := 0; i < len(s); {
   582  		v, sz := trie.lookupString(s[i:])
   583  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   584  			return &labelError{s, "V6"}
   585  		}
   586  		i += sz
   587  	}
   588  	return nil
   589  }
   590  
// UTF-8 encodings of the two joiner runes that validateLabel looks for.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   595  
// joinState is a state of the state machine that validateLabel runs over a
// label containing joiner runes. Transitions are given by joinStates.
type joinState int8

// The states of the joiner state machine. stateStart is the zero value, so it
// is also the target of any transition that joinStates leaves unspecified.
// validateLabel rejects a label that ends in stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
   606  
// joinStates is the transition table of the joiner state machine, indexed as
// joinStates[current state][join type]. Entries that are not listed keep the
// zero value, which is stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	// stateFAIL is absorbing: every input leads back to stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   652  
   653  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   654  // already implicitly satisfied by the overall implementation.
   655  func (p *Profile) validateLabel(s string) error {
   656  	if s == "" {
   657  		if p.verifyDNSLength {
   658  			return &labelError{s, "A4"}
   659  		}
   660  		return nil
   661  	}
   662  	if p.bidirule != nil && !p.bidirule(s) {
   663  		return &labelError{s, "B"}
   664  	}
   665  	if p.checkHyphens {
   666  		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   667  			return &labelError{s, "V2"}
   668  		}
   669  		if s[0] == '-' || s[len(s)-1] == '-' {
   670  			return &labelError{s, "V3"}
   671  		}
   672  	}
   673  	if !p.checkJoiners {
   674  		return nil
   675  	}
   676  	trie := p.trie // p.checkJoiners is only set if trie is set.
   677  	// TODO: merge the use of this in the trie.
   678  	v, sz := trie.lookupString(s)
   679  	x := info(v)
   680  	if x.isModifier() {
   681  		return &labelError{s, "V5"}
   682  	}
   683  	// Quickly return in the absence of zero-width (non) joiners.
   684  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   685  		return nil
   686  	}
   687  	st := stateStart
   688  	for i := 0; ; {
   689  		jt := x.joinType()
   690  		if s[i:i+sz] == zwj {
   691  			jt = joinZWJ
   692  		} else if s[i:i+sz] == zwnj {
   693  			jt = joinZWNJ
   694  		}
   695  		st = joinStates[st][jt]
   696  		if x.isViramaModifier() {
   697  			st = joinStates[st][joinVirama]
   698  		}
   699  		if i += sz; i == len(s) {
   700  			break
   701  		}
   702  		v, sz = trie.lookupString(s[i:])
   703  		x = info(v)
   704  	}
   705  	if st == stateFAIL || st == stateAfter {
   706  		return &labelError{s, "C"}
   707  	}
   708  	return nil
   709  }
   710  
   711  func ascii(s string) bool {
   712  	for i := 0; i < len(s); i++ {
   713  		if s[i] >= utf8.RuneSelf {
   714  			return false
   715  		}
   716  	}
   717  	return true
   718  }
   719  

View as plain text