Source file src/mime/mediatype.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
import (
	"errors"
	"fmt"
	"sort"
	"strconv"
	"strings"
	"unicode"
)
    14  
    15  // FormatMediaType serializes mediatype t and the parameters
    16  // param as a media type conforming to RFC 2045 and RFC 2616.
    17  // The type and parameter names are written in lower-case.
    18  // When any of the arguments result in a standard violation then
    19  // FormatMediaType returns the empty string.
    20  func FormatMediaType(t string, param map[string]string) string {
    21  	var b strings.Builder
    22  	if major, sub, ok := strings.Cut(t, "/"); !ok {
    23  		if !isToken(t) {
    24  			return ""
    25  		}
    26  		b.WriteString(strings.ToLower(t))
    27  	} else {
    28  		if !isToken(major) || !isToken(sub) {
    29  			return ""
    30  		}
    31  		b.WriteString(strings.ToLower(major))
    32  		b.WriteByte('/')
    33  		b.WriteString(strings.ToLower(sub))
    34  	}
    35  
    36  	attrs := make([]string, 0, len(param))
    37  	for a := range param {
    38  		attrs = append(attrs, a)
    39  	}
    40  	sort.Strings(attrs)
    41  
    42  	for _, attribute := range attrs {
    43  		value := param[attribute]
    44  		b.WriteByte(';')
    45  		b.WriteByte(' ')
    46  		if !isToken(attribute) {
    47  			return ""
    48  		}
    49  		b.WriteString(strings.ToLower(attribute))
    50  
    51  		needEnc := needsEncoding(value)
    52  		if needEnc {
    53  			// RFC 2231 section 4
    54  			b.WriteByte('*')
    55  		}
    56  		b.WriteByte('=')
    57  
    58  		if needEnc {
    59  			b.WriteString("utf-8''")
    60  
    61  			offset := 0
    62  			for index := 0; index < len(value); index++ {
    63  				ch := value[index]
    64  				// {RFC 2231 section 7}
    65  				// attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
    66  				if ch <= ' ' || ch >= 0x7F ||
    67  					ch == '*' || ch == '\'' || ch == '%' ||
    68  					isTSpecial(rune(ch)) {
    69  
    70  					b.WriteString(value[offset:index])
    71  					offset = index + 1
    72  
    73  					b.WriteByte('%')
    74  					b.WriteByte(upperhex[ch>>4])
    75  					b.WriteByte(upperhex[ch&0x0F])
    76  				}
    77  			}
    78  			b.WriteString(value[offset:])
    79  			continue
    80  		}
    81  
    82  		if isToken(value) {
    83  			b.WriteString(value)
    84  			continue
    85  		}
    86  
    87  		b.WriteByte('"')
    88  		offset := 0
    89  		for index := 0; index < len(value); index++ {
    90  			character := value[index]
    91  			if character == '"' || character == '\\' {
    92  				b.WriteString(value[offset:index])
    93  				offset = index
    94  				b.WriteByte('\\')
    95  			}
    96  		}
    97  		b.WriteString(value[offset:])
    98  		b.WriteByte('"')
    99  	}
   100  	return b.String()
   101  }
   102  
   103  func checkMediaTypeDisposition(s string) error {
   104  	typ, rest := consumeToken(s)
   105  	if typ == "" {
   106  		return errors.New("mime: no media type")
   107  	}
   108  	if rest == "" {
   109  		return nil
   110  	}
   111  	if !strings.HasPrefix(rest, "/") {
   112  		return errors.New("mime: expected slash after first token")
   113  	}
   114  	subtype, rest := consumeToken(rest[1:])
   115  	if subtype == "" {
   116  		return errors.New("mime: expected token after slash")
   117  	}
   118  	if rest != "" {
   119  		return errors.New("mime: unexpected content after media subtype")
   120  	}
   121  	return nil
   122  }
   123  
   124  // ErrInvalidMediaParameter is returned by ParseMediaType if
   125  // the media type value was found but there was an error parsing
   126  // the optional parameters
   127  var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
   128  
   129  // ParseMediaType parses a media type value and any optional
   130  // parameters, per RFC 1521.  Media types are the values in
   131  // Content-Type and Content-Disposition headers (RFC 2183).
   132  // On success, ParseMediaType returns the media type converted
   133  // to lowercase and trimmed of white space and a non-nil map.
   134  // If there is an error parsing the optional parameter,
   135  // the media type will be returned along with the error
   136  // ErrInvalidMediaParameter.
   137  // The returned map, params, maps from the lowercase
   138  // attribute to the attribute value with its case preserved.
   139  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   140  	base, _, _ := strings.Cut(v, ";")
   141  	mediatype = strings.TrimSpace(strings.ToLower(base))
   142  
   143  	err = checkMediaTypeDisposition(mediatype)
   144  	if err != nil {
   145  		return "", nil, err
   146  	}
   147  
   148  	params = make(map[string]string)
   149  
   150  	// Map of base parameter name -> parameter name -> value
   151  	// for parameters containing a '*' character.
   152  	// Lazily initialized.
   153  	var continuation map[string]map[string]string
   154  
   155  	v = v[len(base):]
   156  	for len(v) > 0 {
   157  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   158  		if len(v) == 0 {
   159  			break
   160  		}
   161  		key, value, rest := consumeMediaParam(v)
   162  		if key == "" {
   163  			if strings.TrimSpace(rest) == ";" {
   164  				// Ignore trailing semicolons.
   165  				// Not an error.
   166  				break
   167  			}
   168  			// Parse error.
   169  			return mediatype, nil, ErrInvalidMediaParameter
   170  		}
   171  
   172  		pmap := params
   173  		if baseName, _, ok := strings.Cut(key, "*"); ok {
   174  			if continuation == nil {
   175  				continuation = make(map[string]map[string]string)
   176  			}
   177  			var ok bool
   178  			if pmap, ok = continuation[baseName]; !ok {
   179  				continuation[baseName] = make(map[string]string)
   180  				pmap = continuation[baseName]
   181  			}
   182  		}
   183  		if _, exists := pmap[key]; exists {
   184  			// Duplicate parameter name is bogus.
   185  			return "", nil, errors.New("mime: duplicate parameter name")
   186  		}
   187  		pmap[key] = value
   188  		v = rest
   189  	}
   190  
   191  	// Stitch together any continuations or things with stars
   192  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   193  	var buf strings.Builder
   194  	for key, pieceMap := range continuation {
   195  		singlePartKey := key + "*"
   196  		if v, ok := pieceMap[singlePartKey]; ok {
   197  			if decv, ok := decode2231Enc(v); ok {
   198  				params[key] = decv
   199  			}
   200  			continue
   201  		}
   202  
   203  		buf.Reset()
   204  		valid := false
   205  		for n := 0; ; n++ {
   206  			simplePart := fmt.Sprintf("%s*%d", key, n)
   207  			if v, ok := pieceMap[simplePart]; ok {
   208  				valid = true
   209  				buf.WriteString(v)
   210  				continue
   211  			}
   212  			encodedPart := simplePart + "*"
   213  			v, ok := pieceMap[encodedPart]
   214  			if !ok {
   215  				break
   216  			}
   217  			valid = true
   218  			if n == 0 {
   219  				if decv, ok := decode2231Enc(v); ok {
   220  					buf.WriteString(decv)
   221  				}
   222  			} else {
   223  				decv, _ := percentHexUnescape(v)
   224  				buf.WriteString(decv)
   225  			}
   226  		}
   227  		if valid {
   228  			params[key] = buf.String()
   229  		}
   230  	}
   231  
   232  	return
   233  }
   234  
   235  func decode2231Enc(v string) (string, bool) {
   236  	sv := strings.SplitN(v, "'", 3)
   237  	if len(sv) != 3 {
   238  		return "", false
   239  	}
   240  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   241  	// need to decide how to expose it in the API. But I'm not sure
   242  	// anybody uses it in practice.
   243  	charset := strings.ToLower(sv[0])
   244  	if len(charset) == 0 {
   245  		return "", false
   246  	}
   247  	if charset != "us-ascii" && charset != "utf-8" {
   248  		// TODO: unsupported encoding
   249  		return "", false
   250  	}
   251  	encv, err := percentHexUnescape(sv[2])
   252  	if err != nil {
   253  		return "", false
   254  	}
   255  	return encv, true
   256  }
   257  
   258  func isNotTokenChar(r rune) bool {
   259  	return !isTokenChar(r)
   260  }
   261  
   262  // consumeToken consumes a token from the beginning of provided
   263  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   264  // the token consumed and the rest of the string. Returns ("", v) on
   265  // failure to consume at least one character.
   266  func consumeToken(v string) (token, rest string) {
   267  	notPos := strings.IndexFunc(v, isNotTokenChar)
   268  	if notPos == -1 {
   269  		return v, ""
   270  	}
   271  	if notPos == 0 {
   272  		return "", v
   273  	}
   274  	return v[0:notPos], v[notPos:]
   275  }
   276  
   277  // consumeValue consumes a "value" per RFC 2045, where a value is
   278  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   279  // returns the value consumed (and de-quoted/escaped, if a
   280  // quoted-string) and the rest of the string. On failure, returns
   281  // ("", v).
   282  func consumeValue(v string) (value, rest string) {
   283  	if v == "" {
   284  		return
   285  	}
   286  	if v[0] != '"' {
   287  		return consumeToken(v)
   288  	}
   289  
   290  	// parse a quoted-string
   291  	buffer := new(strings.Builder)
   292  	for i := 1; i < len(v); i++ {
   293  		r := v[i]
   294  		if r == '"' {
   295  			return buffer.String(), v[i+1:]
   296  		}
   297  		// When MSIE sends a full file path (in "intranet mode"), it does not
   298  		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
   299  		//
   300  		// No known MIME generators emit unnecessary backslash escapes
   301  		// for simple token characters like numbers and letters.
   302  		//
   303  		// If we see an unnecessary backslash escape, assume it is from MSIE
   304  		// and intended as a literal backslash. This makes Go servers deal better
   305  		// with MSIE without affecting the way they handle conforming MIME
   306  		// generators.
   307  		if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) {
   308  			buffer.WriteByte(v[i+1])
   309  			i++
   310  			continue
   311  		}
   312  		if r == '\r' || r == '\n' {
   313  			return "", v
   314  		}
   315  		buffer.WriteByte(v[i])
   316  	}
   317  	// Did not find end quote.
   318  	return "", v
   319  }
   320  
   321  func consumeMediaParam(v string) (param, value, rest string) {
   322  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   323  	if !strings.HasPrefix(rest, ";") {
   324  		return "", "", v
   325  	}
   326  
   327  	rest = rest[1:] // consume semicolon
   328  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   329  	param, rest = consumeToken(rest)
   330  	param = strings.ToLower(param)
   331  	if param == "" {
   332  		return "", "", v
   333  	}
   334  
   335  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   336  	if !strings.HasPrefix(rest, "=") {
   337  		return "", "", v
   338  	}
   339  	rest = rest[1:] // consume equals sign
   340  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   341  	value, rest2 := consumeValue(rest)
   342  	if value == "" && rest2 == rest {
   343  		return "", "", v
   344  	}
   345  	rest = rest2
   346  	return param, value, rest
   347  }
   348  
   349  func percentHexUnescape(s string) (string, error) {
   350  	// Count %, check that they're well-formed.
   351  	percents := 0
   352  	for i := 0; i < len(s); {
   353  		if s[i] != '%' {
   354  			i++
   355  			continue
   356  		}
   357  		percents++
   358  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   359  			s = s[i:]
   360  			if len(s) > 3 {
   361  				s = s[0:3]
   362  			}
   363  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   364  		}
   365  		i += 3
   366  	}
   367  	if percents == 0 {
   368  		return s, nil
   369  	}
   370  
   371  	t := make([]byte, len(s)-2*percents)
   372  	j := 0
   373  	for i := 0; i < len(s); {
   374  		switch s[i] {
   375  		case '%':
   376  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   377  			j++
   378  			i += 3
   379  		default:
   380  			t[j] = s[i]
   381  			j++
   382  			i++
   383  		}
   384  	}
   385  	return string(t), nil
   386  }
   387  
   388  func ishex(c byte) bool {
   389  	switch {
   390  	case '0' <= c && c <= '9':
   391  		return true
   392  	case 'a' <= c && c <= 'f':
   393  		return true
   394  	case 'A' <= c && c <= 'F':
   395  		return true
   396  	}
   397  	return false
   398  }
   399  
   400  func unhex(c byte) byte {
   401  	switch {
   402  	case '0' <= c && c <= '9':
   403  		return c - '0'
   404  	case 'a' <= c && c <= 'f':
   405  		return c - 'a' + 10
   406  	case 'A' <= c && c <= 'F':
   407  		return c - 'A' + 10
   408  	}
   409  	return 0
   410  }
   411  

View as plain text