Source file src/net/http/httputil/reverseproxy.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // HTTP reverse proxy handler
     6  
     7  package httputil
     8  
     9  import (
    10  	"context"
    11  	"fmt"
    12  	"io"
    13  	"log"
    14  	"mime"
    15  	"net"
    16  	"net/http"
    17  	"net/http/internal/ascii"
    18  	"net/textproto"
    19  	"net/url"
    20  	"strings"
    21  	"sync"
    22  	"time"
    23  
    24  	"golang.org/x/net/http/httpguts"
    25  )
    26  
// ReverseProxy is an HTTP Handler that takes an incoming request and
// sends it to another server, proxying the response back to the
// client.
//
// ReverseProxy by default sets the client IP as the value of the
// X-Forwarded-For header.
//
// If an X-Forwarded-For header already exists, the client IP is
// appended to the existing values, which are folded into a single
// comma+space separated list. As a special case, if the header
// exists in the Request.Header map but has a nil value (such as when
// set by the Director func), the X-Forwarded-For header is
// not modified.
//
// To prevent IP spoofing, be sure to delete any pre-existing
// X-Forwarded-For header coming from the client or
// an untrusted proxy.
type ReverseProxy struct {
	// Director must be a function which modifies
	// the request into a new request to be sent
	// using Transport. Its response is then copied
	// back to the original client unmodified.
	// Director must not access the provided Request
	// after returning.
	//
	// ServeHTTP hands Director a clone of the incoming request
	// (made with Request.Clone) and calls it unconditionally,
	// so Director must not be nil.
	Director func(*http.Request)

	// The transport used to perform proxy requests.
	// If nil, http.DefaultTransport is used.
	Transport http.RoundTripper

	// FlushInterval specifies the flush interval
	// to flush to the client while copying the
	// response body.
	// If zero, no periodic flushing is done.
	// A negative value means to flush immediately
	// after each write to the client.
	// The FlushInterval is ignored when ReverseProxy
	// recognizes a response as a streaming response
	// (media type text/event-stream), or
	// if its ContentLength is -1; for such responses, writes
	// are flushed to the client immediately.
	FlushInterval time.Duration

	// ErrorLog specifies an optional logger for errors
	// that occur when attempting to proxy the request.
	// If nil, logging is done via the log package's standard logger.
	ErrorLog *log.Logger

	// BufferPool optionally specifies a buffer pool to
	// get byte slices for use by io.CopyBuffer when
	// copying HTTP response bodies.
	// If nil, a fresh 32 KiB buffer is allocated for each
	// response body copy.
	BufferPool BufferPool

	// ModifyResponse is an optional function that modifies the
	// Response from the backend. It is called if the backend
	// returns a response at all, with any HTTP status code.
	// If the backend is unreachable, the optional ErrorHandler is
	// called without any call to ModifyResponse.
	//
	// If ModifyResponse returns an error, the response body is
	// closed and ErrorHandler is called
	// with its error value. If ErrorHandler is nil, its default
	// implementation is used.
	ModifyResponse func(*http.Response) error

	// ErrorHandler is an optional function that handles errors
	// reaching the backend or errors from ModifyResponse.
	//
	// If nil, the default is to log the provided error and return
	// a 502 Status Bad Gateway response.
	ErrorHandler func(http.ResponseWriter, *http.Request, error)
}
    96  
// A BufferPool is an interface for getting and returning temporary
// byte slices for use by io.CopyBuffer.
type BufferPool interface {
	// Get returns a byte slice to use as the copy buffer for one
	// response body. If the returned slice has zero length, the proxy
	// allocates its own 32 KiB buffer for the copy instead.
	Get() []byte
	// Put receives the slice previously obtained from Get once the
	// response body copy has finished.
	Put([]byte)
}
   103  
   104  func singleJoiningSlash(a, b string) string {
   105  	aslash := strings.HasSuffix(a, "/")
   106  	bslash := strings.HasPrefix(b, "/")
   107  	switch {
   108  	case aslash && bslash:
   109  		return a + b[1:]
   110  	case !aslash && !bslash:
   111  		return a + "/" + b
   112  	}
   113  	return a + b
   114  }
   115  
   116  func joinURLPath(a, b *url.URL) (path, rawpath string) {
   117  	if a.RawPath == "" && b.RawPath == "" {
   118  		return singleJoiningSlash(a.Path, b.Path), ""
   119  	}
   120  	// Same as singleJoiningSlash, but uses EscapedPath to determine
   121  	// whether a slash should be added
   122  	apath := a.EscapedPath()
   123  	bpath := b.EscapedPath()
   124  
   125  	aslash := strings.HasSuffix(apath, "/")
   126  	bslash := strings.HasPrefix(bpath, "/")
   127  
   128  	switch {
   129  	case aslash && bslash:
   130  		return a.Path + b.Path[1:], apath + bpath[1:]
   131  	case !aslash && !bslash:
   132  		return a.Path + "/" + b.Path, apath + "/" + bpath
   133  	}
   134  	return a.Path + b.Path, apath + bpath
   135  }
   136  
   137  // NewSingleHostReverseProxy returns a new ReverseProxy that routes
   138  // URLs to the scheme, host, and base path provided in target. If the
   139  // target's path is "/base" and the incoming request was for "/dir",
   140  // the target request will be for /base/dir.
   141  // NewSingleHostReverseProxy does not rewrite the Host header.
   142  // To rewrite Host headers, use ReverseProxy directly with a custom
   143  // Director policy.
   144  func NewSingleHostReverseProxy(target *url.URL) *ReverseProxy {
   145  	targetQuery := target.RawQuery
   146  	director := func(req *http.Request) {
   147  		req.URL.Scheme = target.Scheme
   148  		req.URL.Host = target.Host
   149  		req.URL.Path, req.URL.RawPath = joinURLPath(target, req.URL)
   150  		if targetQuery == "" || req.URL.RawQuery == "" {
   151  			req.URL.RawQuery = targetQuery + req.URL.RawQuery
   152  		} else {
   153  			req.URL.RawQuery = targetQuery + "&" + req.URL.RawQuery
   154  		}
   155  		if _, ok := req.Header["User-Agent"]; !ok {
   156  			// explicitly disable User-Agent so it's not set to default value
   157  			req.Header.Set("User-Agent", "")
   158  		}
   159  	}
   160  	return &ReverseProxy{Director: director}
   161  }
   162  
   163  func copyHeader(dst, src http.Header) {
   164  	for k, vv := range src {
   165  		for _, v := range vv {
   166  			dst.Add(k, v)
   167  		}
   168  	}
   169  }
   170  
// Hop-by-hop headers. ServeHTTP deletes these from the request sent to
// the backend and from the response received from it.
// As of RFC 7230, hop-by-hop headers are required to appear in the
// Connection header field. These are the headers defined by the
// obsoleted RFC 2616 (section 13.5.1) and are used for backward
// compatibility.
// The names are listed in canonical MIME header form.
var hopHeaders = []string{
	"Connection",
	"Proxy-Connection", // non-standard but still sent by libcurl and rejected by e.g. google
	"Keep-Alive",
	"Proxy-Authenticate",
	"Proxy-Authorization",
	"Te",      // canonicalized version of "TE"
	"Trailer", // not "Trailers"; see the RFC 2616 erratum at https://www.rfc-editor.org/errata_search.php?eid=4522
	"Transfer-Encoding",
	"Upgrade",
}
   187  
   188  func (p *ReverseProxy) defaultErrorHandler(rw http.ResponseWriter, req *http.Request, err error) {
   189  	p.logf("http: proxy error: %v", err)
   190  	rw.WriteHeader(http.StatusBadGateway)
   191  }
   192  
   193  func (p *ReverseProxy) getErrorHandler() func(http.ResponseWriter, *http.Request, error) {
   194  	if p.ErrorHandler != nil {
   195  		return p.ErrorHandler
   196  	}
   197  	return p.defaultErrorHandler
   198  }
   199  
   200  // modifyResponse conditionally runs the optional ModifyResponse hook
   201  // and reports whether the request should proceed.
   202  func (p *ReverseProxy) modifyResponse(rw http.ResponseWriter, res *http.Response, req *http.Request) bool {
   203  	if p.ModifyResponse == nil {
   204  		return true
   205  	}
   206  	if err := p.ModifyResponse(res); err != nil {
   207  		res.Body.Close()
   208  		p.getErrorHandler()(rw, req, err)
   209  		return false
   210  	}
   211  	return true
   212  }
   213  
// ServeHTTP implements http.Handler. It clones the incoming request,
// lets Director rewrite the clone, strips hop-by-hop headers, sends the
// clone through the Transport (http.DefaultTransport when nil), and
// copies the backend's status, headers, body, and trailers to rw.
//
// Failures of the round trip, of ModifyResponse, or of an invalid
// protocol upgrade request are reported through the error handler. An
// error while copying the response body aborts the handler by panicking
// with http.ErrAbortHandler, unless shouldPanicOnCopyError reports
// false, in which case the error is only logged.
func (p *ReverseProxy) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
	transport := p.Transport
	if transport == nil {
		transport = http.DefaultTransport
	}

	// If the ResponseWriter can report that the client went away,
	// cancel the outgoing request's context when it does.
	ctx := req.Context()
	if cn, ok := rw.(http.CloseNotifier); ok {
		var cancel context.CancelFunc
		ctx, cancel = context.WithCancel(ctx)
		defer cancel()
		notifyChan := cn.CloseNotify()
		go func() {
			// The goroutine exits on either event; the deferred cancel
			// above guarantees ctx.Done fires when ServeHTTP returns.
			select {
			case <-notifyChan:
				cancel()
			case <-ctx.Done():
			}
		}()
	}

	outreq := req.Clone(ctx)
	if req.ContentLength == 0 {
		outreq.Body = nil // Issue 16036: nil Body for http.Transport retries
	}
	if outreq.Body != nil {
		// Reading from the request body after returning from a handler is not
		// allowed, and the RoundTrip goroutine that reads the Body can outlive
		// this handler. This can lead to a crash if the handler panics (see
		// Issue 46866). Although calling Close doesn't guarantee there isn't
		// any Read in flight after the handler returns, in practice it's safe to
		// read after closing it.
		defer outreq.Body.Close()
	}
	if outreq.Header == nil {
		outreq.Header = make(http.Header) // Issue 33142: historical behavior was to always allocate
	}

	p.Director(outreq)
	// Always clear Close, whatever Director or the client set.
	outreq.Close = false

	// Capture the requested upgrade protocol before the Connection and
	// Upgrade headers are stripped below.
	reqUpType := upgradeType(outreq.Header)
	if !ascii.IsPrint(reqUpType) {
		p.getErrorHandler()(rw, req, fmt.Errorf("client tried to switch to invalid protocol %q", reqUpType))
		return
	}
	removeConnectionHeaders(outreq.Header)

	// Remove hop-by-hop headers to the backend. Especially
	// important is "Connection" because we want a persistent
	// connection, regardless of what the client sent to us.
	for _, h := range hopHeaders {
		outreq.Header.Del(h)
	}

	// Issue 21096: tell backend applications that care about trailer support
	// that we support trailers. (We do, but we don't go out of our way to
	// advertise that unless the incoming client request thought it was worth
	// mentioning.) Note that we look at req.Header, not outreq.Header, since
	// the latter has passed through removeConnectionHeaders.
	if httpguts.HeaderValuesContainsToken(req.Header["Te"], "trailers") {
		outreq.Header.Set("Te", "trailers")
	}

	// After stripping all the hop-by-hop connection headers above, add back any
	// necessary for protocol upgrades, such as for websockets.
	if reqUpType != "" {
		outreq.Header.Set("Connection", "Upgrade")
		outreq.Header.Set("Upgrade", reqUpType)
	}

	// X-Forwarded-For is only touched when RemoteAddr parses as host:port.
	if clientIP, _, err := net.SplitHostPort(req.RemoteAddr); err == nil {
		// If we aren't the first proxy retain prior
		// X-Forwarded-For information as a comma+space
		// separated list and fold multiple headers into one.
		prior, ok := outreq.Header["X-Forwarded-For"]
		omit := ok && prior == nil // Issue 38079: nil now means don't populate the header
		if len(prior) > 0 {
			clientIP = strings.Join(prior, ", ") + ", " + clientIP
		}
		if !omit {
			outreq.Header.Set("X-Forwarded-For", clientIP)
		}
	}

	res, err := transport.RoundTrip(outreq)
	if err != nil {
		// Note: the error handler receives the outgoing request here.
		p.getErrorHandler()(rw, outreq, err)
		return
	}

	// Deal with 101 Switching Protocols responses: (WebSocket, h2c, etc)
	if res.StatusCode == http.StatusSwitchingProtocols {
		if !p.modifyResponse(rw, res, outreq) {
			return
		}
		p.handleUpgradeResponse(rw, outreq, res)
		return
	}

	// Strip hop-by-hop headers from the backend response before
	// ModifyResponse sees it and before it is copied to the client.
	removeConnectionHeaders(res.Header)

	for _, h := range hopHeaders {
		res.Header.Del(h)
	}

	if !p.modifyResponse(rw, res, outreq) {
		return
	}

	copyHeader(rw.Header(), res.Header)

	// The "Trailer" header isn't included in the Transport's response,
	// at least for *http.Transport. Build it up from Trailer.
	// announcedTrailers remembers how many trailer keys were known
	// before the body was read; it is compared again after the copy.
	announcedTrailers := len(res.Trailer)
	if announcedTrailers > 0 {
		trailerKeys := make([]string, 0, len(res.Trailer))
		for k := range res.Trailer {
			trailerKeys = append(trailerKeys, k)
		}
		rw.Header().Add("Trailer", strings.Join(trailerKeys, ", "))
	}

	rw.WriteHeader(res.StatusCode)

	err = p.copyResponse(rw, res.Body, p.flushInterval(res))
	if err != nil {
		defer res.Body.Close()
		// Since we're streaming the response, if we run into an error all we can do
		// is abort the request. Issue 23643: ReverseProxy should use ErrAbortHandler
		// on read error while copying body.
		if !shouldPanicOnCopyError(req) {
			p.logf("suppressing panic for copyResponse error in test; copy error: %v", err)
			return
		}
		panic(http.ErrAbortHandler)
	}
	res.Body.Close() // close now, instead of defer, to populate res.Trailer

	if len(res.Trailer) > 0 {
		// Force chunking if we saw a response trailer.
		// This prevents net/http from calculating the length for short
		// bodies and adding a Content-Length.
		if fl, ok := rw.(http.Flusher); ok {
			fl.Flush()
		}
	}

	// If the number of trailer keys is unchanged, every trailer was
	// announced in the "Trailer" header above and can be set directly.
	if len(res.Trailer) == announcedTrailers {
		copyHeader(rw.Header(), res.Trailer)
		return
	}

	// Otherwise some trailers were not announced up front; send all of
	// them using the http.TrailerPrefix key convention.
	for k, vv := range res.Trailer {
		k = http.TrailerPrefix + k
		for _, v := range vv {
			rw.Header().Add(k, v)
		}
	}
}
   374  
   375  var inOurTests bool // whether we're in our own tests
   376  
   377  // shouldPanicOnCopyError reports whether the reverse proxy should
   378  // panic with http.ErrAbortHandler. This is the right thing to do by
   379  // default, but Go 1.10 and earlier did not, so existing unit tests
   380  // weren't expecting panics. Only panic in our own tests, or when
   381  // running under the HTTP server.
   382  func shouldPanicOnCopyError(req *http.Request) bool {
   383  	if inOurTests {
   384  		// Our tests know to handle this panic.
   385  		return true
   386  	}
   387  	if req.Context().Value(http.ServerContextKey) != nil {
   388  		// We seem to be running under an HTTP server, so
   389  		// it'll recover the panic.
   390  		return true
   391  	}
   392  	// Otherwise act like Go 1.10 and earlier to not break
   393  	// existing tests.
   394  	return false
   395  }
   396  
   397  // removeConnectionHeaders removes hop-by-hop headers listed in the "Connection" header of h.
   398  // See RFC 7230, section 6.1
   399  func removeConnectionHeaders(h http.Header) {
   400  	for _, f := range h["Connection"] {
   401  		for _, sf := range strings.Split(f, ",") {
   402  			if sf = textproto.TrimString(sf); sf != "" {
   403  				h.Del(sf)
   404  			}
   405  		}
   406  	}
   407  }
   408  
   409  // flushInterval returns the p.FlushInterval value, conditionally
   410  // overriding its value for a specific request/response.
   411  func (p *ReverseProxy) flushInterval(res *http.Response) time.Duration {
   412  	resCT := res.Header.Get("Content-Type")
   413  
   414  	// For Server-Sent Events responses, flush immediately.
   415  	// The MIME type is defined in https://www.w3.org/TR/eventsource/#text-event-stream
   416  	if baseCT, _, _ := mime.ParseMediaType(resCT); baseCT == "text/event-stream" {
   417  		return -1 // negative means immediately
   418  	}
   419  
   420  	// We might have the case of streaming for which Content-Length might be unset.
   421  	if res.ContentLength == -1 {
   422  		return -1
   423  	}
   424  
   425  	return p.FlushInterval
   426  }
   427  
   428  func (p *ReverseProxy) copyResponse(dst io.Writer, src io.Reader, flushInterval time.Duration) error {
   429  	if flushInterval != 0 {
   430  		if wf, ok := dst.(writeFlusher); ok {
   431  			mlw := &maxLatencyWriter{
   432  				dst:     wf,
   433  				latency: flushInterval,
   434  			}
   435  			defer mlw.stop()
   436  
   437  			// set up initial timer so headers get flushed even if body writes are delayed
   438  			mlw.flushPending = true
   439  			mlw.t = time.AfterFunc(flushInterval, mlw.delayedFlush)
   440  
   441  			dst = mlw
   442  		}
   443  	}
   444  
   445  	var buf []byte
   446  	if p.BufferPool != nil {
   447  		buf = p.BufferPool.Get()
   448  		defer p.BufferPool.Put(buf)
   449  	}
   450  	_, err := p.copyBuffer(dst, src, buf)
   451  	return err
   452  }
   453  
   454  // copyBuffer returns any write errors or non-EOF read errors, and the amount
   455  // of bytes written.
   456  func (p *ReverseProxy) copyBuffer(dst io.Writer, src io.Reader, buf []byte) (int64, error) {
   457  	if len(buf) == 0 {
   458  		buf = make([]byte, 32*1024)
   459  	}
   460  	var written int64
   461  	for {
   462  		nr, rerr := src.Read(buf)
   463  		if rerr != nil && rerr != io.EOF && rerr != context.Canceled {
   464  			p.logf("httputil: ReverseProxy read error during body copy: %v", rerr)
   465  		}
   466  		if nr > 0 {
   467  			nw, werr := dst.Write(buf[:nr])
   468  			if nw > 0 {
   469  				written += int64(nw)
   470  			}
   471  			if werr != nil {
   472  				return written, werr
   473  			}
   474  			if nr != nw {
   475  				return written, io.ErrShortWrite
   476  			}
   477  		}
   478  		if rerr != nil {
   479  			if rerr == io.EOF {
   480  				rerr = nil
   481  			}
   482  			return written, rerr
   483  		}
   484  	}
   485  }
   486  
   487  func (p *ReverseProxy) logf(format string, args ...any) {
   488  	if p.ErrorLog != nil {
   489  		p.ErrorLog.Printf(format, args...)
   490  	} else {
   491  		log.Printf(format, args...)
   492  	}
   493  }
   494  
   495  type writeFlusher interface {
   496  	io.Writer
   497  	http.Flusher
   498  }
   499  
   500  type maxLatencyWriter struct {
   501  	dst     writeFlusher
   502  	latency time.Duration // non-zero; negative means to flush immediately
   503  
   504  	mu           sync.Mutex // protects t, flushPending, and dst.Flush
   505  	t            *time.Timer
   506  	flushPending bool
   507  }
   508  
   509  func (m *maxLatencyWriter) Write(p []byte) (n int, err error) {
   510  	m.mu.Lock()
   511  	defer m.mu.Unlock()
   512  	n, err = m.dst.Write(p)
   513  	if m.latency < 0 {
   514  		m.dst.Flush()
   515  		return
   516  	}
   517  	if m.flushPending {
   518  		return
   519  	}
   520  	if m.t == nil {
   521  		m.t = time.AfterFunc(m.latency, m.delayedFlush)
   522  	} else {
   523  		m.t.Reset(m.latency)
   524  	}
   525  	m.flushPending = true
   526  	return
   527  }
   528  
   529  func (m *maxLatencyWriter) delayedFlush() {
   530  	m.mu.Lock()
   531  	defer m.mu.Unlock()
   532  	if !m.flushPending { // if stop was called but AfterFunc already started this goroutine
   533  		return
   534  	}
   535  	m.dst.Flush()
   536  	m.flushPending = false
   537  }
   538  
   539  func (m *maxLatencyWriter) stop() {
   540  	m.mu.Lock()
   541  	defer m.mu.Unlock()
   542  	m.flushPending = false
   543  	if m.t != nil {
   544  		m.t.Stop()
   545  	}
   546  }
   547  
   548  func upgradeType(h http.Header) string {
   549  	if !httpguts.HeaderValuesContainsToken(h["Connection"], "Upgrade") {
   550  		return ""
   551  	}
   552  	return h.Get("Upgrade")
   553  }
   554  
   555  func (p *ReverseProxy) handleUpgradeResponse(rw http.ResponseWriter, req *http.Request, res *http.Response) {
   556  	reqUpType := upgradeType(req.Header)
   557  	resUpType := upgradeType(res.Header)
   558  	if !ascii.IsPrint(resUpType) { // We know reqUpType is ASCII, it's checked by the caller.
   559  		p.getErrorHandler()(rw, req, fmt.Errorf("backend tried to switch to invalid protocol %q", resUpType))
   560  	}
   561  	if !ascii.EqualFold(reqUpType, resUpType) {
   562  		p.getErrorHandler()(rw, req, fmt.Errorf("backend tried to switch protocol %q when %q was requested", resUpType, reqUpType))
   563  		return
   564  	}
   565  
   566  	hj, ok := rw.(http.Hijacker)
   567  	if !ok {
   568  		p.getErrorHandler()(rw, req, fmt.Errorf("can't switch protocols using non-Hijacker ResponseWriter type %T", rw))
   569  		return
   570  	}
   571  	backConn, ok := res.Body.(io.ReadWriteCloser)
   572  	if !ok {
   573  		p.getErrorHandler()(rw, req, fmt.Errorf("internal error: 101 switching protocols response with non-writable body"))
   574  		return
   575  	}
   576  
   577  	backConnCloseCh := make(chan bool)
   578  	go func() {
   579  		// Ensure that the cancellation of a request closes the backend.
   580  		// See issue https://golang.org/issue/35559.
   581  		select {
   582  		case <-req.Context().Done():
   583  		case <-backConnCloseCh:
   584  		}
   585  		backConn.Close()
   586  	}()
   587  
   588  	defer close(backConnCloseCh)
   589  
   590  	conn, brw, err := hj.Hijack()
   591  	if err != nil {
   592  		p.getErrorHandler()(rw, req, fmt.Errorf("Hijack failed on protocol switch: %v", err))
   593  		return
   594  	}
   595  	defer conn.Close()
   596  
   597  	copyHeader(rw.Header(), res.Header)
   598  
   599  	res.Header = rw.Header()
   600  	res.Body = nil // so res.Write only writes the headers; we have res.Body in backConn above
   601  	if err := res.Write(brw); err != nil {
   602  		p.getErrorHandler()(rw, req, fmt.Errorf("response write: %v", err))
   603  		return
   604  	}
   605  	if err := brw.Flush(); err != nil {
   606  		p.getErrorHandler()(rw, req, fmt.Errorf("response flush: %v", err))
   607  		return
   608  	}
   609  	errc := make(chan error, 1)
   610  	spc := switchProtocolCopier{user: conn, backend: backConn}
   611  	go spc.copyToBackend(errc)
   612  	go spc.copyFromBackend(errc)
   613  	<-errc
   614  	return
   615  }
   616  
   617  // switchProtocolCopier exists so goroutines proxying data back and
   618  // forth have nice names in stacks.
   619  type switchProtocolCopier struct {
   620  	user, backend io.ReadWriter
   621  }
   622  
   623  func (c switchProtocolCopier) copyFromBackend(errc chan<- error) {
   624  	_, err := io.Copy(c.user, c.backend)
   625  	errc <- err
   626  }
   627  
   628  func (c switchProtocolCopier) copyToBackend(errc chan<- error) {
   629  	_, err := io.Copy(c.backend, c.user)
   630  	errc <- err
   631  }
   632  

View as plain text