Fixing authorities in scheme-relative URLs.

Patching in http://codereview.appspot.com/3910042/
This commit is contained in:
Brad Fitzpatrick 2011-01-10 17:05:37 -08:00
parent ba2ee6a93c
commit eaefd4f708
4 changed files with 172 additions and 8 deletions

View File

@ -508,6 +508,8 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
return nil, err
}
cleanURLForHTTPRequest(req.URL)
// Subsequent lines: Key: value.
nheader := 0
req.Header = make(map[string]string)

View File

@ -7,7 +7,9 @@
package http
import (
"bufio"
"bytes"
"io"
"os"
"net"
"testing"
@ -133,3 +135,86 @@ func TestConsumingBodyOnNextConn(t *testing.T) {
t.Errorf("Serve returned %q; expected EOF", serveerr)
}
}
type responseWriterMethodCall struct {
method string
headerKey, headerValue string // if method == "SetHeader"
bytesWritten []byte // if method == "Write"
responseCode int // if method == "WriteHeader"
}
type recordingResponseWriter struct {
log []*responseWriterMethodCall
}
func (rw *recordingResponseWriter) RemoteAddr() string {
return "1.2.3.4"
}
func (rw *recordingResponseWriter) UsingTLS() bool {
return false
}
func (rw *recordingResponseWriter) SetHeader(k, v string) {
rw.log = append(rw.log, &responseWriterMethodCall{method: "SetHeader", headerKey: k, headerValue: v})
}
func (rw *recordingResponseWriter) Write(buf []byte) (int, os.Error) {
rw.log = append(rw.log, &responseWriterMethodCall{method: "Write", bytesWritten: buf})
return len(buf), nil
}
func (rw *recordingResponseWriter) WriteHeader(code int) {
rw.log = append(rw.log, &responseWriterMethodCall{method: "WriteHeader", responseCode: code})
}
func (rw *recordingResponseWriter) Flush() {
rw.log = append(rw.log, &responseWriterMethodCall{method: "Flush"})
}
func (rw *recordingResponseWriter) Hijack() (io.ReadWriteCloser, *bufio.ReadWriter, os.Error) {
panic("Not supported")
}
// Tests for http://code.google.com/p/go/issues/detail?id=900
func TestMuxRedirectLeadingSlashes(t *testing.T) {
paths := []string{"//foo.txt", "///foo.txt", "/../../foo.txt"}
for _, path := range paths {
req, err := ReadRequest(bufio.NewReader(bytes.NewBufferString("GET " + path + " HTTP/1.1\r\nHost: test\r\n\r\n")))
if err != nil {
t.Errorf("%s", err)
}
mux := NewServeMux()
resp := new(recordingResponseWriter)
resp.log = make([]*responseWriterMethodCall, 0)
mux.ServeHTTP(resp, req)
dumpLog := func() {
t.Logf("For path %q:", path)
for _, call := range resp.log {
t.Logf("Got call: %s, header=%s, value=%s, buf=%q, code=%d", call.method,
call.headerKey, call.headerValue, call.bytesWritten, call.responseCode)
}
}
if len(resp.log) != 2 {
dumpLog()
t.Errorf("expected 2 calls to response writer; got %d", len(resp.log))
return
}
if resp.log[0].method != "SetHeader" ||
resp.log[0].headerKey != "Location" || resp.log[0].headerValue != "/foo.txt" {
dumpLog()
t.Errorf("Expected SetHeader of Location to /foo.txt")
return
}
if resp.log[1].method != "WriteHeader" || resp.log[1].responseCode != StatusMovedPermanently {
dumpLog()
t.Errorf("Expected WriteHeader of StatusMovedPermanently")
return
}
}
}

View File

@ -400,7 +400,7 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
goto Error
}
if url.Scheme != "" && (len(path) == 0 || path[0] != '/') {
if url.Scheme != "" && !strings.HasPrefix(path, "/") {
// RFC 2396:
// Absolute URI (has scheme) with non-rooted path
// is uninterpreted. It doesn't even have a ?query.
@ -420,7 +420,7 @@ func ParseURL(rawurl string) (url *URL, err os.Error) {
}
// Maybe path is //authority/path
if url.Scheme != "" && len(path) > 2 && path[0:2] == "//" {
if strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") {
url.RawAuthority, path = split(path[2:], '/', false)
url.RawPath = url.RawPath[2+len(url.RawAuthority):]
}
@ -515,3 +515,19 @@ func (url *URL) String() string {
}
return result
}
// cleanURLForRequest cleans URLs as parsed from ReadRequest.
// ReadRequest uses ParseURL which accepts a superset of URL formats
// which are valid for web requests (scheme-relative URLs, for example)
// Ideally ReadRequest should use a different parse function for HTTP
// serving context, but for now we'll just fix it up here.
func cleanURLForHTTPRequest(url *URL) {
if url.Scheme == "" && url.RawAuthority != "" {
prefix := "//" + url.RawAuthority
url.Host = ""
url.RawAuthority = ""
url.RawUserinfo = ""
url.Path = prefix + url.Path
url.RawPath = prefix + url.RawPath
}
}

View File

@ -188,14 +188,48 @@ var urltests = []URLTest{
},
"",
},
// leading // without scheme shouldn't create an authority
// leading // without scheme should create an authority
{
"//foo",
&URL{
Raw: "//foo",
Scheme: "",
RawPath: "//foo",
Path: "//foo",
RawAuthority: "foo",
Raw: "//foo",
Host: "foo",
Scheme: "",
RawPath: "",
Path: "",
},
"",
},
// leading // without scheme, with userinfo, path, and query
{
"//user@foo/path?a=b",
&URL{
Raw: "//user@foo/path?a=b",
RawAuthority: "user@foo",
RawUserinfo: "user",
Scheme: "",
RawPath: "/path?a=b",
Path: "/path",
RawQuery: "a=b",
Host: "foo",
},
"",
},
// Three leading slashes isn't an authority, but doesn't return an error.
// (We can't return an error, as this code is also used via
// ServeHTTP -> ReadRequest -> ParseURL, which is arguably a
// different URL parsing context, but currently shares the
// same codepath)
{
"///threeslashes",
&URL{
RawAuthority: "",
Raw: "///threeslashes",
Host: "",
Scheme: "",
RawPath: "///threeslashes",
Path: "///threeslashes",
},
"",
},
@ -272,7 +306,7 @@ var urlfragtests = []URLTest{
// more useful string for debugging than fmt's struct printer
func ufmt(u *URL) string {
return fmt.Sprintf("%q, %q, %q, %q, %q, %q, %q, %q, %q",
return fmt.Sprintf("raw=%q, scheme=%q, rawpath=%q, auth=%q, userinfo=%q, host=%q, path=%q, rawq=%q, frag=%q",
u.Raw, u.Scheme, u.RawPath, u.RawAuthority, u.RawUserinfo,
u.Host, u.Path, u.RawQuery, u.Fragment)
}
@ -507,3 +541,30 @@ func TestUnescapeUserinfo(t *testing.T) {
}
}
}
func TestCleanURLForHTTPRequest(t *testing.T) {
path := "//user@foo/bar/"
url, _ := ParseURL(path)
if url.RawAuthority != "user@foo" {
t.Errorf("Expected authority of 'foo'; got %q", url.RawAuthority)
}
if url.RawUserinfo != "user" {
t.Errorf("Expected userinfo of 'user'; got %q", url.RawUserinfo)
}
cleanURLForHTTPRequest(url)
if url.RawAuthority != "" {
t.Errorf("Expected blank authority.")
}
if url.RawUserinfo != "" {
t.Errorf("Expected blank userinfo.")
}
if url.Host != "" {
t.Errorf("Expected blank host.")
}
if url.RawPath != path {
t.Errorf("Expected path %q; got %q", path, url.RawPath)
}
if url.Path != path {
t.Errorf("Expected path %q; got %q", path, url.Path)
}
}