diff --git a/lib/go/http/url.go b/lib/go/http/url.go
index c666d01bc..129d4ff06 100644
--- a/lib/go/http/url.go
+++ b/lib/go/http/url.go
@@ -458,6 +458,9 @@ Error:
 }
 
 // ParseURLReference is like ParseURL but allows a trailing #fragment.
+//
+// Per RFC 2396,
+// URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 func ParseURLReference(rawurlref string) (url *URL, err os.Error) {
 	// Cut off #frag.
 	rawurl, frag := split(rawurlref, '#', false)
@@ -516,6 +519,125 @@ func (url *URL) String() string {
 	return result
 }
 
+// IsAbsolute reports whether url is absolute, that is, whether its Scheme
+// is non-empty.
+func (url *URL) IsAbsolute() bool {
+	return url.Scheme != ""
+}
+
+// Add resolves a URI reference to an absolute URI from an absolute base URI, per
+// RFC 2396 Section 5.2.  The URI reference may be relative or absolute.  Add
+// always returns a new URL instance, even if the returned URL is identical to
+// either the base or the reference; neither base nor ref is modified.
+//
+// The reference is handled according to its form:
+//   - absolute (it has a scheme): the result is a copy of ref;
+//   - empty path, no authority and no query: a reference to the current
+//     document, so the path and query of base are kept;
+//   - net_path (it has an authority) or abs_path (its path starts with "/"):
+//     the path and query of ref replace those of base, which is also what
+//     happens to any other reference when base has an opaque path;
+//   - rel_path: the path of ref is merged into the path of base.
+//
+// Unless ref is absolute, the scheme comes from base and the fragment from ref.
+func (base *URL) Add(ref *URL) *URL {
+	url := new(URL)
+	if ref.IsAbsolute() {
+		*url = *ref
+		url.Raw = url.String()
+		return url
+	}
+
+	// relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
+	// Start from a copy of base so that the scheme, and the authority unless
+	// ref supplies one, are inherited.
+	*url = *base
+	url.Fragment = ref.Fragment
+	netPath := ref.RawAuthority != ""
+	if netPath {
+		// The "net_path" case.
+		url.RawAuthority = ref.RawAuthority
+		url.Host = ref.Host
+		url.RawUserinfo = ref.RawUserinfo
+	}
+
+	switch {
+	case !netPath && ref.Path == "" && ref.RawQuery == "":
+		// Reference to the current document (Section 5.2 step 2): Path,
+		// RawPath and RawQuery stay exactly as copied from base.
+	case netPath || url.OpaquePath || strings.HasPrefix(ref.Path, "/"):
+		// The "net_path" and "abs_path" cases, plus any reference against
+		// an opaque base: the base path is not consulted.
+		url.Path = ref.Path
+		url.RawPath = ref.RawPath
+		url.RawQuery = ref.RawQuery
+	default:
+		// The "rel_path" case.
+		segs := strings.Split(base.Path, "/", -1)
+		refSegs := strings.Split(ref.Path, "/", -1)
+		path := strings.Join(applyReferenceSegments(segs, refSegs), "/")
+		if !strings.HasPrefix(path, "/") {
+			path = "/" + path
+		}
+		url.Path = path
+		url.RawPath = path
+		url.RawQuery = ref.RawQuery
+		if ref.RawQuery != "" {
+			url.RawPath += "?" + ref.RawQuery
+		}
+	}
+	url.Raw = url.String()
+	return url
+}
+
+// applyReferenceSegments merges the "/"-separated segments of a relative path
+// reference (refs) into those of a base path (base) and returns the segments
+// of the resulting path, following RFC 2396 Section 5.2 step 6: the last
+// segment of base is dropped, "." segments are discarded and each ".." segment
+// removes the segment before it.  A ".." with nothing left to remove is
+// dropped, and the empty leading segment that marks a rooted path ("/a" splits
+// into "", "a") is never removed.  When refs ends in "." or ".." the result
+// ends in an empty segment, so that joining with "/" gives a trailing slash.
+//
+// The result is a new slice; base and refs are not modified.  If refs is
+// empty the result is a copy of base, or a single empty segment if base is
+// empty too.
+func applyReferenceSegments(base []string, refs []string) []string {
+	segs := make([]string, len(base), len(base)+len(refs))
+	copy(segs, base)
+	if len(refs) == 0 {
+		if len(segs) == 0 {
+			segs = append(segs, "")
+		}
+		return segs
+	}
+
+	// Only the directory part of base takes part in the merge.
+	if len(segs) > 0 {
+		segs = segs[0 : len(segs)-1]
+	}
+	// floor is the number of leading segments that ".." may not remove.
+	floor := 0
+	if len(segs) > 0 && segs[0] == "" {
+		floor = 1
+	}
+	for idx, ref := range refs {
+		switch ref {
+		case ".", "..":
+			if ref == ".." && len(segs) > floor {
+				segs = segs[0 : len(segs)-1]
+			}
+			if idx == len(refs)-1 {
+				// The path now names a directory; keep its trailing slash.
+				segs = append(segs, "")
+			}
+		default:
+			segs = append(segs, ref)
+		}
+	}
+	return segs
+}
+
 // cleanURLForRequest cleans URLs as parsed from ReadRequest.
 // ReadRequest uses ParseURL which accepts a superset of URL formats
 // which are valid for web requests (scheme-relative URLs, for example)
diff --git a/lib/go/http/url_test.go b/lib/go/http/url_test.go
index f1f8c605f..0f549ee55 100644
--- a/lib/go/http/url_test.go
+++ b/lib/go/http/url_test.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"os"
 	"reflect"
+	"strings"
 	"testing"
 )
 
@@ -568,3 +569,115 @@ func TestCleanURLForHTTPRequest(t *testing.T) {
 		t.Errorf("Expected path %q; got %q", path, url.Path)
 	}
 }
+
+// mustParseURL parses url with ParseURLReference and fails the test
+// immediately if it does not parse.
+func mustParseURL(t *testing.T, url string) *URL {
+	u, err := ParseURLReference(url)
+	if err != nil {
+		t.Fatalf("Expected URL to parse: %q, got error: %v", url, err)
+	}
+	return u
+}
+
+// TestApplyReferenceSegments checks the merging of relative path segments,
+// including dot segments that follow an ordinary segment.
+func TestApplyReferenceSegments(t *testing.T) {
+	tests := []struct {
+		base, ref, expected string
+	}{
+		{"a/b", ".", "a/"},
+		{"a/b", "c", "a/c"},
+		{"a/b", "..", ""},
+		{"a/", "..", ""},
+		{"a/", "../..", ""},
+		{"a/b/c", "..", "a/"},
+		{"a/b/c", "../d", "a/d"},
+		{"a/b/c", ".././d", "a/d"},
+		{"a/b", "./..", ""},
+
+		// Dot segments after an ordinary segment
+		{"a/b", "c/.", "a/c/"},
+		{"a/b", "c/..", "a/"},
+		{"a/b/c", "d/../e", "a/b/e"},
+	}
+	for _, test := range tests {
+		segs := strings.Split(test.base, "/", -1)
+		refSegs := strings.Split(test.ref, "/", -1)
+		got := strings.Join(applyReferenceSegments(segs, refSegs), "/")
+		if got != test.expected {
+			t.Errorf("For %q + %q got %q; expected %q", test.base, test.ref, got, test.expected)
+		}
+	}
+}
+
+// TestURLAddPathlessReference checks references that have no path of their
+// own, built directly so that the test does not depend on the parser.
+func TestURLAddPathlessReference(t *testing.T) {
+	base := mustParseURL(t, "http://foo.com/bar/baz?a=b")
+
+	// A bare fragment refers to the current document: the base path and
+	// query are kept.
+	same := base.Add(&URL{Fragment: "frag"})
+	if same.Path != base.Path || same.RawQuery != base.RawQuery || same.Fragment != "frag" {
+		t.Errorf("same-document reference: got path %q, query %q, fragment %q", same.Path, same.RawQuery, same.Fragment)
+	}
+
+	// A network-path reference without a path must not inherit the base path.
+	other := base.Add(&URL{RawAuthority: "bar.com", Host: "bar.com"})
+	if other.Host != "bar.com" || other.Path != "" {
+		t.Errorf("network-path reference: got host %q, path %q", other.Host, other.Path)
+	}
+}
+
+// TestURLAdd resolves each reference against its base with Add and compares
+// the String form of the result with the expected URL.
+func TestURLAdd(t *testing.T) {
+	cases := []struct {
+		base, rel, expected string
+	}{
+		// References that carry their own scheme replace the base.
+		{"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"},
+		{"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"},
+		{"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"},
+
+		// References whose path starts at the root.
+		{"http://foo.com/bar", "/baz", "http://foo.com/baz"},
+		{"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"},
+		{"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"},
+
+		// References that start with an authority.
+		{"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"},
+
+		// Relative paths: the current directory.
+		{"http://foo.com", ".", "http://foo.com/"},
+		{"http://foo.com/bar", ".", "http://foo.com/"},
+		{"http://foo.com/bar/", ".", "http://foo.com/bar/"},
+
+		// Relative paths: descending.
+		{"http://foo.com", "bar", "http://foo.com/bar"},
+		{"http://foo.com/", "bar", "http://foo.com/bar"},
+		{"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"},
+
+		// Relative paths: ascending, never above the root.
+		{"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"},
+		{"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"},
+		{"http://foo.com/bar", "..", "http://foo.com/"},
+		{"http://foo.com/bar/baz", "./..", "http://foo.com/"},
+
+		// Three dots are an ordinary segment name.
+		{"http://foo.com/bar", "...", "http://foo.com/..."},
+
+		// The fragment of the reference is carried over.
+		{"http://foo.com/bar", ".#frag", "http://foo.com/#frag"},
+	}
+	for _, tc := range cases {
+		baseURL := mustParseURL(t, tc.base)
+		refURL := mustParseURL(t, tc.rel)
+		got := baseURL.Add(refURL).String()
+		if got == tc.expected {
+			continue
+		}
+		t.Errorf("Adding %q + %q != %q; got %q", tc.base, tc.rel, tc.expected, got)
+	}
+}