Base + reference URI => Absolute URI.

This commit is contained in:
Brad Fitzpatrick 2011-01-11 10:33:16 -08:00
parent eaefd4f708
commit 7f85b55928
2 changed files with 170 additions and 0 deletions

View File

@@ -458,6 +458,9 @@ Error:
}
// ParseURLReference is like ParseURL but allows a trailing #fragment.
//
// Per RFC 2396,
// URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
func ParseURLReference(rawurlref string) (url *URL, err os.Error) {
// Cut off #frag.
rawurl, frag := split(rawurlref, '#', false)
@@ -516,6 +519,88 @@ func (url *URL) String() string {
return result
}
// IsAbsolute reports whether the URL carries a scheme, i.e. whether its
// Scheme field is non-empty.
func (url *URL) IsAbsolute() bool {
	return len(url.Scheme) > 0
}
// Add resolves a URI reference to an absolute URI from an absolute base URI, per
// RFC 2396 Section 5.2. The URI reference may be relative or absolute. Note
// that Add always returns a new URL instance, even if the returned URL is identical
// to either the base or reference. Neither base nor ref is written to.
//
// An absolute ref (one with a scheme) is copied as is. Otherwise the result
// starts out as a copy of base and then takes from ref:
//   - the authority (RawAuthority, Host, RawUserinfo), when ref has one;
//   - the path and query: verbatim when base has an opaque path, when ref
//     has an authority, or when ref.Path starts with "/"; otherwise
//     ref.Path is merged into base.Path by applyReferenceSegments;
//   - the fragment.
//
// The Raw field of the result is regenerated from its String form.
func (base *URL) Add(ref *URL) *URL {
	url := new(URL)
	switch {
	case ref.IsAbsolute():
		*url = *ref
	default:
		// relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
		*url = *base
		if ref.RawAuthority != "" {
			// The "net_path" case.
			url.RawAuthority = ref.RawAuthority
			url.Host = ref.Host
			url.RawUserinfo = ref.RawUserinfo
		}
		switch {
		case url.OpaquePath:
			// OpaquePath was copied from base: there is no path hierarchy
			// to merge into, so the reference's path and query are used
			// as given.
			url.Path = ref.Path
			url.RawPath = ref.RawPath
			url.RawQuery = ref.RawQuery
		case ref.RawAuthority != "" || strings.HasPrefix(ref.Path, "/"):
			// The "net_path" and "abs_path" cases: the reference's path
			// replaces the base path outright. A net_path whose path is
			// empty (e.g. "//host") must land here too; treating it as a
			// rel_path would graft the base's directory onto the new host.
			url.Path = ref.Path
			url.RawPath = ref.RawPath
			url.RawQuery = ref.RawQuery
		default:
			// The "rel_path" case.
			segs := strings.Split(base.Path, "/", -1)
			refSegs := strings.Split(ref.Path, "/", -1)
			segs = applyReferenceSegments(segs, refSegs)
			path := strings.Join(segs, "/")
			// Walking above the root with ".." can consume the leading
			// empty segment; put the root slash back.
			if !strings.HasPrefix(path, "/") {
				path = "/" + path
			}
			url.Path = path
			url.RawPath = url.Path
			url.RawQuery = ref.RawQuery
			if ref.RawQuery != "" {
				url.RawPath += "?" + url.RawQuery
			}
		}
		url.Fragment = ref.Fragment
	}
	url.Raw = url.String()
	return url
}
// applyReferenceSegments merges the "/"-separated segments of a relative
// reference path (refs) into the segments of a base path (base) and returns
// the resulting segments.
//
// The final element of the working slice is the "file" slot: an empty string
// there means the path ends in a directory (trailing slash once joined).
// The first reference segment replaces the base's file slot. A "." segment
// stays in the current directory and a ".." segment moves up one directory;
// moving up never shrinks the slice below one element, so excess ".."
// segments are absorbed at the top.
//
// A "." or ".." that follows an ordinary reference segment acts on that
// segment as a directory ("c/." keeps "c/", "c/.." cancels "c") instead of
// treating it as the base's file name.
//
// Note: mutates 'base'
func applyReferenceSegments(base []string, refs []string) []string {
	if len(base) == 0 {
		base = []string{""}
	}
	for idx, ref := range refs {
		last := len(base) - 1
		// After the first reference segment, a non-empty final slot can only
		// have been written by an earlier reference segment; because more
		// segments follow it, it names a directory, not the base's file.
		afterDir := idx > 0 && base[last] != ""
		switch ref {
		case ".":
			if afterDir {
				// Stay inside that directory: open an empty file slot.
				base = append(base, "")
			} else {
				base[last] = ""
			}
		case "..":
			if afterDir {
				// Leave that directory: it simply drops out of the path.
				base[last] = ""
			} else {
				// Drop the file slot and turn the parent directory's
				// element into the new (empty) file slot.
				newLen := last
				if newLen < 1 {
					newLen = 1
				}
				base = base[0:newLen]
				base[len(base)-1] = ""
			}
		default:
			if idx == 0 || base[last] == "" {
				base[last] = ref
			} else {
				base = append(base, ref)
			}
		}
	}
	return base
}
// cleanURLForRequest cleans URLs as parsed from ReadRequest.
// ReadRequest uses ParseURL which accepts a superset of URL formats
// which are valid for web requests (scheme-relative URLs, for example)

View File

@@ -8,6 +8,7 @@ import (
"fmt"
"os"
"reflect"
"strings"
"testing"
)
@@ -568,3 +569,87 @@ func TestCleanURLForHTTPRequest(t *testing.T) {
t.Errorf("Expected path %q; got %q", path, url.Path)
}
}
// mustParseURL parses url with ParseURLReference and returns the result.
// A parse error is reported through t.Fatalf, which aborts the calling test.
func mustParseURL(t *testing.T, url string) *URL {
	parsed, parseErr := ParseURLReference(url)
	if parseErr == nil {
		return parsed
	}
	t.Fatalf("Expected URL to parse: %q, got error: %v", url, parseErr)
	return parsed
}
func TestApplyReferenceSegments(t *testing.T) {
tests := []struct {
base, ref, expected string
}{
{"a/b", ".", "a/"},
{"a/b", "c", "a/c"},
{"a/b", "..", ""},
{"a/", "..", ""},
{"a/", "../..", ""},
{"a/b/c", "..", "a/"},
{"a/b/c", "../d", "a/d"},
{"a/b/c", ".././d", "a/d"},
{"a/b", "./..", ""},
}
for _, test := range tests {
segs := strings.Split(test.base, "/", -1)
refSegs := strings.Split(test.ref, "/", -1)
got := strings.Join(applyReferenceSegments(segs, refSegs), "/")
if got != test.expected {
t.Errorf("For %q + %q got %q; expected %q", test.base, test.ref, got, test.expected)
}
}
}
// TestURLAdd parses each base and reference string, resolves the reference
// against the base with Add, and compares the String form of the result
// with the expected absolute URL.
func TestURLAdd(t *testing.T) {
	cases := []struct {
		base, rel, expected string
	}{
		// References that already carry a scheme.
		{"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"},
		{"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"},
		{"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"},

		// References whose path starts at the root.
		{"http://foo.com/bar", "/baz", "http://foo.com/baz"},
		{"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"},
		{"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"},

		// Reference with an authority but no scheme.
		{"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"},

		// Path-relative: staying in the current directory.
		{"http://foo.com", ".", "http://foo.com/"},
		{"http://foo.com/bar", ".", "http://foo.com/"},
		{"http://foo.com/bar/", ".", "http://foo.com/bar/"},

		// Path-relative: descending.
		{"http://foo.com", "bar", "http://foo.com/bar"},
		{"http://foo.com/", "bar", "http://foo.com/bar"},
		{"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"},

		// Path-relative: ascending, including past the root.
		{"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"},
		{"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"},
		{"http://foo.com/bar", "..", "http://foo.com/"},
		{"http://foo.com/bar/baz", "./..", "http://foo.com/"},

		// Three dots form an ordinary segment.
		{"http://foo.com/bar", "...", "http://foo.com/..."},

		// The reference's fragment is carried over.
		{"http://foo.com/bar", ".#frag", "http://foo.com/#frag"},
	}
	for i := range cases {
		tc := cases[i]
		baseURL := mustParseURL(t, tc.base)
		relURL := mustParseURL(t, tc.rel)
		if got := baseURL.Add(relURL).String(); got != tc.expected {
			t.Errorf("Adding %q + %q != %q; got %q", tc.base, tc.rel, tc.expected, got)
		}
	}
}