Change the implementation of the remove_fragment method
This commit is contained in:
parent
e438075d22
commit
f803e43ee8
|
@ -3,10 +3,10 @@ from datetime import timedelta
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
from urlparse import urljoin, urlparse, urlunparse
|
from urlparse import urljoin, urldefrag
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from urllib.parse import urljoin, urlparse, urlunparse
|
from urllib.parse import urljoin, urldefrag
|
||||||
|
|
||||||
from tornado import httpclient, gen, ioloop, queues
|
from tornado import httpclient, gen, ioloop, queues
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ def get_links_from_url(url):
|
||||||
|
|
||||||
|
|
||||||
def remove_fragment(url):
|
def remove_fragment(url):
|
||||||
scheme, netloc, url, params, query, fragment = urlparse(url)
|
pure_url, frag = urldefrag(url)
|
||||||
return urlunparse((scheme, netloc, url, params, query, ''))
|
return pure_url
|
||||||
|
|
||||||
|
|
||||||
def get_links(html):
|
def get_links(html):
|
||||||
|
|
Loading…
Reference in New Issue