Change the implementation of the remove_fragment method

This commit is contained in:
Ping 2015-09-25 17:04:55 +08:00
parent e438075d22
commit f803e43ee8
1 changed file with 4 additions and 4 deletions

View File

@@ -3,10 +3,10 @@ from datetime import timedelta
try: try:
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
from urlparse import urljoin, urlparse, urlunparse from urlparse import urljoin, urldefrag
except ImportError: except ImportError:
from html.parser import HTMLParser from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse, urlunparse from urllib.parse import urljoin, urldefrag
from tornado import httpclient, gen, ioloop, queues from tornado import httpclient, gen, ioloop, queues
@@ -38,8 +38,8 @@ def get_links_from_url(url):
def remove_fragment(url): def remove_fragment(url):
scheme, netloc, url, params, query, fragment = urlparse(url) pure_url, frag = urldefrag(url)
return urlunparse((scheme, netloc, url, params, query, '')) return pure_url
def get_links(html): def get_links(html):