From f803e43ee88ab17dcfeed9ee9399c3821f6bace0 Mon Sep 17 00:00:00 2001 From: Ping Date: Fri, 25 Sep 2015 17:04:55 +0800 Subject: [PATCH] Change the implementation of the remove_fragment method --- demos/webspider/webspider.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demos/webspider/webspider.py b/demos/webspider/webspider.py index a36798de..b6468b85 100644 --- a/demos/webspider/webspider.py +++ b/demos/webspider/webspider.py @@ -3,10 +3,10 @@ from datetime import timedelta try: from HTMLParser import HTMLParser - from urlparse import urljoin, urlparse, urlunparse + from urlparse import urljoin, urldefrag except ImportError: from html.parser import HTMLParser - from urllib.parse import urljoin, urlparse, urlunparse + from urllib.parse import urljoin, urldefrag from tornado import httpclient, gen, ioloop, queues @@ -38,8 +38,8 @@ def get_links_from_url(url): def remove_fragment(url): - scheme, netloc, url, params, query, fragment = urlparse(url) - return urlunparse((scheme, netloc, url, params, query, '')) + pure_url, frag = urldefrag(url) + return pure_url def get_links(html):