From 9f75d2d42d47a351b6d2974399228c47e91f15b3 Mon Sep 17 00:00:00 2001
From: EtiennePelletier
Date: Tue, 24 Oct 2017 23:36:58 -0400
Subject: [PATCH] Bypass UnicodeDecodeErrors in Webspider demo

---
 demos/webspider/webspider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demos/webspider/webspider.py b/demos/webspider/webspider.py
index 59e9dac2..dd8e6b38 100644
--- a/demos/webspider/webspider.py
+++ b/demos/webspider/webspider.py
@@ -29,7 +29,7 @@ def get_links_from_url(url):
         print('fetched %s' % url)
 
         html = response.body if isinstance(response.body, str) \
-            else response.body.decode()
+            else response.body.decode(errors='ignore')
         urls = [urljoin(url, remove_fragment(new_url))
                 for new_url in get_links(html)]
     except Exception as e: