# -*- coding: utf-8 -*-
"""
    proxy.py
    ~~~~~~~~
    ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on
    Network monitoring, controls & Application development, testing, debugging.

    :copyright: (c) 2013-present by Abhinav Singh and contributors.
    :license: BSD, see LICENSE for more details.
"""
import time
from abc import abstractmethod
from typing import Any

from proxy import Proxy
from proxy.core.work import Work
from proxy.common.types import Readables, Writables, SelectableEvents
from proxy.core.connection import TcpClientConnection


class WebScraper(Work[TcpClientConnection]):
    """Demonstrates how to orchestrate a generic work acceptor and executor
    workflow using the proxy.py core.

    By default, `WebScraper` expects to receive work from a file on disk.
    Each line in the file must be a URL to scrape. A received URL is scraped
    by the implementation in this class.

    After scraping, results are published to the eventing core. One or more
    result subscribers can then handle the result as necessary. Currently,
    result subscribers consume the scraped response and write discovered URLs
    back into the file on disk. This creates a feedback loop, allowing
    `WebScraper` to continue endlessly.

    NOTE: No loop detection is performed currently.

    NOTE: The file descriptor need not point to a file on disk.
        For example, it can be a database connection. For simplicity,
        imagine a Redis server connection handling only the PUBSUB protocol.
    """
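
    # Rough sketch of the workflow described above; the method bodies below are
    # intentionally left as stubs in this example, so the steps are indicative
    # only:
    #
    #   1. ``get_events()`` reports which descriptors and events the core should
    #      watch for us; a concrete scraper would presumably register its URL
    #      source here instead of returning an empty map.
    #   2. ``handle_events()`` reads a URL line from the source, fetches and
    #      scrapes it, and publishes the scraped result to the eventing core.
    #   3. Result subscribers consume that result and append newly discovered
    #      URLs back to the input file, feeding step 1 again (the feedback loop
    #      noted above).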

    async def get_events(self) -> SelectableEvents:
        """Return sockets and events (read or write) that we are interested in."""
        return {}

    async def handle_events(
            self,
            readables: Readables,
            writables: Writables,
    ) -> bool:
        """Handle readable and writable sockets.

        Return True to shutdown work."""
        return False
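
    # ``create`` is presumably the hook the proxy.py core uses to construct this
    # class's work payload (a ``TcpClientConnection`` here) from whatever the
    # acceptor hands over via ``*args``; it is deliberately left abstract in
    # this example.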

    @staticmethod
    @abstractmethod
    def create(*args: Any) -> TcpClientConnection:
        raise NotImplementedError()
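

# The ``Proxy`` context manager starts the work pool with ``WebScraper`` as the
# work class (a single threadless worker here) and tears it down again when the
# block exits; the main thread simply sleeps in the meantime.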
if __name__ == '__main__':
    with Proxy(
        work_klass=WebScraper,
        threadless=True,
        num_workers=1,
        port=12345,
    ) as pool:
        while True:
            time.sleep(1)