Skip to content

proxy

onion_peeler.middlewares.proxy

Hybrid proxy middleware

Configures the downloader to use the proxy that matches the domain of each request:

- Tor proxy for .onion domains
- gluetun VPN proxy for clearnet domains

Manages the Tor circuit through the control port, enabling IP rotation as needed.

ProxyMiddleware(settings)

Bases: HttpProxyMiddleware

Source code in src/onion_peeler/middlewares/proxy.py
def __init__(self, settings):
    """Configure the Tor and VPN proxy endpoints from project settings.

    Reads the ``ONION_PEELER_PROXY`` setting, which is expected to be a
    dict. If it is missing or not a dict, every option falls back to its
    default. Recognised keys and defaults:

    - ``tor_host`` (``"tor"``), ``tor_port`` (``9080``)
    - ``tor_control_host`` (defaults to ``tor_host``),
      ``tor_control_port`` (``9051``)
    - ``tor_control_password`` (falls back to the
      ``TOR_CONTROL_PASSWORD`` environment variable, then ``""``)
    - ``tor_rotation_interval`` (``10``)
    - ``vpn_host`` (``"gluetun"``), ``vpn_port`` (``8888``)
    - ``tor_proxy_url`` / ``vpn_proxy_url`` (built from host and port
      as ``http://host:port`` when not given)

    Args:
        settings: Settings object exposing ``attributes`` (a mapping of
            setting name to either the raw value or a wrapper with a
            ``value`` attribute) and ``get(name, default)``.

    Raises:
        ValueError: If a configured port or interval cannot be converted
            to ``int``.
    """
    # The parent constructor takes the proxy auth encoding (a codec name),
    # not the settings object; passing settings would leave
    # ``self.auth_encoding`` holding a non-string.
    super().__init__(settings.get("HTTPPROXY_AUTH_ENCODING", "latin-1"))

    proxy_setting_key = "ONION_PEELER_PROXY"
    proxy_cfg = settings.attributes.get(proxy_setting_key)
    # Unwrap the stored setting if it is a wrapper carrying ``.value``.
    if hasattr(proxy_cfg, "value"):
        proxy_cfg = proxy_cfg.value
    # Anything that is not a dict (including a missing setting) means
    # "use defaults for everything".
    if not isinstance(proxy_cfg, dict):
        proxy_cfg = {}

    self.tor_host = proxy_cfg.get("tor_host", "tor")
    self.tor_port = int(proxy_cfg.get("tor_port", 9080))
    self.tor_control_host = proxy_cfg.get("tor_control_host", self.tor_host)
    self.tor_control_port = int(proxy_cfg.get("tor_control_port", 9051))
    # An empty configured password falls through to the environment.
    self.tor_password = (
        proxy_cfg.get("tor_control_password", "")
        or os.getenv("TOR_CONTROL_PASSWORD", "")
    )
    self.tor_rotation_interval = int(proxy_cfg.get("tor_rotation_interval", 10))

    vpn_host = proxy_cfg.get("vpn_host", "gluetun")
    vpn_port = int(proxy_cfg.get("vpn_port", 8888))

    # Explicit proxy URLs take precedence over the host/port pairs.
    self.tor_proxy = proxy_cfg.get("tor_proxy_url", f"http://{self.tor_host}:{self.tor_port}")
    self.vpn_proxy = proxy_cfg.get("vpn_proxy_url", f"http://{vpn_host}:{vpn_port}")

    self._tor_controller = None
    self._connect_tor_controller()

process_request(request, spider)

Route request through appropriate proxy

Source code in src/onion_peeler/middlewares/proxy.py
def process_request(self, request: Request, spider):
    """Assign the outbound proxy for ``request`` based on its hostname.

    Hosts ending in ``.onion`` or ``.exit``, and the exact host
    ``check.torproject.org``, are sent through ``self.tor_proxy``; every
    other host is sent through ``self.vpn_proxy``. The choice is written
    to ``request.meta["proxy"]``.

    Before routing, ``_should_rotate_tor`` is consulted and, when it
    returns a truthy value, ``_rotate_circuit`` is called.

    Args:
        request: The outgoing request; its ``url`` is parsed and its
            ``meta`` mapping is updated in place.
        spider: Forwarded unchanged to ``_should_rotate_tor``.

    Returns:
        None.
    """
    # The rotation check runs first, before this request is counted.
    if self._should_rotate_tor(request, spider):
        self._rotate_circuit()

    parsed_host = urlparse(request.url).hostname
    hostname = parsed_host.lower() if parsed_host else ""
    goes_through_tor = (
        hostname.endswith((".onion", ".exit"))
        or hostname == "check.torproject.org"
    )

    if not goes_through_tor:
        request.meta["proxy"] = self.vpn_proxy
        return

    request.meta["proxy"] = self.tor_proxy
    # Class-level count of Tor-routed requests, updated under the lock.
    with self._lock:
        ProxyMiddleware._tor_req_count += 1