Skip to content

models

onion_peeler.settings.models

GlobalConfig

Bases: BaseModel

Global configuration for the scraper, including proxy settings and other global options.

Can be loaded from:
  • A toml file (e.g. config.toml)
  • Site-specific config files (e.g. site_config.yaml) that override global defaults for specific sites
  • Environment variables for sensitive data (e.g. TOR_CONTROL_PASSWORD)

to_scrapy_dict()

Convert to Scrapy settings dictionary.

Only includes UPPERCASE fields (Scrapy convention).

Source code in src/onion_peeler/settings/models.py
def to_scrapy_dict(self) -> Dict[str, Any]:
    """
    Build a Scrapy settings dictionary from this model.

    The model is dumped with ``model_dump()`` and only entries whose
    field name is entirely UPPERCASE (the Scrapy setting naming
    convention) are kept.

    Returns:
        Mapping of upper-case field names to their dumped values.
    """
    scrapy_settings: Dict[str, Any] = {}

    for name, setting in self.model_dump().items():
        # Lower- or mixed-case fields are not Scrapy settings; skip them.
        if not name.isupper():
            continue
        scrapy_settings[name] = setting

    return scrapy_settings

SelectorsConfig

Bases: BaseModel

as_selector_dict()

Return all string-valued selector fields (fields set to None are excluded) as a plain dictionary.

Source code in src/onion_peeler/settings/models.py
def as_selector_dict(self) -> Dict[str, str]:
    """
    Return the string-valued selector fields as a plain dictionary.

    The model is dumped with ``exclude_none=True``; any remaining entry
    whose value is not a ``str`` is left out of the result.
    """
    dumped = self.model_dump(exclude_none=True)
    return {
        name: selector
        for name, selector in dumped.items()
        if isinstance(selector, str)
    }

get_field_selector(field_name)

Get raw selector string for a field, including extra fields.

Source code in src/onion_peeler/settings/models.py
def get_field_selector(self, field_name: str) -> Optional[str]:
    """
    Look up the raw selector string for ``field_name``.

    A regular attribute of that name is used when one exists. Otherwise
    the name is looked up in ``model_extra``, or in the instance
    ``__dict__`` when ``model_extra`` is missing or empty.

    Returns:
        The value found if it is a ``str``; ``None`` in every other case.
    """
    if not hasattr(self, field_name):
        # No such attribute: consult the extra-field storage instead.
        extras = getattr(self, "model_extra", None)
        if not extras:
            extras = getattr(self, "__dict__", {})
        candidate = extras.get(field_name)
    else:
        candidate = getattr(self, field_name)

    if isinstance(candidate, str):
        return candidate
    return None

SiteConfig

Bases: BaseModel

get_item_selectors(item_type)

Return selectors for an item type.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| item_type | str | Item type key from config, e.g. "thread" | required |
Source code in src/onion_peeler/settings/models.py
def get_item_selectors(self, item_type: str) -> Dict[str, str]:
    """
    Return the selector dictionary configured for an item type.

    Args:
        item_type: Item type key from config, e.g. "thread"

    Returns:
        The result of ``as_selector_dict()`` on the entry stored under
        ``item_type`` in ``self.selectors``, or an empty dict when that
        entry is missing or falsy.
    """
    item_config = self.selectors.get(item_type)
    return item_config.as_selector_dict() if item_config else {}

get_selector(item_type, field_name)

Get the selector for a specific field.

Source code in src/onion_peeler/settings/models.py
def get_selector(self, item_type: str, field_name: str) -> Optional[str]:
    """
    Get the selector for a specific field of an item type.

    Args:
        item_type: Key into ``self.selectors``.
        field_name: Field whose selector is requested.

    Returns:
        Whatever ``get_field_selector(field_name)`` returns for the
        matching entry, or ``None`` when no truthy entry exists for
        ``item_type``.
    """
    item_config = self.selectors.get(item_type)
    if item_config:
        return item_config.get_field_selector(field_name)
    return None