Skip to content

loader

onion_peeler.settings.loader

ConfigLoader(config_dir=None, strict=True)

Loads configuration from TOML files and validates with Pydantic.

Features:

- Merges base.toml with site-specific configs
- Environment variable overrides
- Automatic validation
- Helpful error messages

Initialize loader.

Parameters:

Name Type Description Default
config_dir Optional[str]

Path to config directory (default: project_root/config)

None
strict bool

If True, fail on validation errors

True
Source code in src/onion_peeler/settings/loader.py
def __init__(
        self,
        config_dir: Optional[str] = None,
        strict: bool = True
):
    """
    Set up the loader and derive the locations of its config files.

    Args:
        config_dir: Path to config directory (default: project_root/config);
            the final location is decided by ``_resolve_config_dir``.
        strict: If True, the load methods re-raise errors instead of
            returning fallback configs.
    """
    self.config_dir = self._resolve_config_dir(config_dir)
    self.strict = strict

    # Fixed layout inside the config directory: one shared base file plus
    # a directory of per-site files.
    self.base_file, self.sites_dir = (
        os.path.join(self.config_dir, leaf) for leaf in ("base.toml", "sites")
    )

find_site_for_url(url)

Find site ID by matching URL against domains.

Source code in src/onion_peeler/settings/loader.py
def find_site_for_url(self, url: str) -> Optional[str]:
    """
    Return the ID of the first site whose allowed domains contain the URL's host.

    Args:
        url: URL whose hostname is looked up.

    Returns:
        The matching site ID, or None when the URL has no hostname or no
        site lists that hostname in ``allowed_domains``.
    """
    host = urlparse(url).hostname
    if host:
        for candidate in self.list_sites():
            try:
                domains = self.load_site_config(candidate).allowed_domains
                matched = host in domains
            except Exception:
                # Best-effort lookup: a site that cannot be loaded or
                # inspected is skipped rather than aborting the search.
                continue
            if matched:
                return candidate

    return None

list_sites()

List all available site IDs.

Source code in src/onion_peeler/settings/loader.py
def list_sites(self) -> list[str]:
    """
    List all available site IDs.

    A site ID is the name of a regular ``*.toml`` file in ``self.sites_dir``
    with the extension removed.

    Returns:
        Site IDs in sorted order. The list is empty when ``self.sites_dir``
        is missing or is not a directory.
    """
    # isdir (not exists): a plain file at this path would make
    # os.listdir raise NotADirectoryError.
    if not os.path.isdir(self.sites_dir):
        return []

    suffix = ".toml"
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # result stable across runs and platforms.
    return sorted(
        entry.removesuffix(suffix)
        for entry in os.listdir(self.sites_dir)
        # `entry != suffix` skips a bare ".toml" file, which would
        # otherwise produce an empty site ID.
        if entry.endswith(suffix)
        and entry != suffix
        and os.path.isfile(os.path.join(self.sites_dir, entry))
    )

load_all_sites()

Load all site configurations.

Source code in src/onion_peeler/settings/loader.py
def load_all_sites(self) -> list[SiteConfig]:
    """
    Load the configuration of every site returned by ``list_sites``.

    Returns:
        The site configs that loaded; a site whose load raises is logged
        as a warning and left out of the result.
    """
    loaded = []
    for site_id in self.list_sites():
        try:
            site_config = self.load_site_config(site_id)
        except Exception as e:
            logger.warning(f"Failed to load '{site_id}': {e}")
        else:
            loaded.append(site_config)
    return loaded

load_global_config()

Load global configuration.

Loads from:

1. The [scrapy] section of base.toml (plus the [proxy] section, when present)
2. Environment variables (ONION_PEELER__*)

Returns:

Type Description
GlobalConfig

Validated GlobalConfig instance

Source code in src/onion_peeler/settings/loader.py
def load_global_config(self) -> GlobalConfig:
    """
    Build the global configuration from base.toml.

    Sources:
    1. base.toml [scrapy] section, with the [proxy] section attached
       under the "proxy" key when it is non-empty
    2. Environment variables (ONION_PEELER__*), as handled by GlobalConfig

    Returns:
        Validated GlobalConfig instance. When loading fails and ``strict``
        is False, a default-constructed GlobalConfig is returned instead.

    Raises:
        Exception: Whatever the load or validation raised, when ``strict``
            is True.
    """
    try:
        base = self._load_toml(self.base_file)
        scrapy_settings = base.get("scrapy", {})
        proxy_settings = base.get("proxy", {})

        # Copy the [scrapy] table so attaching the proxy settings does not
        # touch the parsed TOML data.
        settings = {**scrapy_settings}
        if proxy_settings:
            settings["proxy"] = proxy_settings

        # Environment handling is left to GlobalConfig itself.
        # NOTE(review): the original comment says env vars override the
        # TOML values passed here; that depends on how GlobalConfig orders
        # its settings sources — confirm against the model definition.
        loaded = GlobalConfig(**settings)

        logger.info("Global config loaded successfully")
        return loaded

    except Exception as e:
        logger.error(f"Failed to load global config: {e}")
        if not self.strict:
            # Lenient mode: fall back to defaults.
            return GlobalConfig()
        raise

load_site_config(site_id=None)

Load site-specific configuration.

Parameters:

Name Type Description Default
site_id Optional[str]

Site identifier (filename without .toml). If None or empty, uses the ONION_PEELER_SITE env var, then the site key of the [system] section in base.toml.

None

Returns:

Type Description
SiteConfig

Validated SiteConfig instance

Source code in src/onion_peeler/settings/loader.py
def load_site_config(self, site_id: Optional[str] = None) -> SiteConfig:
    """
    Load and validate the configuration of a single site.

    Args:
        site_id: Site identifier (filename without .toml). When empty or
            None, the ONION_PEELER_SITE env var is tried first, then the
            ``site`` key of the [system] section in base.toml.

    Returns:
        Validated SiteConfig instance. When loading fails and ``strict``
        is False, a SiteConfig carrying only the site name is returned.

    Raises:
        ValueError: If no site ID can be resolved from any source.
        Exception: Whatever the load or validation raised (including
            FileNotFoundError for a missing site file), when ``strict``
            is True.
    """
    # Resolution order: explicit argument, environment, base.toml.
    resolved = site_id or os.getenv("ONION_PEELER_SITE")
    if not resolved:
        resolved = self._load_toml(self.base_file).get("system", {}).get("site")

    if not resolved:
        logger.warning("No site_id specified and ONION_PEELER_SITE not set")
        raise ValueError("site_id required")

    try:
        base_data = self._load_toml(self.base_file)

        site_file = os.path.join(self.sites_dir, f"{resolved}.toml")
        if not os.path.exists(site_file):
            raise FileNotFoundError(f"Site config not found: {site_file}")

        # Site values are layered on top of the base values.
        merged = self._deep_merge(base_data, self._load_toml(site_file))

        # The SiteConfig fields live under the [site] table.
        section = merged.get("site", {})
        # A top-level [proxy] table is used unless [site] defines its own.
        inherit_proxy = "proxy" in merged and "proxy" not in section
        if inherit_proxy:
            section["proxy"] = merged["proxy"]

        validated = SiteConfig(**section)

        logger.info(f"site config loaded: {resolved}")
        return validated

    except Exception as e:
        logger.error(f"Failed to load site config '{resolved}': {e}")
        if not self.strict:
            # Lenient mode: hand back a minimal config.
            return SiteConfig(name=resolved or "unknown")
        raise

get_loader()

Get global config loader instance.

Source code in src/onion_peeler/settings/loader.py
def get_loader() -> ConfigLoader:
    """
    Return the shared module-level ConfigLoader.

    The loader is created with default arguments on the first call and
    reused on later calls.
    """
    global _loader
    if _loader is not None:
        return _loader

    _loader = ConfigLoader()
    return _loader

load_global_config()

Load global configuration.

Source code in src/onion_peeler/settings/loader.py
def load_global_config() -> GlobalConfig:
    """Load the global configuration through the shared loader."""
    shared_loader = get_loader()
    return shared_loader.load_global_config()

load_item_selectors(item_type, site_id=None)

Load selectors for an item type from site config.

Source code in src/onion_peeler/settings/loader.py
def load_item_selectors(
        item_type: str,
        site_id: Optional[str] = None,
) -> Dict[str, str]:
    """
    Return the selectors a site config defines for an item type.

    Args:
        item_type: Item type whose selectors are requested.
        site_id: Site to read from; passed unchanged to the shared
            loader's ``load_site_config``.

    Returns:
        The result of ``get_item_selectors`` on the loaded site config.
    """
    loader = get_loader()
    return loader.load_site_config(site_id).get_item_selectors(item_type=item_type)

load_selector(item_type, field_name, site_id=None)

Load a single selector field for a site and item type.

Source code in src/onion_peeler/settings/loader.py
def load_selector(
        item_type: str,
        field_name: str,
        site_id: Optional[str] = None,
) -> Optional[str]:
    """
    Return one selector for a given site, item type and field.

    Args:
        item_type: Item type the field belongs to.
        field_name: Field whose selector is requested.
        site_id: Site to read from; passed unchanged to the shared
            loader's ``load_site_config``.

    Returns:
        The result of ``get_selector`` on the loaded site config.
    """
    loader = get_loader()
    return loader.load_site_config(site_id).get_selector(item_type, field_name)

load_site_config(site_id=None)

Load site configuration.

Source code in src/onion_peeler/settings/loader.py
def load_site_config(site_id: Optional[str] = None) -> SiteConfig:
    """Load a site configuration through the shared loader."""
    shared_loader = get_loader()
    return shared_loader.load_site_config(site_id)