Skip to content

base

onion_peeler.pages.base

GenericPage

Bases: WebPage

Site-agnostic page object powered by selectors from SiteConfig.

all_items()

Extract all configured item types from the current response.

Source code in src/onion_peeler/pages/base.py
@field
def all_items(self) -> Dict[str, List[Dict[str, Any]]]:
    """Collect extracted items for every item type in the site config.

    Each key of ``self.site_config.selectors`` is passed to
    ``extract_items``; item types that yield nothing are left out of the
    returned mapping.
    """
    # Lazily pair each configured item type with its extraction result so
    # extraction still happens one type at a time, in configuration order.
    extracted_by_type = (
        (type_name, self.extract_items(type_name))
        for type_name in self.site_config.selectors.keys()
    )
    return {type_name: found for type_name, found in extracted_by_type if found}

extract_items(item_type)

Extract items for a specific configured item type.

Source code in src/onion_peeler/pages/base.py
def extract_items(self, item_type: str) -> List[Dict[str, Any]]:
    """Extract items for a specific configured item type.

    The selectors configured for ``item_type`` are looked up on the site
    config. When they include a ``"container"`` selector, one item is built
    per container it matches; otherwise the page itself serves as the single
    container.

    Args:
        item_type: Name of the item type whose selectors should be applied.

    Returns:
        A list of item dicts. Every item carries ``"source"`` (the page URL)
        and ``"item_type"``, plus each configured field whose extracted
        value is not ``None`` (string values are stripped). Containers that
        produce no field at all are skipped. An empty list is returned when
        no selectors are configured for ``item_type``.
    """
    configured = self.site_config.get_item_selectors(item_type=item_type)
    if not configured:
        return []

    # Pop "container" from a shallow copy, never from the mapping handed out
    # by the config: if that mapping is shared, removing the key in place
    # would make every later call lose its container selector.
    selectors = dict(configured)
    container_selector = selectors.pop("container", None)
    containers = self._select(container_selector) if container_selector else [self]

    results: List[Dict[str, Any]] = []
    for container in containers:
        item: Dict[str, Any] = {"source": self.url, "item_type": item_type}
        for field_name, selector in selectors.items():
            value = self._extract_field_from_container(container, selector)
            if isinstance(value, str):
                value = value.strip()
            if value is not None:
                item[field_name] = value

        # Only keep items that gained something beyond the two bookkeeping
        # keys ("source" and "item_type") set above.
        if len(item) > 2:
            results.append(item)

    return results

items()

Extract items for the default item type from request metadata or config.

Source code in src/onion_peeler/pages/base.py
@field
def items(self) -> List[Dict[str, Any]]:
    """Extract items for the default item type.

    The default type comes from ``self._default_item_type()``; when that
    yields a falsy value, an empty list is returned without extracting.
    """
    default_type = self._default_item_type()
    return self.extract_items(default_type) if default_type else []

next_page_url()

Extract next page URL from site pagination selector.

Source code in src/onion_peeler/pages/base.py
@field
def next_page_url(self) -> Optional[str]:
    """Return the next-page URL found via the site's pagination selector.

    Returns ``None`` when the site config has no ``pagination`` attribute,
    when it is falsy, or when its ``selector`` is falsy.
    """
    pagination_cfg = getattr(self.site_config, "pagination", None)
    # Guard clause: without a usable pagination selector there is nothing
    # to look up.
    if not pagination_cfg or not pagination_cfg.selector:
        return None

    return self._extract_link(pagination_cfg.selector)