Skip to content

rasteret.core.raster_accessor

Data-loading handle for a single Parquet row (record). Each record in a Collection gets a RasterAccessor via Collection.iterate_rasters().

raster_accessor

Classes

RasterAccessor

RasterAccessor(info: RasterInfo, data_source: str)

Data-loading handle for a single Parquet record (row) in a Collection.

Each record in a Rasteret Collection represents one raster item: typically a satellite scene, but could be a drone image, derived product, or any tiled GeoTIFF. RasterAccessor wraps that record's metadata and provides methods to load band data as arrays.

Handles async band data loading via cached COG metadata, tile management and geometry masking, and multi-band concurrent fetching.

Initialize from a record's metadata.

Parameters:

Name Type Description Default
info RasterInfo

Record metadata including URLs and COG metadata.

required
data_source str

Data source identifier for band mapping.

required
Source code in src/rasteret/core/raster_accessor.py
def __init__(self, info: RasterInfo, data_source: str) -> None:
    """Build an accessor by copying a record's metadata onto the instance.

    The following fields are copied verbatim from *info*, in this order:
    ``id``, ``datetime``, ``bbox``, ``footprint``, ``crs``, ``cloud_cover``,
    ``assets``, ``band_metadata`` and ``collection``.

    Parameters
    ----------
    info : RasterInfo
        Record metadata including URLs and COG metadata.
    data_source : str
        Data source identifier for band mapping.
    """
    copied_fields = (
        "id",
        "datetime",
        "bbox",
        "footprint",
        "crs",
        "cloud_cover",
        "assets",
        "band_metadata",
        "collection",
    )
    # Mirror each field of the record onto the accessor under the same name.
    for field_name in copied_fields:
        setattr(self, field_name, getattr(info, field_name))

    # Not part of the record itself; supplied by the caller.
    self.data_source = data_source
Attributes
geometry property
geometry

Alias for footprint.

available_bands property
available_bands: list[str]

List available band keys for this record.

Functions
try_get_band_cog_metadata
try_get_band_cog_metadata(
    band_code: str,
) -> tuple[CogMetadata | None, str | None, int | None]

Return tiled GeoTIFF/COG metadata, URL, and band index for band_code.

Returns (None, None, None) when the asset or required per-band metadata is missing.

Source code in src/rasteret/core/raster_accessor.py
def try_get_band_cog_metadata(
    self,
    band_code: str,
) -> tuple[CogMetadata | None, str | None, int | None]:
    """Return tiled GeoTIFF/COG metadata, URL and band index for *band_code*.

    Parameters
    ----------
    band_code : str
        Band identifier to resolve. It may be either a logical band code
        (e.g. ``"B02"``) or an asset key (e.g. ``"blue"``).

    Returns
    -------
    tuple
        ``(cog_metadata, url, band_index)``. ``band_index`` is ``None``
        when the asset is not a dict, carries no ``"band_index"`` entry, or
        that entry cannot be converted to ``int``.

        ``(None, None, None)`` is returned when no matching asset exists,
        when the asset URL or the per-band metadata is missing, or when
        ``CogMetadata.from_dict`` raises ``KeyError``.
    """
    # Support both asset-key conventions:
    # - Old STAC-backed Collections often use STAC asset keys (e.g. "blue")
    # - Newer/normalized Collections use logical band codes (e.g. "B02")
    #
    # Resolve by trying: direct band code, registry forward map (B02->blue),
    # then registry reverse map ("blue"->B02), taking the first key that exists.
    #
    # ``or {}`` keeps the lookups below safe if the registry has no (or an
    # empty) mapping for this data source.
    band_map = BandRegistry.get(self.data_source) or {}

    candidates: list[str] = [band_code]
    forward = band_map.get(band_code)
    if forward:
        candidates.append(forward)

    # Reverse lookup without building a throwaway dict. The last matching
    # key wins, which is what inverting the mapping would produce.
    back = None
    for key, value in band_map.items():
        if value == band_code:
            back = key
    if back:
        candidates.append(back)

    asset_key = next((c for c in candidates if c in self.assets), None)
    if asset_key is None:
        return None, None, None

    asset = self.assets[asset_key]
    url = self._extract_asset_href(asset)
    band_index = asset.get("band_index") if isinstance(asset, dict) else None

    # Band metadata may be stored under the requested band code or under
    # the resolved asset key; the requested code takes precedence.
    metadata_keys = (f"{band_code}_metadata", f"{asset_key}_metadata")
    raw_metadata = next(
        (self.band_metadata[key] for key in metadata_keys if key in self.band_metadata),
        None,
    )

    if raw_metadata is None or url is None:
        return None, None, None

    try:
        cog_metadata = CogMetadata.from_dict(raw_metadata, crs=self.crs)
    except KeyError:
        # Incomplete per-band metadata is reported as "not available".
        return None, None, None

    idx = None
    if band_index is not None:
        try:
            idx = int(band_index)
        except (TypeError, ValueError):
            # An unparseable band index is treated as absent.
            idx = None
    return cog_metadata, url, idx
intersects
intersects(geometry) -> bool

Return True if this record's bbox overlaps geometry's bbox.

Source code in src/rasteret/core/raster_accessor.py
def intersects(self, geometry) -> bool:
    """Tell whether this record's bbox overlaps the bbox of *geometry*.

    *geometry* is coerced with ``coerce_to_geoarrow`` and only the bounding
    box of its first element is compared. A record without a bbox never
    intersects anything.

    Parameters
    ----------
    geometry
        Any input accepted by ``coerce_to_geoarrow``.

    Returns
    -------
    bool
        Result of ``bbox_intersects`` for the two boxes, or ``False`` when
        the record has no (or an empty) bbox.
    """
    from rasteret.core.geometry import bbox_array, bbox_intersects, coerce_to_geoarrow

    min_x, min_y, max_x, max_y = bbox_array(coerce_to_geoarrow(geometry))
    # Each bound is an array-like of scalars; take the first geometry's
    # value and convert it to a plain Python number.
    query_bbox = (
        min_x[0].as_py(),
        min_y[0].as_py(),
        max_x[0].as_py(),
        max_y[0].as_py(),
    )

    if not self.bbox:
        return False
    return bbox_intersects(tuple(self.bbox), query_bbox)
load_bands async
load_bands(
    geometries: Array,
    band_codes: list[str],
    max_concurrent: int = 50,
    for_xarray: bool = True,
    backend: object | None = None,
    target_crs: int | None = None,
)

Load bands for all geometries with parallel processing.

Parameters:

Name Type Description Default
geometries Array

GeoArrow native array of areas of interest.

required
band_codes list of str

Band codes to load.

required
max_concurrent int

Maximum concurrent HTTP requests.

50
for_xarray bool

If True, return xr.Dataset; otherwise gpd.GeoDataFrame.

True
backend object

Pluggable I/O backend.

None
target_crs int

Reproject results to this CRS.

None

Returns:

Type Description
Dataset or GeoDataFrame

Data is returned in the native COG dtype (e.g. uint16, int8, float32). Integer arrays promote to float32 only when geometry masking requires NaN and no nodata value is declared in the COG metadata.

Source code in src/rasteret/core/raster_accessor.py
async def load_bands(
    self,
    geometries: pa.Array,
    band_codes: list[str],
    max_concurrent: int = 50,
    for_xarray: bool = True,
    backend: object | None = None,
    target_crs: int | None = None,
):
    """Load bands for all geometries with parallel processing.

    Every geometry is processed as its own task; within a geometry all
    requested bands are fetched concurrently through one shared
    ``COGReader``. A band or geometry whose task fails (or is cancelled)
    is logged at error level and left out of the result instead of
    aborting the whole load.

    Parameters
    ----------
    geometries : pa.Array
        GeoArrow native array of areas of interest.
    band_codes : list of str
        Band codes to load.
    max_concurrent : int
        Maximum concurrent HTTP requests. The value is passed to the
        ``COGReader`` and to each band load, and it also caps how many
        geometries are processed at the same time.
    for_xarray : bool
        If ``True``, return ``xr.Dataset``; otherwise ``gpd.GeoDataFrame``.
    backend : object, optional
        Pluggable I/O backend.
    target_crs : int, optional
        Reproject results to this CRS. Reprojection is skipped when it
        equals the record's own CRS.

    Returns
    -------
    xarray.Dataset or geopandas.GeoDataFrame
        Data is returned in the native COG dtype (e.g. ``uint16``,
        ``int8``, ``float32``). Integer arrays promote to ``float32``
        only when geometry masking requires NaN and no nodata value is
        declared in the COG metadata.
    """
    from rasteret.fetch.cog import COGReader

    def drop_failures(outcomes, message):
        """Log and discard exception objects returned by ``asyncio.gather``."""
        kept = []
        for outcome in outcomes:
            # ``gather(return_exceptions=True)`` can also hand back
            # ``asyncio.CancelledError``, which derives from BaseException
            # (not Exception) and must not be mistaken for loaded data.
            if isinstance(outcome, BaseException):
                logger.error(message, outcome)
            else:
                kept.append(outcome)
        return kept

    n_geoms = len(geometries)
    logger.debug("Loading %d bands for %d geometries", len(band_codes), n_geoms)

    geom_progress = tqdm(total=n_geoms, desc=f"Record {self.id}")
    try:
        async with COGReader(max_concurrent=max_concurrent, backend=backend) as reader:

            async def process_geometry(geom_idx: int, geom_id: int):
                band_progress = tqdm(
                    total=len(band_codes), desc=f"Geom {geom_id}", leave=False
                )
                try:
                    band_tasks = [
                        self._load_single_band(
                            geometries,
                            geom_idx,
                            band_code,
                            max_concurrent,
                            reader=reader,
                        )
                        for band_code in band_codes
                    ]
                    raw_results = await asyncio.gather(
                        *band_tasks, return_exceptions=True
                    )
                    results = drop_failures(raw_results, "Band load failed: %s")
                    band_progress.update(len(band_codes))
                finally:
                    # Release the nested bar even on error or cancellation.
                    band_progress.close()
                geom_progress.update(1)

                valid = [r for r in results if r is not None]
                if target_crs is not None and target_crs != self.crs and valid:
                    valid = self._reproject_band_results(valid, target_crs)
                return valid, geom_id

            # Process geometries concurrently with semaphore
            sem = asyncio.Semaphore(max_concurrent)

            async def bounded_process(geom_idx: int, geom_id: int):
                async with sem:
                    return await process_geometry(geom_idx, geom_id)

            # Geometry ids are 1-based positions in ``geometries``.
            tasks = [bounded_process(idx, idx + 1) for idx in range(n_geoms)]
            raw_geom_results = await asyncio.gather(*tasks, return_exceptions=True)

        results = drop_failures(raw_geom_results, "Geometry processing failed: %s")
    finally:
        geom_progress.close()

    # Process results
    if for_xarray:
        return self._merge_xarray_results(results, target_crs=target_crs)
    return self._merge_geodataframe_results(results, geometries)

Functions