Source code for hats.catalog.catalog_collection
from __future__ import annotations
from upath import UPath
from hats.catalog import Catalog
from hats.catalog.dataset.collection_properties import CollectionProperties
from hats.pixel_math import HealpixPixel
[docs]
class CatalogCollection:
    """A collection of HATS catalogs stored in a HEALPix Hive partitioned structure.

    Catalogs of this type are described by a `collection.properties` file which specifies
    the underlying main catalog, margin catalog and index catalog paths. These catalogs are
    stored at the root of the collection, each in its separate directory::

        catalog_collection/
        ├── main_catalog/
        ├── margin_catalog/
        ├── index_catalog/
        ├── collection.properties

    Margin and index catalogs are optional but there could also be multiple of them. The
    catalogs used by default are specified in the `collection.properties` file in the
    `default_margin` and `default_index` keywords.

    Attributes:
        collection_path: Root path of the collection; every catalog directory
            returned by this class is resolved relative to it.
        collection_properties: The collection's properties object, from which the
            margin/index names and the main catalog location are read.
        main_catalog: The main `Catalog` of the collection.
    """

    def __init__(
        self,
        collection_path: UPath,
        collection_properties: CollectionProperties,
        main_catalog: Catalog,
    ):
        """Create a collection wrapper around an already loaded main catalog.

        Args:
            collection_path: Root path of the collection.
            collection_properties: Properties describing the collection.
            main_catalog: The main catalog object; must be a `Catalog` instance.

        Raises:
            TypeError: If `main_catalog` is not an instance of `Catalog`.
        """
        self.collection_path = collection_path
        self.collection_properties = collection_properties
        if not isinstance(main_catalog, Catalog):
            # The rejected object is, by definition, not a `Catalog`, so it may not
            # expose `catalog_path` (e.g. `None`). Fall back to its type name so the
            # intended TypeError is not masked by an AttributeError.
            try:
                origin = f"HATS at {main_catalog.catalog_path}"
            except AttributeError:
                origin = f"Object of type `{type(main_catalog).__name__}`"
            raise TypeError(f"{origin} is not of type `Catalog`")
        self.main_catalog = main_catalog

    @property
    def main_catalog_dir(self) -> UPath:
        """Path to the main catalog directory"""
        return self.collection_path / self.collection_properties.hats_primary_table_url

    @property
    def all_margins(self) -> list[str] | None:
        """The list of margin catalog names in the collection"""
        return self.collection_properties.all_margins

    @property
    def default_margin(self) -> str | None:
        """The name of the default margin"""
        return self.collection_properties.default_margin

    @property
    def default_margin_catalog_dir(self) -> UPath | None:
        """Path to the default margin catalog directory, or None if no default is set"""
        margin_name = self.default_margin
        if margin_name is None:
            return None
        return self.collection_path / margin_name

    @property
    def all_indexes(self) -> dict[str, str] | None:
        """The mapping of indexes in the collection"""
        return self.collection_properties.all_indexes

    @property
    def default_index_field(self) -> str | None:
        """The name of the default index field"""
        return self.collection_properties.default_index

    @property
    def default_index_catalog_dir(self) -> UPath | None:
        """Path to the default index catalog directory, or None if no default is set

        Raises:
            ValueError: If a default index field is set but has no entry in `all_indexes`.
        """
        field_name = self.default_index_field
        if field_name is None:
            return None
        return self._index_dir(field_name)

    def get_index_dir_for_field(self, field_name: str | None = None) -> UPath | None:
        """Path to the field's index catalog directory

        Args:
            field_name: Name of the indexed field. When None, the default index
                directory is returned (which is itself None if no default is set).

        Raises:
            ValueError: If the requested field has no entry in `all_indexes`.
        """
        if field_name is None:
            return self.default_index_catalog_dir
        return self._index_dir(field_name)

    def get_healpix_pixels(self) -> list[HealpixPixel]:
        """The list of HEALPix pixels of the main catalog"""
        return self.main_catalog.get_healpix_pixels()

    def _index_dir(self, field_name: str) -> UPath:
        """Resolve the index directory for `field_name`, validating it is registered."""
        # Read the mapping once; it may be None when the collection has no indexes.
        indexes = self.all_indexes
        if indexes is None or field_name not in indexes:
            raise ValueError(f"Index for field `{field_name}` is not specified in all_indexes")
        return self.collection_path / indexes[field_name]