CAMELS-CL¶

Overview¶

CAMELS-CL is the hydrological dataset implementation for Chile. The Chilean CAMELS dataset spans climates from arid to humid across the country.

Dataset Information¶

Region: Chile
Module: hydrodataset.camels_cl
Class: CamelsCl

Features¶

Static Attributes¶

Static catchment attributes include:

- Basin area
- Mean precipitation
- Topographic characteristics
- Land cover information
- Soil properties
- Climate indices

Dynamic Variables¶

Timeseries variables available (varies by dataset):

- Streamflow
- Precipitation
- Temperature (min, max, mean)
- Potential evapotranspiration
- Solar radiation
- And more...

Usage¶

Basic Usage¶

from hydrodataset.camels_cl import CamelsCl
from hydrodataset import SETTING

# Point the dataset at the locally configured data directory
ds = CamelsCl(SETTING["local_data_path"]["datasets-origin"])

# List every basin ID and keep a small sample for the reads below
basin_ids = ds.read_object_ids()
print(f"Number of basins: {len(basin_ids)}")
sample_ids = basin_ids[:5]

# Show which features can be requested
print("Static features:", ds.available_static_features)
print("Dynamic features:", ds.available_dynamic_features)

# Timeseries for the sample basins over the dataset's default period
ts_request = {
    "gage_id_lst": sample_ids,
    "t_range": ds.default_t_range,
    "var_lst": ["streamflow", "precipitation"],
}
timeseries = ds.read_ts_xrdataset(**ts_request)
print(timeseries)

# Static attributes for the same sample basins
attributes = ds.read_attr_xrdataset(gage_id_lst=sample_ids, var_lst=["area", "p_mean"])
print(attributes)

Reading Specific Variables¶

# Work with the first ten basins throughout this example
first_ten = basin_ids[:10]

# Timeseries restricted to an explicit [start, end] date range
ts_data = ds.read_ts_xrdataset(
    gage_id_lst=first_ten,
    t_range=["1990-01-01", "1995-12-31"],
    var_lst=["streamflow", "precipitation", "temperature_mean"],
)

# Convenience readers: basin area and mean precipitation
areas = ds.read_area(gage_id_lst=first_ten)
mean_precip = ds.read_mean_prcp(gage_id_lst=first_ten)

Data Sources¶

The dataset supports multiple data sources for certain variables. Check the class documentation for available sources and use tuple notation to specify:

# Request a specific data source with a (variable, source) tuple.
# The source must be one declared for that variable; for CAMELS-CL
# precipitation these are "cr2met" (default), "chirps", "mswep" and "tmpa".
ts_data = ds.read_ts_xrdataset(
    gage_id_lst=basin_ids[:5],
    t_range=["1990-01-01", "1995-12-31"],
    var_lst=[
        ("precipitation", "chirps"),  # Specify CHIRPS source
        "streamflow"  # Use default source
    ]
)

API Reference¶

`hydrodataset.camels_cl.CamelsCl` ¶

Bases: HydroDataset

CAMELS_CL dataset class extending HydroDataset.

This class provides access to the CAMELS_CL dataset, which contains daily hydrological and meteorological data for various watersheds.

Attributes:

Name	Type	Description
`region`		Geographic region identifier
`download`		Whether to download data automatically
`ds_description`		Dictionary containing dataset file paths

Source code in hydrodataset/camels_cl.py

class CamelsCl(HydroDataset):
    """CAMELS_CL dataset class extending HydroDataset.

    This class provides access to the CAMELS_CL dataset, which contains daily
    hydrological and meteorological data for various watersheds (the variable
    mappings below use per-day units such as ``mm/day``).

    Attributes:
        region: Geographic region identifier
        download: Whether to download data automatically
        ds_description: Dictionary containing dataset file paths (not set in
            this class; presumably provided by the base class -- confirm)
        aqua_fetch: ``CAMELS_CL`` backend object created from ``data_path``
    """

    def __init__(
        self, data_path: str, region: Optional[str] = None, download: bool = False
    ) -> None:
        """Initialize CAMELS_CL dataset.

        Args:
            data_path: Path to the CAMELS_CL data directory
            region: Geographic region identifier (optional)
            download: Whether to download data automatically (default: False).
                This class only stores the flag on the instance.
        """
        super().__init__(data_path)
        self.region = region
        self.download = download
        self.aqua_fetch = CAMELS_CL(data_path)

    @property
    def _attributes_cache_filename(self) -> str:
        """File name of the static-attribute cache inside ``cache_dir``."""
        return "camels_cl_attributes.nc"

    @property
    def _timeseries_cache_filename(self) -> str:
        """File name of the timeseries cache."""
        return "camels_cl_timeseries.nc"

    @property
    def default_t_range(self) -> list:
        """Default ``[start, end]`` date range (ISO date strings)."""
        return ["1913-02-15", "2018-03-09"]

    def cache_attributes_xrdataset(self) -> None:
        """Override base method to add calculated p_mean from precipitation timeseries.

        This method:
        1. Calls parent method to create base attribute cache
        2. Loads that cache file into memory
        3. Reads precipitation timeseries data
        4. Calculates mean precipitation (p_mean) for each basin
        5. Adds p_mean to the attribute dataset
        6. Saves the updated cache

        If steps 3-5 fail for any reason, ``p_mean`` is written as an all-NaN
        placeholder instead, so the cache is still produced.
        """
        # Step 1: Create base attribute cache using parent method
        print("Creating base attribute cache...")
        super().cache_attributes_xrdataset()

        # Step 2: Load the base cache file fully into memory. load_dataset
        # opens, loads and closes the file, so the same path can safely be
        # overwritten in step 6.
        cache_file = self.cache_dir.joinpath(self._attributes_cache_filename)
        ds_attr = xr.load_dataset(cache_file)

        print("Calculating p_mean from precipitation timeseries...")

        # Step 3: Read precipitation timeseries for all basins
        # Use the default precipitation source (cr2met)
        basin_ids = self.read_object_ids().tolist()

        try:
            # Read full precipitation timeseries
            prcp_ts = self.read_ts_xrdataset(
                gage_id_lst=basin_ids,
                t_range=self.default_t_range,
                var_lst=["precipitation"],
            )

            # Step 4: Calculate temporal mean for each basin
            # The result is a DataArray with dimension (basin,)
            p_mean_values = prcp_ts["precipitation"].mean(dim="time")

            # Add units and description metadata
            p_mean_values.attrs["units"] = "mm/day"
            p_mean_values.attrs["description"] = (
                "Mean daily precipitation (calculated from timeseries)"
            )

            # Step 5: Add p_mean to the attribute dataset
            ds_attr["p_mean"] = p_mean_values

            print(f"Successfully calculated p_mean for {len(basin_ids)} basins")

        except Exception as e:
            # Deliberate best-effort: any failure above degrades to a NaN
            # placeholder rather than aborting cache creation.
            print(f"Warning: Could not calculate p_mean from precipitation data: {e}")
            print("Creating p_mean with NaN values as placeholder")
            # Create p_mean with NaN values if calculation fails
            p_mean_nan = xr.DataArray(
                np.full(len(basin_ids), np.nan),
                coords={"basin": basin_ids},
                dims=["basin"],
                attrs={
                    "units": "mm/day",
                    "description": "Mean daily precipitation (not available)",
                },
            )
            ds_attr["p_mean"] = p_mean_nan

        # Step 6: Save the updated cache file
        print(f"Saving updated attribute cache with p_mean to: {cache_file}")
        ds_attr.to_netcdf(cache_file, mode="w")
        print("Successfully saved attribute cache with p_mean")

    # Feature information is taken from Table 3 of
    # "https://hess.copernicus.org/articles/22/5817/2018/"
    # Maps standardized static attribute names to dataset-specific names/units.
    _subclass_static_definitions = {
        "p_mean": {"specific_name": "p_mean", "unit": "mm/day"},
        "area": {"specific_name": "area_km2", "unit": "km^2"},
    }
    # Maps each standard dynamic variable to its available sources; each source
    # gives the dataset-specific variable name and unit, and "default_source"
    # names the source used when none is requested.
    _dynamic_variable_mapping = {
        StandardVariable.STREAMFLOW: {
            "default_source": "observations",
            "sources": {
                "observations": {"specific_name": "q_cms_obs", "unit": "m^3/s"},
                "depth_based": {"specific_name": "q_mm_obs", "unit": "mm/day"},
            },
        },
        StandardVariable.PRECIPITATION: {
            "default_source": "cr2met",
            "sources": {
                "cr2met": {"specific_name": "pcp_mm_cr2met", "unit": "mm/day"},
                "chirps": {"specific_name": "pcp_mm_chirps", "unit": "mm/day"},
                "mswep": {"specific_name": "pcp_mm_mswep", "unit": "mm/day"},
                "tmpa": {"specific_name": "pcp_mm_tmpa", "unit": "mm/day"},
            },
        },
        StandardVariable.TEMPERATURE_MIN: {
            "default_source": "observations",
            "sources": {
                "observations": {"specific_name": "airtemp_C_min", "unit": "°C"}
            },
        },
        StandardVariable.TEMPERATURE_MAX: {
            "default_source": "observations",
            "sources": {
                "observations": {"specific_name": "airtemp_C_max", "unit": "°C"}
            },
        },
        StandardVariable.TEMPERATURE_MEAN: {
            "default_source": "observations",
            "sources": {
                "observations": {"specific_name": "airtemp_C_mean", "unit": "°C"}
            },
        },
        StandardVariable.POTENTIAL_EVAPOTRANSPIRATION: {
            "default_source": "modis",
            "sources": {
                "modis": {"specific_name": "pet_mm_modis", "unit": "mm/day"},
                "hargreaves": {"specific_name": "pet_mm_hargreaves", "unit": "mm/day"},
            },
        },
        StandardVariable.SNOW_WATER_EQUIVALENT: {
            "default_source": "observations",
            "sources": {"observations": {"specific_name": "swe", "unit": "mm"}},
        },
    }

`default_t_range` `property` ¶

`__init__(data_path, region=None, download=False)` ¶

Initialize CAMELS_CL dataset.

Parameters:

Name	Type	Description	Default
`data_path`	`str`	Path to the CAMELS_CL data directory	required
`region`	`Optional[str]`	Geographic region identifier (optional)	`None`
`download`	`bool`	Whether to download data automatically (default: False)	`False`