Source code for psi_io.data

  1"""
  2Module for fetching HDF4 and HDF5 assets used throughout the examples.
  3
  4This module uses the ``pooch`` library to manage the downloading and caching of
  5HDF4 and HDF5 files that adhere to PSI data conventions. It defines functions to
  6fetch specific example datasets, including 1D radial scale data, 2D coronal hole
  7maps, 3D radial magnetic field data, magnetic fieldline data, and synchronic maps
  8used in coronal and heliospheric magnetic field modeling.
  9
 10Currently, these files are hosted on the PredSci documentation website
 11at https://www.predsci.com/doc/assets/ and are primarily intended for use in
 12building examples in the PSI I/O and mapflpy packages.
 13"""
 14
 15
 16from __future__ import annotations
 17
 18import inspect
 19from functools import wraps
 20from typing import Callable, ParamSpec, TypeVar
 21
 22from psi_io.psi_io import HdfExtType, HDFEXT
 23
 24try:
 25    import pooch
 26except ImportError as e:
 27    raise ImportError(
 28        "Missing the optional 'pooch' dependency required for data fetching. "
 29        "Please install it via pip or conda to access the necessary datasets."
 30    ) from e
 31
 32
 33REGISTRY = {
 34	"h4h5-files/rscale.h5": "sha256:60a0cbcd4dc69f7d250cbbdddd6fc3680f09d87c1e4cee6a79d8ec3731533718",
 35    "h4h5-files/chmap.h5": "sha256:668b5fe7e86903e6af4effdf65e3d2dd499a1217e93ca60d8b54b68941b6f1f7",
 36    "h4h5-files/fieldline.h5": "sha256:a5b2a1cc0c458d0d9510d8eacc93d3b4a2cc7e99e0a3f86cd3d6b164e74f370d",
 37    "h4h5-files/br.h5": "sha256:2038dc8e67303cf0b31414d532352b40e8c75ebd8917bc8b68614cf4e7b24055",
 38    "h4h5-files/rscale.hdf": "sha256:1c15bd669fc5a92dfdda7dc23703294c23f0a09440599fd5c30cf7a0e1a6f3c4",
 39    "h4h5-files/chmap.hdf": "sha256:fa2f1134aa4f1c9c0dd729b4e8f23f480bea5cb178e44e8da01bdffad09a2225",
 40    "h4h5-files/fieldline.hdf": "sha256:a4149783780e1ce44a8fe76a83c674e0a3082cd78c6a635b6c8e860e0fdd3891",
 41    "h4h5-files/br.hdf": "sha256:3a4b3174e5d6f45244bd25826890486b5659196b8fe093541c542375a88cdf52",
 42    "h4h5-files/synchronic_map.h5": "sha256:170794a5a19684246339ca9782a2b89066b89661400ec48bb6fc0a082e0a2450"
 43}
 44"""Registry of available magnetic field files with their SHA256 hashes. 
 45
 46This registry is used by the pooch fetcher to verify the integrity of
 47downloaded files, and is primarily intended for building sphinx-gallery
 48examples that require MHD data files.
 49"""
 50
 51
BASE_URL = "https://www.predsci.com/doc/assets/"
"""Base URL hosting magnetic field file assets.

Passed as ``base_url`` when the module-level :data:`FETCHER` is created.
"""
 55
 56
FETCHER = pooch.create(
    path=pooch.os_cache("psi"),
    base_url=BASE_URL,
    registry=REGISTRY,
    env="PSI_IO_CACHE",
)
"""Pooch fetcher for downloading and caching magnetic field files.

The fetcher is created at import time from :data:`BASE_URL` and
:data:`REGISTRY`.

.. note::
    The cache directory can be overridden by setting the ``PSI_IO_CACHE``
    environment variable to a desired path. Otherwise, the default cache
    directory is platform-dependent, as determined by :func:`pooch.os_cache`.

.. note::
    The default (os-dependent) cache directory stores assets under a
    subdirectory named ``psi``. The reason for this naming choice – as opposed
    to ``psi_io`` – is to maintain consistency with other PredSci packages
    that utilize the same asset hosting and caching mechanism.
"""
 76
 77_P = ParamSpec("_P")
 78_R = TypeVar("_R")
 79
[docs] 80def check_hdf_type(func: Callable[_P, _R]) -> Callable[_P, _R]: 81 """Validate the ``hdf`` keyword argument of a data-fetch function. 82 83 This decorator inspects the bound ``hdf`` parameter of the wrapped function 84 and raises :exc:`ValueError` if the value is not one of the supported HDF 85 file extensions (``'.h5'`` or ``'.hdf'``). 86 87 Parameters 88 ---------- 89 func : Callable 90 The data-fetch function to wrap. Must accept an ``hdf`` keyword 91 argument that names the desired file extension. 92 93 Returns 94 ------- 95 out : Callable 96 The wrapped function with ``hdf`` validation applied. 97 98 Raises 99 ------ 100 ValueError 101 If the ``hdf`` argument is not a member of :data:`~psi_io.psi_io.HDFEXT`. 102 103 Examples 104 -------- 105 >>> from psi_io import data 106 >>> data.get_3d_data.__wrapped__ # doctest: +ELLIPSIS 107 <function get_3d_data at 0x...> 108 """ 109 sig = inspect.signature(func) 110 111 @wraps(func) 112 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _R: 113 bound = sig.bind(*args, **kwargs) 114 bound.apply_defaults() 115 116 hdf = bound.arguments["hdf"] # assumes the param is literally named "hdf" 117 if hdf not in HDFEXT: 118 raise ValueError(f"Invalid HDF type {hdf!r}. Must be in {sorted(HDFEXT)}.") 119 120 return func(*bound.args, **bound.kwargs) 121 122 return wrapper
123 124
def file_ids() -> list[str]:
    """Return the names of every file known to the fetcher's registry.

    Returns
    -------
    out : list[str]
        A new list holding the registry keys (file names) of all available
        magnetic field assets.

    Examples
    --------
    >>> from psi_io import data
    >>> ids = data.file_ids()
    >>> isinstance(ids, list) and len(ids) > 0
    True
    """
    # Iterating a mapping yields its keys, so no explicit ``.keys()`` is needed.
    return list(FETCHER.registry)
141 142
@check_hdf_type
def get_1d_data(hdf: HdfExtType = ".h5") -> str:
    """Return the local path of the radial scale (1D) example file.

    The file is obtained through the module-level pooch fetcher.

    Parameters
    ----------
    hdf : HdfExtType, optional
        Extension selecting the file format: ``'.h5'`` for HDF5 or
        ``'.hdf'`` for HDF4. Default is ``'.h5'``.

    Returns
    -------
    out : str
        Local path to the downloaded (and cached) radial scale data file.

    Examples
    --------
    >>> from psi_io import data
    >>> path = data.get_1d_data()
    >>> path.endswith(".h5")
    True
    """
    return FETCHER.fetch(f"h4h5-files/rscale{hdf}")
167 168
@check_hdf_type
def get_2d_data(hdf: HdfExtType = ".h5") -> str:
    """Return the local path of the coronal hole map (2D) example file.

    The file is obtained through the module-level pooch fetcher.

    Parameters
    ----------
    hdf : HdfExtType, optional
        Extension selecting the file format: ``'.h5'`` for HDF5 or
        ``'.hdf'`` for HDF4. Default is ``'.h5'``.

    Returns
    -------
    out : str
        Local path to the downloaded (and cached) coronal hole map data file.

    Examples
    --------
    >>> from psi_io import data
    >>> path = data.get_2d_data()
    >>> path.endswith(".h5")
    True
    """
    return FETCHER.fetch(f"h4h5-files/chmap{hdf}")
193 194
@check_hdf_type
def get_3d_data(hdf: HdfExtType = ".h5") -> str:
    """Return the local path of the radial magnetic field (3D) example file.

    The file is obtained through the module-level pooch fetcher.

    Parameters
    ----------
    hdf : HdfExtType, optional
        Extension selecting the file format: ``'.h5'`` for HDF5 or
        ``'.hdf'`` for HDF4. Default is ``'.h5'``.

    Returns
    -------
    out : str
        Local path to the downloaded (and cached) radial magnetic field data
        file.

    Examples
    --------
    >>> from psi_io import data
    >>> path = data.get_3d_data()
    >>> path.endswith(".h5")
    True
    """
    return FETCHER.fetch(f"h4h5-files/br{hdf}")
220 221
@check_hdf_type
def get_fieldline_data(hdf: HdfExtType = ".h5") -> str:
    """Return the local path of the magnetic fieldline (2D) example file.

    The file is obtained through the module-level pooch fetcher.

    .. warning::
        Unlike the other example data files, fieldline data files do not
        contain scale datasets.

    Parameters
    ----------
    hdf : HdfExtType, optional
        Extension selecting the file format: ``'.h5'`` for HDF5 or
        ``'.hdf'`` for HDF4. Default is ``'.h5'``.

    Returns
    -------
    out : str
        Local path to the downloaded (and cached) magnetic fieldline data file.

    Examples
    --------
    >>> from psi_io import data
    >>> path = data.get_fieldline_data()
    >>> path.endswith(".h5")
    True
    """
    return FETCHER.fetch(f"h4h5-files/fieldline{hdf}")
250 251
@check_hdf_type
def get_synchronic_map_data(hdf: HdfExtType = ".h5") -> str:
    """Fetch the synchronic map data file.

    .. warning::
        Synchronic map data is only available in HDF5 format. Furthermore,
        unlike the other example data files, synchronic map data files contain
        additional datasets beyond the primary data and scales.

    Parameters
    ----------
    hdf : HdfExtType, optional
        The HDF file format to fetch. Only ``'.h5'`` (HDF5) is available for
        this dataset; ``'.hdf'`` (HDF4) is rejected. Default is ``'.h5'``.

    Returns
    -------
    out : str
        Local path to the downloaded (and cached) synchronic map data file.

    Raises
    ------
    NotImplementedError
        If ``hdf`` is ``'.hdf'``, since no HDF4 synchronic map is provided.

    Examples
    --------
    >>> from psi_io import data
    >>> path = data.get_synchronic_map_data()
    >>> path.endswith(".h5")
    True
    """
    if hdf == ".hdf":
        # ``NotImplemented`` is a non-callable sentinel for binary operators;
        # the exception class is ``NotImplementedError``.
        raise NotImplementedError("Synchronic map data is only available in HDF5 format.")
    filename = f"h4h5-files/synchronic_map{hdf}"
    return FETCHER.fetch(filename)