"""
Routines for reading/writing PSI style HDF5 and HDF4 data files.

Written by Ronald M. Caplan, Ryder Davidson, & Cooper Downs.

2023/09: Start with SVN version r454, 2023/09/12 by RC, Predictive Science Inc.

2024/06: CD: add the get_scales subroutines.

2024/11: RD: Major Update: Add several generic data loading capabilities for faster IO.
  - Read only the portions of data required (`read_hdf_by_index`, `read_hdf_by_value`).
  - Interpolate to slices along a given axis (`np_interpolate_slice_from_hdf`) or
    generic positions (`interpolate_positions_from_hdf`).

2025/06: CD: Prep for integration into psi-io package, HDF4 is now optional.

2026/01: RD: Refactor legacy routines to use new generic routines where possible.
"""

from __future__ import annotations

__all__ = [
    "read_hdf_meta",
    "read_rtp_meta",

    "get_scales_1d",
    "get_scales_2d",
    "get_scales_3d",

    "read_hdf_by_index",
    "read_hdf_by_value",
    "read_hdf_by_ivalue",

    "np_interpolate_slice_from_hdf",
    "sp_interpolate_slice_from_hdf",
    "interpolate_positions_from_hdf",

    "instantiate_linear_interpolator",
    "interpolate_point_from_1d_slice",
    "interpolate_point_from_2d_slice",

    "read_hdf_data",
    "rdhdf_1d",
    "rdhdf_2d",
    "rdhdf_3d",

    "write_hdf_data",
    "wrhdf_1d",
    "wrhdf_2d",
    "wrhdf_3d",
]

import math
from collections import namedtuple
from pathlib import Path
from types import MappingProxyType
from typing import Optional, Literal, Tuple, Iterable, List, Dict, Union, Callable, Any

import numpy as np
import h5py as h5

# -----------------------------------------------------------------------------
# Optional Imports and Import Checking
# -----------------------------------------------------------------------------
# These packages are needed by several functions and must be imported in the
# module namespace.
try:
    import pyhdf.SD as h4
    H4_AVAILABLE = True
    NPTYPES_TO_SDCTYPES = MappingProxyType({
        "int8": h4.SDC.INT8,
        "uint8": h4.SDC.UINT8,
        "int16": h4.SDC.INT16,
        "uint16": h4.SDC.UINT16,
        "int32": h4.SDC.INT32,
        # "int64": h4.SDC.INT32,  # Not supported by pyhdf
        "uint32": h4.SDC.UINT32,
        # "float16": h4.SDC.FLOAT32,  # Not supported by pyhdf
        "float32": h4.SDC.FLOAT32,
        "float64": h4.SDC.FLOAT64,
    })
except ImportError:
    H4_AVAILABLE = False
    NPTYPES_TO_SDCTYPES = {}

try:
    from scipy.interpolate import RegularGridInterpolator
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False


# Functions to stop execution if a package doesn't exist.
def _except_no_pyhdf():
    if not H4_AVAILABLE:
        raise ImportError('The pyhdf package is required to read/write HDF4 .hdf files!')
    return


def _except_no_scipy():
    if not SCIPY_AVAILABLE:
        raise ImportError('The scipy package is required for the interpolation routines!')
    return

SDC_TYPE_CONVERSIONS = MappingProxyType({
    3: np.dtype("ubyte"),
    4: np.dtype("byte"),
    5: np.dtype("float32"),
    6: np.dtype("float64"),
    20: np.dtype("int8"),
    21: np.dtype("uint8"),
    22: np.dtype("int16"),
    23: np.dtype("uint16"),
    24: np.dtype("int32"),
    25: np.dtype("uint32")
})
"""Helper dictionary for mapping HDF4 types to numpy dtypes"""


PSI_DATA_ID = MappingProxyType({
    'h4': 'Data-Set-2',
    'h5': 'Data'
})
"""Mapping of PSI standard dataset names for HDF4 and HDF5 files"""


PSI_SCALE_ID = MappingProxyType({
    'h4': ('fakeDim0', 'fakeDim1', 'fakeDim2'),
    'h5': ('dim1', 'dim2', 'dim3')
})
"""Mapping of PSI standard scale names for HDF4 and HDF5 files"""


HDFEXT = {'.hdf', '.h5'}
"""Set of possible HDF file extensions"""


HdfExtType = Literal['.hdf', '.h5']
"""Type alias for possible HDF file extensions"""

HdfScaleMeta = namedtuple('HdfScaleMeta', ['name', 'type', 'shape', 'imin', 'imax'])
"""
    Named tuple for HDF scale metadata

    Parameters
    ----------
    name : str
        The name of the scale.
    type : str
        The data type of the scale.
    shape : Tuple[int, ...]
        The shape of the scale.
    imin : float
        The minimum value of the scale.
        This assumes the scale is monotonically increasing.
    imax : float
        The maximum value of the scale.
        This assumes the scale is monotonically increasing.
"""


HdfDataMeta = namedtuple('HdfDataMeta', ['name', 'type', 'shape', 'scales'])
"""
    Named tuple for HDF dataset metadata

    Parameters
    ----------
    name : str
        The name of the dataset.
    type : str
        The data type of the dataset.
    shape : tuple of int
        The shape of the dataset.
    scales : list of HdfScaleMeta
        A list of scale metadata objects corresponding to each dimension of the dataset.
        If the dataset has no scales, this list will be empty.
"""

def _dispatch_by_ext(ifile: Union[Path, str],
                     hdf4_func: Callable,
                     hdf5_func: Callable,
                     *args: Any, **kwargs: Any
                     ):
    """
    Dispatch function to call HDF4 or HDF5 specific functions based on file extension.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file.
    hdf4_func : Callable
        The function to call for HDF4 files.
    hdf5_func : Callable
        The function to call for HDF5 files.
    *args : Any
        Positional arguments to pass to the selected function.
    **kwargs : Any
        Keyword arguments to pass to the selected function.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.
    ImportError
        If the file is HDF4 and the `pyhdf` package is not available.
    """
    ipath = Path(ifile)
    if ipath.suffix == '.h5':
        return hdf5_func(ifile, *args, **kwargs)
    if ipath.suffix == '.hdf':
        _except_no_pyhdf()
        return hdf4_func(ifile, *args, **kwargs)
    raise ValueError("File must be HDF4 (.hdf) or HDF5 (.h5)")


# -----------------------------------------------------------------------------
# "Classic" HDF reading and writing routines adapted from psihdf.py or psi_io.py.
# -----------------------------------------------------------------------------

def rdhdf_1d(hdf_filename: str
             ) -> Tuple[np.ndarray, np.ndarray]:
    """Read a 1D PSI-style HDF5 or HDF4 file.

    Parameters
    ----------
    hdf_filename : str
        The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D array of scales.
    f : np.ndarray
        1D array of data.

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.
    """
    return _rdhdf_nd(hdf_filename, dimensionality=1)

def rdhdf_2d(hdf_filename: str
             ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Read a 2D PSI-style HDF5 or HDF4 file.

    The data in the HDF file is assumed to be ordered X,Y in Fortran order.

    Each dimension is assumed to have a 1D "scale" associated with it that
    describes the rectilinear grid coordinates in each dimension.

    Parameters
    ----------
    hdf_filename : str
        The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    f : np.ndarray
        2D array of data, C-ordered as shape(ny,nx) for Python (see note 1).

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.
    """
    return _rdhdf_nd(hdf_filename, dimensionality=2)

def rdhdf_3d(hdf_filename: str
             ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Read a 3D PSI-style HDF5 or HDF4 file.

    The data in the HDF file is assumed to be ordered X,Y,Z in Fortran order.

    Each dimension is assumed to have a 1D "scale" associated with it that
    describes the rectilinear grid coordinates in each dimension.

    Parameters
    ----------
    hdf_filename : str
        The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    z : np.ndarray
        1D array of scales in the Z dimension.
    f : np.ndarray
        3D array of data, C-ordered as shape(nz,ny,nx) for Python (see note 1).

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.
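
    Examples
    --------
    Read a sample 3D HDF5 cube bundled with the package (illustrative usage;
    any PSI-style 3D file works the same way):

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import rdhdf_3d
    >>> filepath = get_3d_data()
    >>> x, y, z, f = rdhdf_3d(filepath)
    >>> f.ndim
    3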
    """
    return _rdhdf_nd(hdf_filename, dimensionality=3)

def wrhdf_1d(hdf_filename: str,
             x: np.ndarray,
             f: np.ndarray) -> None:
    """Write a 1D PSI-style HDF5 or HDF4 file.

    Parameters
    ----------
    hdf_filename : str
        The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to write.
    x : np.ndarray
        1D array of scales.
    f : np.ndarray
        1D array of data.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.
    KeyError
        If, for HDF4 files, the provided data is of a type not supported by :py:mod:`pyhdf`,
        *viz.* float16 or int64.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.
    """
    return _wrhdf_nd(hdf_filename, f, x, dimensionality=1)

def wrhdf_2d(hdf_filename: str,
             x: np.ndarray,
             y: np.ndarray,
             f: np.ndarray) -> None:
    """Write a 2D PSI-style HDF5 or HDF4 file.

    The data in the HDF file will appear as X,Y in Fortran order.

    Each dimension requires a 1D "scale" associated with it that
    describes the rectilinear grid coordinates in each dimension.

    Parameters
    ----------
    hdf_filename : str
        The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to write.
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    f : np.ndarray
        2D array of data, C-ordered as shape(ny,nx) for Python (see note 1).

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.
    KeyError
        If, for HDF4 files, the provided data is of a type not supported by :py:mod:`pyhdf`,
        *viz.* float16 or int64.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.
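
    Examples
    --------
    A minimal sketch that writes a small synthetic 2D dataset (the filename and
    values are illustrative only):

    >>> import numpy as np
    >>> from psi_io import wrhdf_2d
    >>> x = np.linspace(0.0, 1.0, 11)
    >>> y = np.linspace(0.0, np.pi, 21)
    >>> f = np.zeros((21, 11))          # C-ordered as shape(ny, nx)
    >>> wrhdf_2d('example_2d.h5', x, y, f)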
    """
    return _wrhdf_nd(hdf_filename, f, x, y, dimensionality=2)

def wrhdf_3d(hdf_filename: str,
             x: np.ndarray,
             y: np.ndarray,
             z: np.ndarray,
             f: np.ndarray) -> None:
    """Write a 3D PSI-style HDF5 or HDF4 file.

    The data in the HDF file will appear as X,Y,Z in Fortran order.

    Each dimension requires a 1D "scale" associated with it that
    describes the rectilinear grid coordinates in each dimension.

    Parameters
    ----------
    hdf_filename : str
        The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to write.
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    z : np.ndarray
        1D array of scales in the Z dimension.
    f : np.ndarray
        3D array of data, C-ordered as shape(nz,ny,nx) for Python (see note 1).

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.
    KeyError
        If, for HDF4 files, the provided data is of a type not supported by :py:mod:`pyhdf`,
        *viz.* float16 or int64.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.
    """
    return _wrhdf_nd(hdf_filename, f, x, y, z, dimensionality=3)

def get_scales_1d(filename: str
                  ) -> np.ndarray:
    """Wrapper to return the scales of a 1D PSI style HDF5 or HDF4 dataset.

    This routine does not load the data array so it can be quite fast for large files.

    Parameters
    ----------
    filename : str
        The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D array of scales in the X dimension.
    """
    return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
                            dimensionality=1)

def get_scales_2d(filename: str
                  ) -> Tuple[np.ndarray, np.ndarray]:
    """Wrapper to return the scales of a 2D PSI style HDF5 or HDF4 dataset.

    This routine does not load the data array so it can be quite fast for large files.

    The data in the HDF file is assumed to be ordered X,Y in Fortran order.

    Parameters
    ----------
    filename : str
        The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    """
    return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
                            dimensionality=2)

def get_scales_3d(filename: str
                  ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Wrapper to return the scales of a 3D PSI style HDF5 or HDF4 dataset.

    This routine does not load the data array so it can be quite fast for large files.

    The data in the HDF file is assumed to be ordered X,Y,Z in Fortran order.

    Parameters
    ----------
    filename : str
        The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.

    Returns
    -------
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    z : np.ndarray
        1D array of scales in the Z dimension.
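
    Examples
    --------
    Fetch only the scales of the bundled sample 3D cube (illustrative usage):

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import get_scales_3d
    >>> filepath = get_3d_data()
    >>> x, y, z = get_scales_3d(filepath)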
    """
    return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
                            dimensionality=3)


# -----------------------------------------------------------------------------
# "Updated" HDF reading and slicing routines for Hdf4 and Hdf5 datasets.
# -----------------------------------------------------------------------------

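# Minimal dispatch-only sketches of the public metadata wrappers listed in
# ``__all__``.  They assume the same ``_dispatch_by_ext`` pattern used by every
# other public routine in this module and simply delegate to the private
# ``_read_h{4,5}_meta`` and ``_read_h{4,5}_rtp`` helpers defined below.
def read_hdf_meta(ifile: Union[Path, str], /,
                  dataset_id: Optional[str] = None
                  ) -> List[HdfDataMeta]:
    """Read dataset/scale metadata from an HDF4 (.hdf) or HDF5 (.h5) file."""
    return _dispatch_by_ext(ifile, _read_h4_meta, _read_h5_meta, dataset_id=dataset_id)


def read_rtp_meta(ifile: Union[Path, str], /) -> Dict[str, tuple]:
    """Read the (size, first, last) values of the r, t, p scales of a PSI-style file."""
    return _dispatch_by_ext(ifile, _read_h4_rtp, _read_h5_rtp)

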
def read_hdf_data(ifile: Union[Path, str], /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """
    Read data from an HDF4 (.hdf) or HDF5 (.h5) file.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to read.
    dataset_id : str | None
        The identifier of the dataset to read.
        If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
    return_scales : bool
        If True, the scales (coordinate arrays) for each dimension are also returned.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray]
        The data array.
        If ``return_scales`` is True, returns a tuple containing the data array
        and the scales for each dimension.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.

    See Also
    --------
    read_hdf_by_index
        Read HDF datasets by index.
    read_hdf_by_value
        Read HDF datasets by value ranges.
    read_hdf_by_ivalue
        Read HDF datasets by subindex values.

    Notes
    -----
    This function delegates to :func:`_read_h5_data` for HDF5 files
    and :func:`_read_h4_data` for HDF4 files based on the file extension.
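
    Examples
    --------
    Read the bundled sample 3D cube along with its scales (illustrative usage):

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_data
    >>> filepath = get_3d_data()
    >>> f, r, t, p = read_hdf_data(filepath)

    Read only the data array:

    >>> f = read_hdf_data(filepath, return_scales=False)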
    """
    return _dispatch_by_ext(ifile, _read_h4_data, _read_h5_data,
                            dataset_id=dataset_id, return_scales=return_scales)

def read_hdf_by_index(ifile: Union[Path, str], /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """
    Read data from an HDF4 (.hdf) or HDF5 (.h5) file by index.

    .. attention::
        For each dimension, the *minimum* number of elements returned is 1 *e.g.*
        if 3 ints are passed (as positional `*xi` arguments) for a 3D dataset,
        the resulting subset will have a shape of (1, 1, 1,) with scales of length 1.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to read.
    *xi : int | tuple[int | None, int | None] | None
        Indices or ranges for each dimension of the `n`-dimensional dataset.
        Use None for a dimension to select all indices. If no arguments are passed,
        the entire dataset (and its scales) will be returned – see
        :func:`~psi_io.psi_io.read_hdf_data`.
    dataset_id : str | None
        The identifier of the dataset to read.
        If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
    return_scales : bool
        If True, the scales (coordinate arrays) for each dimension are also returned.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray]
        The selected data array.
        If ``return_scales`` is True, returns a tuple containing the data array
        and the scales for each dimension.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.

    See Also
    --------
    read_hdf_by_value
        Read HDF datasets by value ranges.
    read_hdf_by_ivalue
        Read HDF datasets by subindex values.
    read_hdf_data
        Read entire HDF datasets.

    Notes
    -----
    This function delegates to :func:`_read_h5_by_index` for HDF5 files and
    :func:`_read_h4_by_index` for HDF4 files based on the file extension.

    This function assumes that the dataset is Fortran (or column-major) ordered *viz.* for
    compatibility with PSI's data ecosystem; as such, a given :math:`n`-dimensional array,
    of shape :math:`(D_0, D_1, ..., D_{n-1})`, has scales :math:`(x_0, x_1, ..., x_{n-1})`,
    such that :math:`| x_i | = | D_{(n-1)-i} |`. For example, a 3D dataset with shape
    :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p` corresponding to the radial, theta,
    and phi dimensions respectively.

    This function extracts a subset of the given dataset/scales without reading the
    entire data into memory. For a given scale :math:`x_j`, an index, index range, or ``None``
    can be provided; these inputs are forwarded through to Python's builtin :class:`slice` to
    extract the desired subset of the scale(s) / dataset.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_index
    >>> filepath = get_3d_data()

    Extract a radial slice (at the first radial-scale index) from a 3D cube:

    >>> f, r, t, p = read_hdf_by_index(filepath, 0, None, None)
    >>> f.shape, r.shape, t.shape, p.shape
    ((181, 100, 1), (1,), (100,), (181,))

    Extract a phi slice at the 90th index from a 3D cube:

    >>> f, r, t, p = read_hdf_by_index(filepath, None, None, 90)
    >>> f.shape, r.shape, t.shape, p.shape
    ((1, 100, 151), (151,), (100,), (1,))

    Extract the values up to the 20th index (in the radial dimension) and with
    phi indices from 10 to 25:

    >>> f, r, t, p = read_hdf_by_index(filepath, (None, 20), None, (10, 25))
    >>> f.shape, r.shape, t.shape, p.shape
    ((15, 100, 20), (20,), (100,), (15,))
    """
    if not xi:
        return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
    return _dispatch_by_ext(ifile, _read_h4_by_index, _read_h5_by_index,
                            *xi, dataset_id=dataset_id, return_scales=return_scales)

def read_hdf_by_value(ifile: Union[Path, str], /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """
    Read data from an HDF4 (.hdf) or HDF5 (.h5) file by value.

    .. note::
        For each dimension, the minimum number of elements returned is 2 *e.g.*
        if 3 floats are passed (as positional `*xi` arguments) for a 3D dataset,
        the resulting subset will have a shape of (2, 2, 2,) with scales of length 2.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to read.
    *xi : float | tuple[float, float] | None
        Values or value ranges corresponding to each dimension of the `n`-dimensional
        dataset specified by the ``dataset_id``. If no arguments are passed, the
        entire dataset (and its scales) will be returned.
    dataset_id : str | None
        The identifier of the dataset to read.
        If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
    return_scales : bool
        If True, the scales for the specified dataset are also returned.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray]
        The selected data array.
        If ``return_scales`` is True, returns a tuple containing the data array
        and the scales for each dimension.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.

    See Also
    --------
    read_hdf_by_index
        Read HDF datasets by index.
    read_hdf_by_ivalue
        Read HDF datasets by subindex values.
    read_hdf_data
        Read entire HDF datasets.
    sp_interpolate_slice_from_hdf
        Interpolate slices from HDF datasets using SciPy's
        :class:`~scipy.interpolate.RegularGridInterpolator`
    np_interpolate_slice_from_hdf
        Perform linear, bilinear, or trilinear interpolation using vectorized numpy-based
        routines.

    Notes
    -----
    This function delegates to :func:`_read_h5_by_value` for HDF5 files and
    :func:`_read_h4_by_value` for HDF4 files based on the file extension.

    This function assumes that the dataset is Fortran (or column-major) ordered *viz.* for
    compatibility with PSI's data ecosystem; as such, a given :math:`n`-dimensional array,
    of shape :math:`(D_0, D_1, ..., D_{n-1})`, has scales :math:`(x_0, x_1, ..., x_{n-1})`,
    such that :math:`| x_i | = | D_{(n-1)-i} |`. For example, a 3D dataset with shape
    :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p` corresponding to the radial, theta,
    and phi dimensions respectively.

    This function extracts a subset of the given dataset/scales without reading the
    entire data into memory. For a given scale :math:`x_j`, if:

    - *i)* a single float is provided (:math:`a`), the function will return a 2-element
      subset of the scale (:math:`xʹ_j`) such that :math:`xʹ_j[0] <= a < xʹ_j[1]`.
    - *ii)* a (float, float) tuple is provided (:math:`a_0, a_1`), the function will return an
      *m*-element subset of the scale (:math:`xʹ_j`) where
      :math:`xʹ_j[0] <= a_0` and :math:`xʹ_j[m-1] > a_1`.
    - *iii)* a **None** value is provided, the function will return the entire scale :math:`x_j`

    The returned subset can then be passed to a linear interpolation routine to extract the
    "slice" at the desired fixed dimensions.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_value
    >>> filepath = get_3d_data()

    Extract a radial slice at r=15 from a 3D cube:

    >>> f, r, t, p = read_hdf_by_value(filepath, 15, None, None)
    >>> f.shape, r.shape, t.shape, p.shape
    ((181, 100, 2), (2,), (100,), (181,))

    Extract a phi slice at p=1.57 from a 3D cube:

    >>> f, r, t, p = read_hdf_by_value(filepath, None, None, 1.57)
    >>> f.shape, r.shape, t.shape, p.shape
    ((2, 100, 151), (151,), (100,), (2,))

    Extract the values between 3.2 and 6.4 (in the radial dimension) and with
    phi equal to 4.5

    >>> f, r, t, p = read_hdf_by_value(filepath, (3.2, 6.4), None, 4.5)
    >>> f.shape, r.shape, t.shape, p.shape
    ((2, 100, 15), (15,), (100,), (2,))
    """
    if not xi:
        return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
    return _dispatch_by_ext(ifile, _read_h4_by_value, _read_h5_by_value,
                            *xi, dataset_id=dataset_id, return_scales=return_scales)

def read_hdf_by_ivalue(ifile: Union[Path, str], /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """
    Read data from an HDF4 (.hdf) or HDF5 (.h5) file by (possibly fractional) index value.

    .. note::
        For each dimension, the minimum number of elements returned is 2 *e.g.*
        if 3 floats are passed (as positional `*xi` arguments) for a 3D dataset,
        the resulting subset will have a shape of (2, 2, 2,) with scales of length 2.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to read.
    *xi : float | tuple[float, float] | None
        Index values or index ranges corresponding to each dimension of the `n`-dimensional
        dataset specified by the ``dataset_id``. If no arguments are passed, the
        entire dataset (and its scales) will be returned.
    dataset_id : str | None
        The identifier of the dataset to read.
        If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
    return_scales : bool
        If True, arrays of indices for the specified dataset are also returned.
        Note, regardless of whether the provided dataset has coordinate variables
        (scales), the returned index arrays are always 0-based indices generated
        through :func:`~numpy.arange`.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray]
        The selected data array.
        If ``return_scales`` is True, returns a tuple containing the data array
        and the scales for each dimension.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.

    See Also
    --------
    read_hdf_by_index
        Read HDF datasets by index.
    read_hdf_data
        Read entire HDF datasets.
    sp_interpolate_slice_from_hdf
        Interpolate slices from HDF datasets using SciPy's
        :class:`~scipy.interpolate.RegularGridInterpolator`
    np_interpolate_slice_from_hdf
        Perform linear, bilinear, or trilinear interpolation using vectorized numpy-based
        routines.

    Notes
    -----
    This function delegates to :func:`_read_h5_by_ivalue` for HDF5 files and
    :func:`_read_h4_by_ivalue` for HDF4 files based on the file extension.

    This function assumes that the dataset is Fortran (or column-major) ordered *viz.* for
    compatibility with PSI's data ecosystem; as such, a given :math:`n`-dimensional array,
    of shape :math:`(D_0, D_1, ..., D_{n-1})`, has scales :math:`(x_0, x_1, ..., x_{n-1})`,
    such that :math:`| x_i | = | D_{(n-1)-i} |`. For example, a 3D dataset with shape
    :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p` corresponding to the radial, theta,
    and phi dimensions respectively.

    This function extracts a subset of the given dataset/scales without reading the
    entire data into memory. For a given scale :math:`x_j`, if:

    - *i)* a single float is provided (:math:`a`), the function will return a 2-element
      subset of the scale (:math:`xʹ_j`): :math:`xʹ_j[floor(a)], xʹ_j[ceil(a)]`.
    - *ii)* a (float, float) tuple is provided (:math:`a_0, a_1`), the function will return an
      *m*-element subset of the scale (:math:`xʹ_j`): :math:`xʹ_j[floor(a_0)], xʹ_j[ceil(a_1)]`
    - *iii)* a **None** value is provided, the function will return the entire scale :math:`x_j`

    The returned subset can then be passed to a linear interpolation routine to extract the
    "slice" at the desired fixed dimensions.
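
    Examples
    --------
    Read the bundled sample 3D cube by fractional index (illustrative usage).
    A single float selects the two bracketing grid indices in that dimension:

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_ivalue
    >>> filepath = get_3d_data()
    >>> f, r, t, p = read_hdf_by_ivalue(filepath, 0.5, None, None)

    Select index ranges (here radial indices 0-20 and phi indices 10-25):

    >>> f, r, t, p = read_hdf_by_ivalue(filepath, (0, 20), None, (10, 25))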
    """
    if not xi:
        return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
    return _dispatch_by_ext(ifile, _read_h4_by_ivalue, _read_h5_by_ivalue,
                            *xi, dataset_id=dataset_id, return_scales=return_scales)

def write_hdf_data(ifile: Union[Path, str], /,
                   data: np.ndarray,
                   *scales: Iterable[Union[np.ndarray, None]],
                   dataset_id: Optional[str] = None
                   ) -> Path:
    """
    Write data to an HDF4 (.hdf) or HDF5 (.h5) file.

    Following PSI conventions, the data array is assumed to be Fortran-ordered,
    with the scales provided in the order corresponding to each dimension *e.g.* a
    3D dataset with shape (Dp, Dt, Dr) has scales r, t, p corresponding to the
    radial, theta, and phi dimensions respectively.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to write.
    data : np.ndarray
        The data array to write.
    *scales : Iterable[np.ndarray | None]
        The scales (coordinate arrays) for each dimension.
    dataset_id : str | None
        The identifier of the dataset to write.
        If None, a default dataset is used ('Data-Set-2' for HDF4
        and 'Data' for HDF5).

    Returns
    -------
    out : Path
        The path to the written HDF file.

    Raises
    ------
    ValueError
        If the file does not have a `.hdf` or `.h5` extension.
    KeyError
        If, for HDF4 files, the provided data is of a type not supported by :py:mod:`pyhdf`,
        *viz.* float16 or int64.

    Notes
    -----
    This function delegates to :func:`_write_h5_data` for HDF5 files
    and :func:`_write_h4_data` for HDF4 files based on the file extension.

    If no scales are provided, the dataset will be written without coordinate variables.
    If scales are provided, the number of scales must be less than or equal to the number
    of dimensions in the data array. To attach scales to specific dimensions only, provide
    ``None`` for the dimensions without scales.

    See Also
    --------
    wrhdf_1d
        Write 1D HDF files.
    wrhdf_2d
        Write 2D HDF files.
    wrhdf_3d
        Write 3D HDF files.
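
    Examples
    --------
    A minimal sketch that writes a small synthetic 2D dataset with scales
    attached to both dimensions (the filename and values are illustrative only):

    >>> import numpy as np
    >>> from psi_io import write_hdf_data
    >>> x = np.linspace(0.0, 1.0, 11)
    >>> y = np.linspace(0.0, np.pi, 21)
    >>> f = np.zeros((21, 11))          # C-ordered as shape(ny, nx)
    >>> write_hdf_data('example_2d.h5', f, x, y)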
    """
    return _dispatch_by_ext(ifile, _write_h4_data, _write_h5_data, data,
                            *scales, dataset_id=dataset_id)

def instantiate_linear_interpolator(*args, **kwargs):
    """
    Instantiate a linear interpolator using the provided data and scales.

    Parameters
    ----------
    *args : sequence[array_like]
        The first argument is the data array.
        Subsequent arguments are the scales (coordinate arrays) for each dimension.
    **kwargs : dict
        Additional keyword arguments to pass to
        :class:`~scipy.interpolate.RegularGridInterpolator`.

    Returns
    -------
    out : RegularGridInterpolator
        An instance of RegularGridInterpolator initialized
        with the provided data and scales.

    Notes
    -----
    This function transposes the data array and passes it along with the scales
    to RegularGridInterpolator. Given a PSI-style Fortran-ordered 3D dataset,
    the resulting interpolator can be queried using (r, theta, phi) coordinates.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_value
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Read the dataset by value (at 15 R_sun in the radial dimension).

    >>> data_and_scales = read_hdf_by_value(filepath, 15, None, None)
    >>> interpolator = instantiate_linear_interpolator(*data_and_scales)

    Interpolate at a specific position.

    >>> interpolator((15, pi/2, pi))
    0.0012864485109423877
    """
    _except_no_scipy()
    return RegularGridInterpolator(
        values=args[0].T,
        points=args[1:],
        **kwargs)

def sp_interpolate_slice_from_hdf(*xi, **kwargs):
    """
    Interpolate a slice from HDF data using SciPy's `RegularGridInterpolator`.

    .. note::
        Slicing routines result in a dimensional reduction. The dimensions
        that are fixed (i.e. provided as float values in `*xi`) are removed
        from the output slice, while the dimensions that are not fixed
        (*i.e.* provided as None in `*xi`) are retained.

    Parameters
    ----------
    *xi : sequence
        Positional arguments passed-through to :func:`read_hdf_by_value`.
    **kwargs : dict
        Keyword arguments passed-through to :func:`read_hdf_by_value`.
        **NOTE: Instantiating a linear interpolator requires the** ``return_scales``
        **keyword argument to be set to True; this function overrides
        any provided value for** ``return_scales`` **to ensure this behavior.**

    Returns
    -------
    slice : np.ndarray
        The interpolated data slice.
    scales : list
        A list of scales for the dimensions that were not fixed.

    Notes
    -----
    This function reads data from an HDF file, creates a linear interpolator,
    and interpolates a slice based on the provided values.

    .. note::
        The returned slice is Fortran-ordered *e.g.* radial slices will have shape
        (phi, theta), phi slices will have shape (r, theta), etc.

    .. note::
        SciPy's `RegularGridInterpolator` casts all input data to `float64` internally.
        Therefore, PSI HDF datasets with single-precision (`float32`) data will be upcast
        during interpolation.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import sp_interpolate_slice_from_hdf
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Fetch a 2D slice at r=15 from 3D map

    >>> slice_, theta_scale, phi_scale = sp_interpolate_slice_from_hdf(filepath, 15, None, None)
    >>> slice_.shape, theta_scale.shape, phi_scale.shape
    ((181, 100), (100,), (181,))

    Fetch a single point from 3D map

    >>> point_value, *_ = sp_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
    >>> point_value
    6.084495480971823
    """
    filepath, *args = xi
    kwargs.pop('return_scales', None)
    result = read_hdf_by_value(filepath, *args, **kwargs)
    interpolator = instantiate_linear_interpolator(*result)
    grid = [yi[0] if yi[0] is not None else yi[1] for yi in zip(args, result[1:])]
    slice_ = interpolator(tuple(np.meshgrid(*grid, indexing='ij')))
    indices = [0 if yi is not None else slice(None, None) for yi in args]
    return slice_[tuple(indices)].T, *[yi[1] for yi in zip(args, result[1:]) if yi[0] is None]

def np_interpolate_slice_from_hdf(ifile: Union[Path, str], /,
                                  *xi: Union[float, Tuple[float, float], None],
                                  dataset_id: Optional[str] = None,
                                  by_index: bool = False,
                                  ):
    """
    Interpolate a slice from HDF data using linear interpolation.

    .. note::
        Slicing routines result in a dimensional reduction. The dimensions
        that are fixed (i.e. provided as float values in `*xi`) are removed
        from the output slice, while the dimensions that are not fixed
        (*i.e.* provided as `None` in `*xi`) are retained.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to read.
    *xi : sequence
        Positional arguments passed-through to reader function.
    dataset_id : str | None
        The identifier of the dataset to read.
        If None, a default dataset is used ('Data-Set-2' for HDF4
        and 'Data' for HDF5).
    by_index : bool
        If True, use :func:`read_hdf_by_ivalue` to read data by subindex values.
        If False, use :func:`read_hdf_by_value` to read data by value ranges.

    Returns
    -------
    slice : np.ndarray
        The interpolated data slice.
    scales : list
        A list of scales for the dimensions that were not fixed.

    Raises
    ------
    ValueError
        If the number of dimensions to interpolate over is not supported.

    Notes
    -----
    This function supports linear, bilinear, and trilinear interpolation
    depending on the number of dimensions fixed in `xi`.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import np_interpolate_slice_from_hdf
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Fetch a 2D slice at r=15 from 3D map

    >>> slice_, theta_scale, phi_scale = np_interpolate_slice_from_hdf(filepath, 15, None, None)
    >>> slice_.shape, theta_scale.shape, phi_scale.shape
    ((181, 100), (100,), (181,))

    Fetch a single point from 3D map

    >>> point_value, *_ = np_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
    >>> point_value
    6.084496

    """
    reader = read_hdf_by_value if not by_index else read_hdf_by_ivalue
    data, *scales = reader(ifile, *xi, dataset_id=dataset_id, return_scales=True)
    f_ = np.transpose(data)
    slice_type = sum([yi is not None for yi in xi])
    if slice_type == 1:
        return _np_linear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
    elif slice_type == 2:
        return _np_bilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
    elif slice_type == 3:
        return _np_trilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
    else:
        raise ValueError("Not a valid number of dimensions for supported linear interpolation methods")

def interpolate_positions_from_hdf(ifile, *xi, **kwargs):
    """
    Interpolate at a list of scale positions using SciPy's `RegularGridInterpolator`.

    Parameters
    ----------
    ifile : Path | str
        The path to the HDF file to read.
    *xi : sequence[np.ndarray]
        Iterable scale values for each dimension of the `n`-dimensional dataset.
        Each array should have the same shape *i.e.* :math:`(m, )` – the function composes
        these arrays into an :math:`m x n` column stack for interpolation.
    **kwargs : dict
        Keyword arguments to pass to :func:`read_hdf_by_value`.

    Returns
    -------
    out : np.ndarray
        The interpolated values at the provided positions.

    Notes
    -----
    This function reads data from an HDF file, creates a linear interpolator,
    and interpolates at the provided scale values. For each dimension, the
    minimum and maximum values from the provided arrays are used to read
    the necessary subset of data from the HDF file *viz.* to avoid loading
    the entire dataset into memory.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import interpolate_positions_from_hdf
    >>> import numpy as np
    >>> filepath = get_3d_data()

    Set up positions to interpolate.

    >>> r_vals = np.array([15, 20, 25])
    >>> theta_vals = np.array([np.pi/4, np.pi/2, 3*np.pi/4])
    >>> phi_vals = np.array([0, np.pi, 2*np.pi])

    Interpolate at the specified positions.

    >>> interpolate_positions_from_hdf(filepath, r_vals, theta_vals, phi_vals)
    [0.0008402743657585175, 0.000723875405654482, -0.00041033233811179216]
    """
    xi_ = [(np.nanmin(i), np.nanmax(i)) for i in xi]
    f, *scales = read_hdf_by_value(ifile, *xi_, **kwargs)
    interpolator = instantiate_linear_interpolator(f, *scales, bounds_error=False)
    return interpolator(np.stack(xi, axis=len(xi[0].shape)))

def interpolate_point_from_1d_slice(xi, scalex, values):
    """
    Interpolate a point from a 1D slice using linear interpolation.

    Parameters
    ----------
    xi : float
        The scale value at which to interpolate.
    scalex : np.ndarray
        The scale (coordinate array) for the dimension.
    values : np.ndarray
        The data array to interpolate.

    Returns
    -------
    out : np.ndarray
        The interpolated data point.
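
    Examples
    --------
    Interpolate midway between two samples of a simple 1D profile
    (illustrative values):

    >>> import numpy as np
    >>> from psi_io import interpolate_point_from_1d_slice
    >>> x = np.array([0.0, 1.0, 2.0])
    >>> v = np.array([10.0, 20.0, 30.0])
    >>> float(interpolate_point_from_1d_slice(1.5, x, v))
    25.0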
    """
    if scalex[0] > scalex[-1]:
        scalex, values = scalex[::-1], values[::-1]
    xi_ = int(np.searchsorted(scalex, xi))
    sx_ = slice(*_check_index_ranges(len(scalex), xi_, xi_))
    return _np_linear_interpolation([xi], [scalex[sx_]], values[sx_])

def interpolate_point_from_2d_slice(xi, yi, scalex, scaley, values):
    """
    Interpolate a point from a 2D slice using bilinear interpolation.

    Parameters
    ----------
    xi : float
        The scale value for the first dimension.
    yi : float
        The scale value for the second dimension.
    scalex : np.ndarray
        The scale (coordinate array) for the first dimension.
    scaley : np.ndarray
        The scale (coordinate array) for the second dimension.
    values : np.ndarray
        The data array to interpolate.

    Returns
    -------
    out : np.ndarray
        The interpolated data point.
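
    Examples
    --------
    Interpolate inside a small 2x2 grid (illustrative values; ``values`` is
    passed with shape ``(len(scaley), len(scalex))``, as in the slices returned
    by the slicing routines above):

    >>> import numpy as np
    >>> from psi_io import interpolate_point_from_2d_slice
    >>> x = np.array([0.0, 1.0])
    >>> y = np.array([0.0, 2.0])
    >>> v = np.array([[0.0, 1.0],
    ...               [2.0, 3.0]])
    >>> float(interpolate_point_from_2d_slice(0.5, 1.0, x, y, v))
    1.5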
    """
    values = np.transpose(values)
    if scalex[0] > scalex[-1]:
        scalex, values = scalex[::-1], values[::-1, :]
    if scaley[0] > scaley[-1]:
        scaley, values = scaley[::-1], values[:, ::-1]
    xi_, yi_ = int(np.searchsorted(scalex, xi)), int(np.searchsorted(scaley, yi))
    sx_, sy_ = slice(*_check_index_ranges(len(scalex), xi_, xi_)), slice(*_check_index_ranges(len(scaley), yi_, yi_))
    return _np_bilinear_interpolation([xi, yi], [scalex[sx_], scaley[sy_]], values[(sx_, sy_)])

def _rdhdf_nd(hdf_filename: str,
              dimensionality: int
              ) -> Tuple[np.ndarray, ...]:
    f, *scales = read_hdf_data(hdf_filename)
    if f.ndim != dimensionality:
        err = f'Expected {dimensionality}D data, got {f.ndim}D data instead.'
        raise ValueError(err)
    scales = scales or (np.empty(0) for _ in f.shape)
    return *scales, f


def _wrhdf_nd(hdf_filename: str,
              data: np.ndarray,
              *scales: Iterable[Union[np.ndarray, None]],
              dimensionality: int,
              ) -> None:
    if data.ndim != dimensionality:
        err = f'Expected {dimensionality}D data, got {data.ndim}D data instead.'
        raise ValueError(err)
    write_hdf_data(hdf_filename, data, *scales)

def _get_scales_nd_h5(ifile: Union[Path, str], /,
                      dimensionality: int,
                      dataset_id: Optional[str] = None,
                      ):
    with h5.File(ifile, 'r') as hdf:
        data = hdf[dataset_id or PSI_DATA_ID['h5']]
        ndim = data.ndim
        if ndim != dimensionality:
            err = f'Expected {dimensionality}D data, got {ndim}D data instead.'
            raise ValueError(err)
        scales = []
        for dim in data.dims:
            if dim:
                scales.append(dim[0][:])
            else:
                raise ValueError('Dimension has no scale associated with it.')
        return tuple(scales)


def _get_scales_nd_h4(ifile: Union[Path, str], /,
                      dimensionality: int,
                      dataset_id: Optional[str] = None,
                      ):
    hdf = h4.SD(str(ifile))
    data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    ndim = data.info()[1]
    if ndim != dimensionality:
        err = f'Expected {dimensionality}D data, got {ndim}D data instead.'
        raise ValueError(err)
    scales = []
    for k_, v_ in reversed(data.dimensions(full=1).items()):
        if v_[3]:
            scales.append(hdf.select(k_)[:])
        else:
            raise ValueError('Dimension has no scale associated with it.')
    return tuple(scales)

def _read_h5_meta(ifile: Union[Path, str], /,
                  dataset_id: Optional[str] = None
                  ):
    """HDF5 (.h5) version of :func:`read_hdf_meta`."""
    with h5.File(ifile, 'r') as hdf:
        # Raises KeyError if ``dataset_id`` not found
        # If ``dataset_id`` is None, get all non-scale :class:`h5.Dataset`s
        if dataset_id:
            datasets = (dataset_id, hdf[dataset_id]),
        else:
            datasets = ((k, v) for k, v in hdf.items() if not v.is_scale)

        # One should avoid multiple calls to ``dimproxy[0]`` – *e.g.* ``dimproxy[0].dtype`` and
        # ``dimproxy[0].shape`` – because the __getitem__ method creates and returns a new
        # :class:`~h5.DimensionProxy` object each time it is called. [Does this matter? Probably not.]
        return [HdfDataMeta(name=k,
                            type=v.dtype,
                            shape=v.shape,
                            scales=[HdfScaleMeta(name=dimproxy.label,
                                                 type=dim.dtype,
                                                 shape=dim.shape,
                                                 imin=dim[0],
                                                 imax=dim[-1])
                                    for dimproxy in v.dims if dimproxy and (dim := dimproxy[0])])
                for k, v in datasets]


def _read_h4_meta(ifile: Union[Path, str], /,
                  dataset_id: Optional[str] = None
                  ):
    """HDF4 (.hdf) version of :func:`read_hdf_meta`."""
    hdf = h4.SD(str(ifile))
    # Raises HDF4Error if ``dataset_id`` not found
    # If ``dataset_id`` is None, get all non-scale :class:`pyhdf.SD.SDS`s
    if dataset_id:
        datasets = (dataset_id, hdf.select(dataset_id)),
    else:
        datasets = ((k, hdf.select(k)) for k in hdf.datasets().keys() if not hdf.select(k).iscoordvar())

    # The inner list comprehension differs in approach from the HDF5 version because calling
    # ``dimensions(full=1)`` on an :class:`~pyhdf.SD.SDS` returns a dictionary of dimension
    # dataset identifiers (keys) and tuples containing dimension metadata (values). Even if no
    # coordinate-variable datasets are defined, this dictionary is still returned; the only
    # indication that the datasets returned do not exist is that the "type" field (within the
    # tuple of dimension metadata) is set to 0.

    # Also, one cannot avoid multiple calls to ``hdf.select(k_)`` within the inner list comprehension
    # because :class:`~pyhdf.SD.SDS` objects do not define a ``__bool__`` method, and the fallback
    # behavior of Python is to assess if the __len__ method returns a non-zero value (which, in
    # this case, always returns 0).
    return [HdfDataMeta(name=k,
                        type=SDC_TYPE_CONVERSIONS[v.info()[3]],
                        shape=_cast_shape_tuple(v.info()[2]),
                        scales=[HdfScaleMeta(name=k_,
                                             type=SDC_TYPE_CONVERSIONS[v_[3]],
                                             shape=_cast_shape_tuple(v_[0]),
                                             imin=hdf.select(k_)[0],
                                             imax=hdf.select(k_)[-1])
                                for k_, v_ in v.dimensions(full=1).items() if v_[3]])
            for k, v in datasets]

def _read_h5_rtp(ifile: Union[Path, str], /):
    """HDF5 (.h5) version of :func:`read_rtp_meta`."""
    with h5.File(ifile, 'r') as hdf:
        return {k: (hdf[v].size, hdf[v][0], hdf[v][-1])
                for k, v in zip('rtp', PSI_SCALE_ID['h5'])}


def _read_h4_rtp(ifile: Union[Path, str], /):
    """HDF4 (.hdf) version of :func:`read_rtp_meta`."""
    hdf = h4.SD(str(ifile))
    return {k: (hdf.select(v).info()[2], hdf.select(v)[0], hdf.select(v)[-1])
            for k, v in zip('ptr', PSI_SCALE_ID['h4'])}

def _read_h5_data(ifile: Union[Path, str], /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    with h5.File(ifile, 'r') as hdf:
        data = hdf[dataset_id or PSI_DATA_ID['h5']]
        dataset = data[:]
        if return_scales:
            scales = [dim[0][:] for dim in data.dims if dim]
            return dataset, *scales
        return dataset


def _read_h4_data(ifile: Union[Path, str], /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    hdf = h4.SD(str(ifile))
    data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    if return_scales:
        out = (data[:],
               *[hdf.select(k_)[:] for k_, v_ in reversed(data.dimensions(full=1).items()) if v_[3]])
    else:
        out = data[:]
    return out

def _read_h5_by_index(ifile: Union[Path, str], /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_by_index`."""
    with h5.File(ifile, 'r') as hdf:
        data = hdf[dataset_id or PSI_DATA_ID['h5']]
        if len(xi) != data.ndim:
            raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
        slices = [_parse_index_inputs(slice_input) for slice_input in xi]
        dataset = data[tuple(reversed(slices))]
        if return_scales:
            scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim]
            return dataset, *scales
        return dataset


def _read_h4_by_index(ifile: Union[Path, str], /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_by_index`."""
    hdf = h4.SD(str(ifile))
    data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    ndim = data.info()[1]
    if len(xi) != ndim:
        raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
    slices = [_parse_index_inputs(slice_input) for slice_input in xi]
    dataset = data[tuple(reversed(slices))]
    if return_scales:
        scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]]
        return dataset, *scales
    return dataset

def _read_h5_by_value(ifile: Union[Path, str], /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_by_value`."""
    with h5.File(ifile, 'r') as hdf:
        data = hdf[dataset_id or PSI_DATA_ID['h5']]
        if len(xi) != data.ndim:
            raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
        slices = []
        for dimproxy, value in zip(data.dims, xi):
            if dimproxy:
                slices.append(_parse_value_inputs(dimproxy[0], value))
            elif value is None:
                slices.append(slice(None))
            else:
                raise ValueError("Cannot slice by value on dimension without scales")
        dataset = data[tuple(reversed(slices))]
        if return_scales:
            scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim]
            return dataset, *scales
        return dataset


def _read_h4_by_value(ifile: Union[Path, str], /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_by_value`."""
    hdf = h4.SD(str(ifile))
    data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    ndim = data.info()[1]
    if len(xi) != ndim:
        raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
    slices = []
    for (k_, v_), value in zip(reversed(data.dimensions(full=1).items()), xi):
        if v_[3] != 0:
            slices.append(_parse_value_inputs(hdf.select(k_), value))
        elif value is None:
            slices.append(slice(None))
        else:
            raise ValueError("Cannot slice by value on dimension without scales")
    dataset = data[tuple(reversed(slices))]
    if return_scales:
        scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]]
        return dataset, *scales
    return dataset

def _read_h5_by_ivalue(ifile: Union[Path, str], /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_by_ivalue`."""
    with h5.File(ifile, 'r') as hdf:
        data = hdf[dataset_id or PSI_DATA_ID['h5']]
        if len(xi) != data.ndim:
            raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
        slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(data.shape), xi)]
        dataset = data[tuple(reversed(slices))]
        if return_scales:
            scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(data.shape))]
            return dataset, *scales
        return dataset


def _read_h4_by_ivalue(ifile: Union[Path, str], /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_by_ivalue`."""
    hdf = h4.SD(str(ifile))
    data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    ndim, shape = data.info()[1], _cast_shape_tuple(data.info()[2])
    if len(xi) != ndim:
        raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
    slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(shape), xi)]
    dataset = data[tuple(reversed(slices))]
    if return_scales:
        scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(shape))]
        return dataset, *scales
    return dataset

def _write_h4_data(ifile: Union[Path, str], /,
                   data: np.ndarray,
                   *scales: Iterable[np.ndarray],
                   dataset_id: Optional[str] = None
                   ) -> Path:
    dataid = dataset_id or PSI_DATA_ID['h4']
    h4file = h4.SD(str(ifile), h4.SDC.WRITE | h4.SDC.CREATE | h4.SDC.TRUNC)
    sds_id = h4file.create(dataid, NPTYPES_TO_SDCTYPES[data.dtype.name], data.shape)

    if scales:
        for i, scale in enumerate(reversed(scales)):
            if scale is not None:
                sds_id.dim(i).setscale(NPTYPES_TO_SDCTYPES[scale.dtype.name], scale.tolist())

    sds_id.set(data)
    sds_id.endaccess()
    h4file.end()

    return ifile


def _write_h5_data(ifile: Union[Path, str], /,
                   data: np.ndarray,
                   *scales: Iterable[np.ndarray],
                   dataset_id: Optional[str] = None
                   ) -> Path:
    dataid = dataset_id or PSI_DATA_ID['h5']
    with h5.File(ifile, "w") as h5file:
        h5file.create_dataset(dataid, data=data, dtype=data.dtype, shape=data.shape)

        if scales:
            for i, scale in enumerate(scales):
                if scale is not None:
                    h5file.create_dataset(f"dim{i+1}", data=scale, dtype=scale.dtype, shape=scale.shape)
                    h5file[dataid].dims[i].attach_scale(h5file[f"dim{i+1}"])
                    h5file[dataid].dims[i].label = f"dim{i+1}"

    return ifile

def _np_linear_interpolation(xi: Iterable, scales: Iterable, values: np.ndarray):
    """
    Perform linear interpolation over one dimension.

    Parameters
    ----------
    xi : list
        List of values or None for each dimension.
    scales : list
        List of scales (coordinate arrays) for each dimension.
    values : np.ndarray
        The data array to interpolate.

    Returns
    -------
    np.ndarray
        The interpolated data.

    """
    index0 = next((i for i, v in enumerate(xi) if v is not None), None)
    t = (xi[index0] - scales[index0][0])/(scales[index0][1] - scales[index0][0])
    f0 = [slice(None, None)]*values.ndim
    f1 = [slice(None, None)]*values.ndim
    f0[index0] = 0
    f1[index0] = 1

    return (1 - t)*values[tuple(f0)] + t*values[tuple(f1)]

def _np_bilinear_interpolation(xi, scales, values):
    """
    Perform bilinear interpolation over two dimensions.

    Parameters
    ----------
    xi : Iterable
        List of values or None for each dimension.
    scales : Iterable
        List of scales (coordinate arrays) for each dimension.
    values : np.ndarray
        The data array to interpolate.

    Returns
    -------
    np.ndarray
        The interpolated data.

    """
    index0, index1 = [i for i, v in enumerate(xi) if v is not None]
    t, u = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1)]

    f00 = [slice(None, None)]*values.ndim
    f10 = [slice(None, None)]*values.ndim
    f01 = [slice(None, None)]*values.ndim
    f11 = [slice(None, None)]*values.ndim
    f00[index0], f00[index1] = 0, 0
    f10[index0], f10[index1] = 1, 0
    f01[index0], f01[index1] = 0, 1
    f11[index0], f11[index1] = 1, 1

    return (
        (1 - t)*(1 - u)*values[tuple(f00)] +
        t*(1 - u)*values[tuple(f10)] +
        (1 - t)*u*values[tuple(f01)] +
        t*u*values[tuple(f11)]
    )

def _np_trilinear_interpolation(xi, scales, values):
    """
    Perform trilinear interpolation over three dimensions.

    Parameters
    ----------
    xi : list
        List of values or None for each dimension.
    scales : list
        List of scales (coordinate arrays) for each dimension.
    values : np.ndarray
        The data array to interpolate.

    Returns
    -------
    np.ndarray
        The interpolated data.

    """
    index0, index1, index2 = [i for i, v in enumerate(xi) if v is not None]
    t, u, v = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1, index2)]

    f000 = [slice(None, None)]*values.ndim
    f100 = [slice(None, None)]*values.ndim
    f010 = [slice(None, None)]*values.ndim
    f110 = [slice(None, None)]*values.ndim
    f001 = [slice(None, None)]*values.ndim
    f101 = [slice(None, None)]*values.ndim
    f011 = [slice(None, None)]*values.ndim
    f111 = [slice(None, None)]*values.ndim

    f000[index0], f000[index1], f000[index2] = 0, 0, 0
    f100[index0], f100[index1], f100[index2] = 1, 0, 0
    f010[index0], f010[index1], f010[index2] = 0, 1, 0
    f110[index0], f110[index1], f110[index2] = 1, 1, 0
    f001[index0], f001[index1], f001[index2] = 0, 0, 1
    f101[index0], f101[index1], f101[index2] = 1, 0, 1
    f011[index0], f011[index1], f011[index2] = 0, 1, 1
    f111[index0], f111[index1], f111[index2] = 1, 1, 1

    c00 = values[tuple(f000)]*(1 - t) + values[tuple(f100)]*t
    c10 = values[tuple(f010)]*(1 - t) + values[tuple(f110)]*t
    c01 = values[tuple(f001)]*(1 - t) + values[tuple(f101)]*t
    c11 = values[tuple(f011)]*(1 - t) + values[tuple(f111)]*t

    c0 = c00*(1 - u) + c10*u
    c1 = c01*(1 - u) + c11*u

    return c0*(1 - v) + c1*v

def _check_index_ranges(arr_size: int,
                        i0: Union[int, np.integer],
                        i1: Union[int, np.integer]
                        ) -> Tuple[int, int]:
    """
    Adjust index ranges to ensure they cover at least two indices.

    Parameters
    ----------
    arr_size : int
        The size of the array along the dimension.
    i0 : int
        The starting index.
    i1 : int
        The ending index.

    Returns
    -------
    Tuple[int, int]
        Adjusted starting and ending indices.

    Notes
    -----
    This function ensures that the range between `i0` and `i1` includes at least
    two indices for interpolation purposes.

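    Examples
    --------
    >>> _check_index_ranges(10, 0, 0)
    (0, 2)
    >>> _check_index_ranges(10, 4, 4)
    (3, 5)
    >>> _check_index_ranges(10, 10, 10)
    (8, 10)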
    """
    i0, i1 = int(i0), int(i1)
    if i0 == 0:
        return (i0, i1 + 2) if i1 == 0 else (i0, i1 + 1)
    elif i0 == arr_size:
        return i0 - 2, i1
    else:
        return i0 - 1, i1 + 1

def _cast_shape_tuple(input: Union[int, Iterable[int]]
                      ) -> tuple[int, ...]:
    """
    Cast an input to a tuple of integers.

    Parameters
    ----------
    input : int | Iterable[int]
        The input to cast.

    Returns
    -------
    tuple[int]
        The input cast as a tuple of integers.

    Raises
    ------
    TypeError
        If the input is neither an integer nor an iterable of integers.
    """
    if isinstance(input, int):
        return (input,)
    elif isinstance(input, Iterable):
        return tuple(int(i) for i in input)
    else:
        raise TypeError("Input must be an integer or an iterable of integers.")

def _parse_index_inputs(input: Union[int, slice, Iterable[Union[int, None]], None]
                        ) -> slice:
    """
    Parse various slice input formats into a standard slice object.

    Parameters
    ----------
    input : int | slice | Iterable[Union[int, None]] | None
        The input to parse.

    Returns
    -------
    slice
        The parsed slice object.

    Raises
    ------
    TypeError
        If the input type is unsupported.
    ValueError
        If the input iterable does not have exactly two elements.
    """
    if isinstance(input, int):
        return slice(input, input + 1)
    elif isinstance(input, Iterable):
        return slice(*input)
    elif input is None:
        return slice(None)
    else:
        raise TypeError("Unsupported input type for slicing.")

def _parse_value_inputs(dimproxy,
                        value,
                        scale_exists: bool = True
                        ) -> slice:
    if value is None:
        return slice(None)
    if not scale_exists:
        raise ValueError("Cannot parse value inputs when scale does not exist.")
    dim = dimproxy[:]
    if not isinstance(value, Iterable):
        insert_index = np.searchsorted(dim, value)
        return slice(*_check_index_ranges(dim.size, insert_index, insert_index))
    else:
        temp_range = list(value)
        if temp_range[0] is None:
            temp_range[0] = -np.inf
        if temp_range[-1] is None:
            temp_range[-1] = np.inf
        insert_indices = np.searchsorted(dim, temp_range)
        return slice(*_check_index_ranges(dim.size, *insert_indices))

def _parse_ivalue_inputs(dimsize,
                         input: Union[Union[int, float], slice, Iterable[Union[Union[int, float], None]], None]
                         ) -> slice:
    """
    Parse various slice input formats into a standard slice object.

    Parameters
    ----------
    dimsize : int
        The size of the array along the dimension.
    input : int | float | slice | Iterable[Union[int, float, None]] | None
        The input to parse.

    Returns
    -------
    slice
        The parsed slice object.

    Raises
    ------
    TypeError
        If the input type is unsupported.
    ValueError
        If the input iterable does not have exactly two elements.
    """
    if input is None:
        return slice(None)
    elif isinstance(input, (int, float)):
        i0, i1 = math.floor(input), math.ceil(input)
    elif isinstance(input, Iterable):
        i0, i1 = math.floor(input[0]), math.ceil(input[1])
    else:
        raise TypeError("Unsupported input type for slicing.")

    if i0 > i1:
        i0, i1 = i1, i0
    i0, i1 = max(0, i0), min(dimsize - 1, i1)
    if i0 > i1:
        i0, i1 = i1, i0
    i0, i1 = max(0, i0), min(dimsize - 1, i1)
    if (i1 - i0) < 2:
        return slice(i0, i1 + 2 - (i1-i0))
    else:
        return slice(i0, i1)