1"""
2Routines for reading and writing PSI-style HDF5 and HDF4 data files.
3
4This module provides a unified interface for interacting with PSI's HDF data
5ecosystem. It handles both HDF4 (``.hdf``) and HDF5 (``.h5``) file formats,
6automatically dispatching to the appropriate backend based on the file
7extension.
8
9Key interfaces
10--------------
11Reading full datasets:
12 :func:`read_hdf_data`, :func:`rdhdf_1d`, :func:`rdhdf_2d`, :func:`rdhdf_3d`
13
14Writing full datasets:
15 :func:`write_hdf_data`, :func:`wrhdf_1d`, :func:`wrhdf_2d`, :func:`wrhdf_3d`
16
17Reading file metadata:
18 :func:`read_hdf_meta`, :func:`read_rtp_meta`
19
20Reading dataset subsets:
21 :func:`get_scales_1d`, :func:`get_scales_2d`, :func:`get_scales_3d`,
22 :func:`read_hdf_by_index`, :func:`read_hdf_by_value`, :func:`read_hdf_by_ivalue`
23
24Interpolating data:
25 :func:`np_interpolate_slice_from_hdf`, :func:`sp_interpolate_slice_from_hdf`,
26 :func:`interpolate_positions_from_hdf`
27
28Converting between formats:
29 :func:`convert`, :func:`convert_psih4_to_psih5`
30
31See Also
32--------
33:mod:`psi_io.data` :
34 Helpers for fetching example HDF data files.
35
36Examples
37--------
38Read a 3D PSI-style HDF5 file:
39
40>>> from psi_io import read_hdf_data
41>>> from psi_io.data import get_3d_data
42>>> filepath = get_3d_data()
43>>> data, r, t, p = read_hdf_data(filepath)
44>>> data.shape
45(181, 100, 151)
46"""
47
48from __future__ import annotations
49
# Explicit public API of this module, grouped by functionality
# (the groups mirror the "Key interfaces" section of the module docstring).
__all__ = [
    # File metadata inspection.
    "read_hdf_meta",
    "read_rtp_meta",

    # Coordinate-scale readers (do not load the data array).
    "get_scales_1d",
    "get_scales_2d",
    "get_scales_3d",

    # Subset readers (by index, by value range, or by subindex values).
    "read_hdf_by_index",
    "read_hdf_by_value",
    "read_hdf_by_ivalue",

    # Interpolation routines.
    "np_interpolate_slice_from_hdf",
    "sp_interpolate_slice_from_hdf",
    "interpolate_positions_from_hdf",

    "instantiate_linear_interpolator",
    "interpolate_point_from_1d_slice",
    "interpolate_point_from_2d_slice",

    # Full-dataset readers.
    "read_hdf_data",
    "rdhdf_1d",
    "rdhdf_2d",
    "rdhdf_3d",

    # Full-dataset writers.
    "write_hdf_data",
    "wrhdf_1d",
    "wrhdf_2d",
    "wrhdf_3d",

    # Format conversion.
    "convert",
    "convert_psih4_to_psih5"
]
83
84import math
85from collections import namedtuple
86from pathlib import Path
87from types import MappingProxyType
88from typing import Optional, Literal, Tuple, Sequence, List, Dict, Union, Callable, Any
89
90import numpy as np
91import h5py as h5
92
# -----------------------------------------------------------------------------
# Optional Imports and Import Checking
# -----------------------------------------------------------------------------
# These packages are needed by several functions and must be imported in the
# module namespace.
try:
    import pyhdf.SD as h4
    # Flag consulted by _except_no_pyhdf() before any HDF4 operation.
    H4_AVAILABLE = True
    DTYPE_TO_SDC = MappingProxyType({
        # Signed integers, keyed by itemsize in bytes.
        "i": {
            1: h4.SDC.INT8,
            2: h4.SDC.INT16,
            4: h4.SDC.INT32,
        },
        # Unsigned integers, keyed by itemsize in bytes.
        "u": {
            1: h4.SDC.UINT8,
            2: h4.SDC.UINT16,
            4: h4.SDC.UINT32,
        },
        # Floating point, keyed by itemsize in bytes.
        "f": {
            4: h4.SDC.FLOAT32,
            8: h4.SDC.FLOAT64,
        },
        # numpy bool ("b" kind) is written as an unsigned byte.
        "b": {
            1: h4.SDC.UINT8
        },
        # String kinds map directly, with no itemsize sub-mapping.
        "U": h4.SDC.CHAR,
        "S": h4.SDC.UCHAR
    })
    """
    Helper dictionary mapping :class:`~numpy.dtype` kinds to HDF4 SDC types.

    The keys are :attr:`~numpy.dtype.kind`, and the values are either a direct
    mapping (for byte-string or unicode-string types) or a nested mapping of
    :attr:`~numpy.dtype.itemsize` to SDC type (for numeric types).
    """
except ImportError:
    # pyhdf is optional: HDF4 code paths fail at call time with a clear
    # ImportError (via _except_no_pyhdf) rather than at module import time.
    H4_AVAILABLE = False
    DTYPE_TO_SDC = {}

try:
    from scipy.interpolate import RegularGridInterpolator
    SCIPY_AVAILABLE = True
except ImportError:
    # scipy is optional: interpolation routines check SCIPY_AVAILABLE via
    # _except_no_scipy() before use.
    SCIPY_AVAILABLE = False
138
139
140# Functions to stop execution if a package doesn't exist.
def _except_no_pyhdf():
    """Guard for HDF4 code paths: fail fast when ``pyhdf`` is missing.

    This is a no-op when ``pyhdf`` was importable at module load time
    (``H4_AVAILABLE`` is ``True``).

    Raises
    ------
    ImportError
        If ``pyhdf`` was not importable at module load time.
    """
    if H4_AVAILABLE:
        return
    raise ImportError('The pyhdf package is required to read/write HDF4 .hdf files!')
160
161
def _except_no_scipy():
    """Guard for interpolation code paths: fail fast when ``scipy`` is missing.

    This is a no-op when ``scipy`` was importable at module load time
    (``SCIPY_AVAILABLE`` is ``True``).

    Raises
    ------
    ImportError
        If ``scipy`` was not importable at module load time.
    """
    if SCIPY_AVAILABLE:
        return
    raise ImportError('The scipy package is required for the interpolation routines!')
181
182
# Read-only mapping from HDF4 SDC numeric type codes to equivalent numpy
# dtypes (the inverse direction of DTYPE_TO_SDC, keyed by the integer codes
# used by pyhdf).
SDC_TYPE_CONVERSIONS = MappingProxyType({
    3: np.dtype("ubyte"),
    4: np.dtype("byte"),
    5: np.dtype("float32"),
    6: np.dtype("float64"),
    20: np.dtype("int8"),
    21: np.dtype("uint8"),
    22: np.dtype("int16"),
    23: np.dtype("uint16"),
    24: np.dtype("int32"),
    25: np.dtype("uint32")
})
"""Helper dictionary for mapping HDF4 types to numpy dtypes"""
196
197
# Default dataset identifier per backend key ('h4' -> HDF4, 'h5' -> HDF5);
# used when a caller passes dataset_id=None to the read/write routines.
PSI_DATA_ID = MappingProxyType({
    'h4': 'Data-Set-2',
    'h5': 'Data'
})
"""Mapping of PSI standard dataset names for HDF4 and HDF5 files"""


# Per-backend names of the scale (coordinate) datasets for each dimension,
# in dimension order.
PSI_SCALE_ID = MappingProxyType({
    'h4': ('fakeDim0', 'fakeDim1', 'fakeDim2'),
    'h5': ('dim1', 'dim2', 'dim3')
})
"""Mapping of PSI standard scale names for HDF4 and HDF5 files"""
210
211
# Use an immutable frozenset for consistency with the module's other
# read-only constants (which use MappingProxyType); membership tests and
# iteration behave identically to the previous mutable set.
HDFEXT = frozenset({'.hdf', '.h5'})
"""Set of possible HDF file extensions"""


HdfExtType = Literal['.hdf', '.h5']
"""Type alias for possible HDF file extensions"""
218
219
# Lightweight record describing a single scale (coordinate) dimension.
# NOTE(review): presumably produced by the metadata readers
# (read_hdf_meta / read_rtp_meta) — confirm against their implementations.
HdfScaleMeta = namedtuple('HdfScaleMeta', ['name', 'type', 'shape', 'imin', 'imax'])
"""
    Named tuple storing metadata for a single HDF scale (coordinate) dimension.

    Parameters
    ----------
    name : str
        The name of the scale dataset.
    type : str
        The data type of the scale.
    shape : tuple[int, ...]
        The shape of the scale array.
    imin : float
        The minimum value of the scale.
        This assumes the scale is monotonically increasing.
    imax : float
        The maximum value of the scale.
        This assumes the scale is monotonically increasing.
"""


# Lightweight record describing a full dataset, including its per-dimension
# scale metadata (a list of HdfScaleMeta entries, possibly empty).
HdfDataMeta = namedtuple('HdfDataMeta', ['name', 'type', 'shape', 'attr', 'scales'])
"""
    Named tuple for HDF dataset metadata

    Parameters
    ----------
    name : str
        The name of the dataset.
    type : str
        The data type of the dataset.
    shape : tuple[int, ...]
        The shape of the dataset.
    attr : dict[str, Any]
        A dictionary of attributes associated with the dataset.
    scales : list[HdfScaleMeta]
        A list of scale metadata objects corresponding to each dimension of the dataset.
        If the dataset has no scales, this list will be empty.
"""

PathLike = Union[Path, str]
"""Type alias for file paths, accepting either :class:`pathlib.Path` or str"""
262
263
def _dtype_to_sdc(dtype: np.dtype):
    """Map a numpy dtype onto its HDF4 SDC type constant.

    String kinds (``'U'``, ``'S'``) map directly; numeric kinds are resolved
    through a nested ``kind -> itemsize -> SDC`` lookup.

    Parameters
    ----------
    dtype : np.dtype
        The numpy dtype to translate.

    Returns
    -------
    out : int
        The HDF4 SDC type constant corresponding to ``dtype``.

    Raises
    ------
    ImportError
        If the ``pyhdf`` package is not available.
    KeyError
        If the dtype kind or itemsize has no HDF4 equivalent. See
        :func:`write_hdf_data` for the full dtype support table.
    """
    _except_no_pyhdf()
    kind, size = dtype.kind, dtype.itemsize
    # String kinds have a flat mapping with no itemsize sub-table.
    if kind in ("U", "S"):
        return DTYPE_TO_SDC[kind]

    try:
        return DTYPE_TO_SDC[kind][size]
    except KeyError as err:
        # Distinguish "unknown kind" from "known kind, unsupported width"
        # so the error message points at the actual problem.
        if kind not in DTYPE_TO_SDC:
            raise KeyError(
                f"Unsupported dtype kind '{kind}' for HDF4. "
                f"Supported kinds are: {set(DTYPE_TO_SDC.keys())}"
            ) from err
        if size not in DTYPE_TO_SDC[kind]:
            raise KeyError(
                f"Unsupported itemsize '{size}' for dtype kind '{kind}' in HDF4. "
                f"Supported itemsizes are: {set(DTYPE_TO_SDC[kind].keys())}"
            ) from err
        raise
309
310
311def _dispatch_by_ext(ifile: PathLike,
312 hdf4_func: Callable,
313 hdf5_func: Callable,
314 *args: Any, **kwargs: Any
315 ):
316 """
317 Dispatch function to call HDF4 or HDF5 specific functions based on file extension.
318
319 Parameters
320 ----------
321 ifile : PathLike
322 The path to the HDF file.
323 hdf4_func : Callable
324 The function to call for HDF4 files.
325 hdf5_func : Callable
326 The function to call for HDF5 files.
327 *args : Any
328 Positional arguments to pass to the selected function.
329 **kwargs : Any
330 Keyword arguments to pass to the selected function.
331
332 Returns
333 -------
334 out : Any
335 The return value of the dispatched function.
336
337 Raises
338 ------
339 ValueError
340 If the file does not have a `.hdf` or `.h5` extension.
341 ImportError
342 If the file is HDF4 and the `pyhdf` package is not available.
343
344 Examples
345 --------
346 >>> from psi_io.psi_io import _dispatch_by_ext, _read_h5_data, _read_h4_data
347 >>> from psi_io.data import get_3d_data
348 >>> filepath = get_3d_data()
349 >>> data, *_ = _dispatch_by_ext(filepath, _read_h4_data, _read_h5_data)
350 >>> data.shape
351 (181, 100, 151)
352 """
353 ipath = Path(ifile)
354 if ipath.suffix == '.h5':
355 return hdf5_func(ifile, *args, **kwargs)
356 if ipath.suffix == '.hdf':
357 _except_no_pyhdf()
358 return hdf4_func(ifile, *args, **kwargs)
359 raise ValueError("File must be HDF4 (.hdf) or HDF5 (.h5)")
360
361
362# -----------------------------------------------------------------------------
363# "Classic" HDF reading and writing routines adapted from psihdf.py or psi_io.py.
364# -----------------------------------------------------------------------------
365
366
def rdhdf_1d(hdf_filename: PathLike) -> Tuple[np.ndarray, np.ndarray]:
    """Read a 1D PSI-style HDF5 or HDF4 file, returning its scale and data.

    Parameters
    ----------
    hdf_filename : PathLike
        Path to a 1D HDF5 (``.h5``) or HDF4 (``.hdf``) file.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array.
    f : np.ndarray
        1D data array.

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> from psi_io import rdhdf_1d
    >>> from psi_io.data import get_1d_data
    >>> x, f = rdhdf_1d(get_1d_data())
    >>> x.shape, f.shape
    ((151,), (151,))
    """
    # Delegate to the shared n-dimensional reader.
    return _rdhdf_nd(hdf_filename, dimensionality=1)
397
398
def rdhdf_2d(hdf_filename: PathLike) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Read a 2D PSI-style HDF5 or HDF4 file.

    The on-disk data is assumed to be X, Y in Fortran (column-major) order,
    with a 1D rectilinear coordinate scale attached to each dimension.

    Parameters
    ----------
    hdf_filename : PathLike
        Path to a 2D HDF5 (``.h5``) or HDF4 (``.hdf``) file.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.
    f : np.ndarray
        2D data array, C-ordered with shape ``(ny, nx)`` in Python.

    Notes
    -----
    Because Python is row-major while the files follow Fortran conventions,
    the returned array's axis order is reversed relative to disk: the first
    returned scale corresponds to the *last* (fastest on disk, slowest in
    Python) axis of ``f``.

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> from psi_io import rdhdf_2d
    >>> from psi_io.data import get_2d_data
    >>> x, y, f = rdhdf_2d(get_2d_data())
    >>> f.shape == (y.shape[0], x.shape[0])
    True
    """
    # Delegate to the shared n-dimensional reader.
    return _rdhdf_nd(hdf_filename, dimensionality=2)
442
443
def rdhdf_3d(hdf_filename: PathLike) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Read a 3D PSI-style HDF5 or HDF4 file.

    The on-disk data is assumed to be X, Y, Z in Fortran (column-major)
    order, with a 1D rectilinear coordinate scale attached to each dimension.

    Parameters
    ----------
    hdf_filename : PathLike
        Path to a 3D HDF5 (``.h5``) or HDF4 (``.hdf``) file.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.
    z : np.ndarray
        1D coordinate scale array for the Z dimension.
    f : np.ndarray
        3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.

    Notes
    -----
    Because Python is row-major while the files follow Fortran conventions,
    the returned array's axis order is reversed relative to disk. For PSI
    spherical datasets this means ``f.shape == (n_phi, n_theta, n_r)`` with
    scales returned in physical order ``(r, theta, phi)``.

    See Also
    --------
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> from psi_io import rdhdf_3d
    >>> from psi_io.data import get_3d_data
    >>> r, t, p, f = rdhdf_3d(get_3d_data())
    >>> f.shape == (p.shape[0], t.shape[0], r.shape[0])
    True
    """
    # Delegate to the shared n-dimensional reader.
    return _rdhdf_nd(hdf_filename, dimensionality=3)
490
491
def wrhdf_1d(hdf_filename: PathLike,
             x: np.ndarray,
             f: np.ndarray,
             **kwargs) -> None:
    """Write a 1D PSI-style HDF5 or HDF4 file.

    Legacy-compatible wrapper around :func:`~psi_io.psi_io.write_hdf_data`:
    the signature, argument order, and defaults match the historical PSI
    routines.

    Parameters
    ----------
    hdf_filename : PathLike
        Path of the 1D HDF5 (``.h5``) or HDF4 (``.hdf``) file to create.
    x : np.ndarray
        1D array of scales.
    f : np.ndarray
        1D array of data.
    **kwargs
        Forwarded to :func:`~psi_io.psi_io.write_hdf_data`; notably
        ``dataset_id`` (``None`` selects the PSI defaults, ``'Data-Set-2'``
        for HDF4 and ``'Data'`` for HDF5) and ``sync_dtype`` (match the
        scale dtypes to the data dtype).

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
        support table.

    Notes
    -----
    .. warning::
        With default arguments this routine **writes to the default PSI
        dataset IDs and synchronizes dtypes between the dataset and
        scales** — behavior required for interoperability with certain
        Fortran-based PSI tools.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import wrhdf_1d, rdhdf_1d
    >>> x = np.linspace(0.0, 1.0, 50, dtype=np.float32)
    >>> f = np.sin(x)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     wrhdf_1d(Path(d) / "out.h5", x, f)
    ...     x2, f2 = rdhdf_1d(Path(d) / "out.h5")
    ...     np.array_equal(f, f2)
    True
    """
    # The shared writer takes the data array first, then the scales.
    return _wrhdf_nd(hdf_filename, f, x, dimensionality=1, **kwargs)
560
561
def wrhdf_2d(hdf_filename: PathLike,
             x: np.ndarray,
             y: np.ndarray,
             f: np.ndarray,
             **kwargs) -> None:
    """Write a 2D PSI-style HDF5 or HDF4 file.

    The data will appear as X, Y in Fortran order on disk, with a 1D
    rectilinear coordinate scale attached to each dimension. This is a
    legacy-compatible wrapper around :func:`~psi_io.psi_io.write_hdf_data`.

    Parameters
    ----------
    hdf_filename : PathLike
        Path of the 2D HDF5 (``.h5``) or HDF4 (``.hdf``) file to create.
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    f : np.ndarray
        2D data array, C-ordered with shape ``(ny, nx)`` in Python.
    **kwargs
        Forwarded to :func:`~psi_io.psi_io.write_hdf_data`; notably
        ``dataset_id`` (``None`` selects the PSI defaults, ``'Data-Set-2'``
        for HDF4 and ``'Data'`` for HDF5) and ``sync_dtype`` (match the
        scale dtypes to the data dtype).

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
        support table.

    Notes
    -----
    .. warning::
        With default arguments this routine **writes to the default PSI
        dataset IDs and synchronizes dtypes between the dataset and
        scales** — behavior required for interoperability with certain
        Fortran-based PSI tools.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import wrhdf_2d, rdhdf_2d
    >>> x = np.linspace(0.0, 2*np.pi, 64, dtype=np.float32)
    >>> y = np.linspace(0.0, np.pi, 32, dtype=np.float32)
    >>> f = np.outer(np.sin(x), np.cos(y)).astype(np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     wrhdf_2d(Path(d) / "out.h5", x, y, f)
    ...     x2, y2, f2 = rdhdf_2d(Path(d) / "out.h5")
    ...     np.array_equal(f, f2)
    True
    """
    # The shared writer takes the data array first, then the scales in order.
    return _wrhdf_nd(hdf_filename, f, x, y, dimensionality=2, **kwargs)
639
640
def wrhdf_3d(hdf_filename: PathLike,
             x: np.ndarray,
             y: np.ndarray,
             z: np.ndarray,
             f: np.ndarray,
             **kwargs) -> None:
    """Write a 3D PSI-style HDF5 or HDF4 file.

    The data will appear as X, Y, Z in Fortran order on disk, with a 1D
    rectilinear coordinate scale attached to each dimension. This is a
    legacy-compatible wrapper around :func:`~psi_io.psi_io.write_hdf_data`.

    Parameters
    ----------
    hdf_filename : PathLike
        Path of the 3D HDF5 (``.h5``) or HDF4 (``.hdf``) file to create.
    x : np.ndarray
        1D array of scales in the X dimension.
    y : np.ndarray
        1D array of scales in the Y dimension.
    z : np.ndarray
        1D array of scales in the Z dimension.
    f : np.ndarray
        3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.
    **kwargs
        Forwarded to :func:`~psi_io.psi_io.write_hdf_data`; notably
        ``dataset_id`` (``None`` selects the PSI defaults, ``'Data-Set-2'``
        for HDF4 and ``'Data'`` for HDF5) and ``sync_dtype`` (match the
        scale dtypes to the data dtype).

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
        support table.

    Notes
    -----
    .. warning::
        With default arguments this routine **writes to the default PSI
        dataset IDs and synchronizes dtypes between the dataset and
        scales** — behavior required for interoperability with certain
        Fortran-based PSI tools.

    See Also
    --------
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import wrhdf_3d, rdhdf_3d
    >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
    >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
    >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
    >>> f = np.ones((30, 20, 10), dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     wrhdf_3d(Path(d) / "out.h5", r, t, p, f)
    ...     r2, t2, p2, f2 = rdhdf_3d(Path(d) / "out.h5")
    ...     np.array_equal(f, f2)
    True
    """
    # The shared writer takes the data array first, then the scales in order.
    return _wrhdf_nd(hdf_filename, f, x, y, z, dimensionality=3, **kwargs)
722
723
def get_scales_1d(filename: PathLike) -> np.ndarray:
    """Return only the coordinate scale of a 1D PSI-style HDF file.

    The data array itself is never loaded, so this stays cheap even for
    very large files.

    Parameters
    ----------
    filename : PathLike
        Path to a 1D HDF5 (``.h5``) or HDF4 (``.hdf``) file.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array.

    Examples
    --------
    >>> from psi_io import get_scales_1d
    >>> from psi_io.data import get_1d_data
    >>> x = get_scales_1d(get_1d_data())
    >>> x.ndim
    1
    """
    # Backend selection (HDF4 vs HDF5) is driven by the file extension.
    return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
                            dimensionality=1)
751
752
def get_scales_2d(filename: PathLike) -> Tuple[np.ndarray, np.ndarray]:
    """Return only the coordinate scales of a 2D PSI-style HDF file.

    The data array itself is never loaded, so this stays cheap even for
    very large files. The on-disk data is assumed to be X, Y in Fortran
    order.

    Parameters
    ----------
    filename : PathLike
        Path to a 2D HDF5 (``.h5``) or HDF4 (``.hdf``) file.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.

    Examples
    --------
    >>> from psi_io import get_scales_2d
    >>> from psi_io.data import get_2d_data
    >>> x, y = get_scales_2d(get_2d_data())
    >>> x.ndim, y.ndim
    (1, 1)
    """
    # Backend selection (HDF4 vs HDF5) is driven by the file extension.
    return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
                            dimensionality=2)
783
784
def get_scales_3d(filename: PathLike) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Return only the coordinate scales of a 3D PSI-style HDF file.

    The data array itself is never loaded, so this stays cheap even for
    very large files. The on-disk data is assumed to be X, Y, Z in Fortran
    order.

    Parameters
    ----------
    filename : PathLike
        Path to a 3D HDF5 (``.h5``) or HDF4 (``.hdf``) file.

    Returns
    -------
    x : np.ndarray
        1D coordinate scale array for the X dimension.
    y : np.ndarray
        1D coordinate scale array for the Y dimension.
    z : np.ndarray
        1D coordinate scale array for the Z dimension.

    Examples
    --------
    >>> from psi_io import get_scales_3d
    >>> from psi_io.data import get_3d_data
    >>> r, t, p = get_scales_3d(get_3d_data())
    >>> r.ndim, t.ndim, p.ndim
    (1, 1, 1)
    """
    # Backend selection (HDF4 vs HDF5) is driven by the file extension.
    return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
                            dimensionality=3)
817
818
819# -----------------------------------------------------------------------------
820# "Updated" HDF reading and slicing routines for Hdf4 and Hdf5 datasets.
821# -----------------------------------------------------------------------------
822
823
883
884
926
927
def read_hdf_data(ifile: PathLike, /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Tuple[np.ndarray]:
    """Read a full dataset from an HDF4 (.hdf) or HDF5 (.h5) file.

    Parameters
    ----------
    ifile : PathLike
        Path to the HDF file to read.
    dataset_id : str | None, optional
        Identifier of the dataset to read. ``None`` (default) selects the
        PSI defaults: ``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5.
    return_scales : bool, optional
        When ``True`` (default), also return the coordinate scale arrays,
        one per dimension.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The data array, or — when ``return_scales`` is ``True`` — a tuple
        ``(data, scale_0, scale_1, ...)``.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_index : Read HDF datasets by index.
    read_hdf_by_value : Read HDF datasets by value ranges.
    read_hdf_by_ivalue : Read HDF datasets by subindex values.

    Examples
    --------
    >>> from psi_io import read_hdf_data
    >>> from psi_io.data import get_3d_data
    >>> data, r, t, p = read_hdf_data(get_3d_data())
    >>> data.shape
    (181, 100, 151)
    >>> r.shape, t.shape, p.shape
    ((151,), (100,), (181,))
    """
    # Extension-based dispatch to the HDF4 or HDF5 backend reader.
    return _dispatch_by_ext(ifile, _read_h4_data, _read_h5_data,
                            dataset_id=dataset_id, return_scales=return_scales)
982
983
def read_hdf_by_index(ifile: PathLike, /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    r"""Read a subset of an HDF4 (.hdf) or HDF5 (.h5) dataset by index.

    .. attention::
        Every dimension yields at least one element: passing three ints for
        a 3D dataset produces a ``(1, 1, 1)`` subset with length-1 scales.

    Parameters
    ----------
    ifile : PathLike
        Path to the HDF file to read.
    *xi : int | tuple[int | None, int | None] | None
        One selection per dimension: an index, an (start, stop) range, or
        ``None`` to keep the whole dimension. With no arguments the entire
        dataset (and scales) is returned via
        :func:`~psi_io.psi_io.read_hdf_data`.
    dataset_id : str | None, optional
        Identifier of the dataset to read. ``None`` (default) selects the
        PSI defaults: ``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5.
    return_scales : bool, optional
        When ``True`` (default), also return the (subset) coordinate scale
        arrays, one per dimension.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The selected data array, or — when ``return_scales`` is ``True`` —
        a tuple ``(data, scale_0, scale_1, ...)``.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_value : Read HDF datasets by value ranges.
    read_hdf_by_ivalue : Read HDF datasets by subindex values.
    read_hdf_data : Read entire HDF datasets.

    Notes
    -----
    Fortran (column-major) ordering is assumed for compatibility with PSI's
    data ecosystem: for an :math:`n`-dimensional array of shape
    :math:`(D_0, D_1, \ldots, D_{n-1})` the scales satisfy
    :math:`|x_i| = |D_{(n-1)-i}|`; e.g. a 3D dataset shaped
    :math:`(D_\phi, D_\theta, D_r)` has scales :math:`r, \theta, \phi`.

    Each ``*xi`` argument is forwarded to Python's built-in :class:`slice`
    so the subset is extracted without reading the full dataset into memory.

    Examples
    --------
    Import a 3D HDF5 cube.

    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_index
    >>> filepath = get_3d_data()

    Extract a radial slice at the first radial index from a 3D cube:

    >>> f, r, t, p = read_hdf_by_index(filepath, 0, None, None)
    >>> f.shape, r.shape, t.shape, p.shape
    ((181, 100, 1), (1,), (100,), (181,))

    Extract a phi slice at the 90th index from a 3D cube:

    >>> f, r, t, p = read_hdf_by_index(filepath, None, None, 90)
    >>> f.shape, r.shape, t.shape, p.shape
    ((1, 100, 151), (151,), (100,), (1,))

    Extract up to the 20th radial index with phi indices 10 to 25:

    >>> f, r, t, p = read_hdf_by_index(filepath, (None, 20), None, (10, 25))
    >>> f.shape, r.shape, t.shape, p.shape
    ((15, 100, 20), (20,), (100,), (15,))
    """
    # No per-dimension selections: fall back to the full-dataset reader.
    if not xi:
        return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
    # Otherwise dispatch to the extension-appropriate indexed reader.
    return _dispatch_by_ext(ifile, _read_h4_by_index, _read_h5_by_index,
                            *xi, dataset_id=dataset_id, return_scales=return_scales)
1075
1076
def read_hdf_by_value(ifile: PathLike, /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """
    Read a subset of an HDF4 (.hdf) or HDF5 (.h5) dataset by coordinate value.

    For each dimension of the dataset, pass either a single value, a
    ``(low, high)`` range, or ``None`` (take the whole dimension). Only the
    requested subset is read from disk; the full dataset is never loaded.

    .. note::
        Each selected dimension returns at least 2 elements, so the result
        can be fed directly to a linear interpolation routine *e.g.* passing
        3 floats for a 3D dataset yields a subset of shape ``(2, 2, 2)``.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : float | tuple[float, float] | None
        One selection per dimension of the dataset named by ``dataset_id``.
        ``None`` selects all indices of a dimension; with no selections at
        all, the entire dataset (and its scales) is returned.
    dataset_id : str | None, optional
        The identifier of the dataset to read. If ``None``, a default dataset
        is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        Default is ``None``.
    return_scales : bool, optional
        If ``True``, the coordinate scale arrays for each dimension are also
        returned. Default is ``True``.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The selected data array. If ``return_scales`` is ``True``, returns a
        tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_index : Read HDF datasets by integer index.
    read_hdf_by_ivalue : Read HDF datasets by fractional index value.
    read_hdf_data : Read entire HDF datasets.

    Notes
    -----
    Dispatches on file extension to :func:`_read_h5_by_value` (HDF5) or
    :func:`_read_h4_by_value` (HDF4).

    Per PSI conventions, data are Fortran (column-major) ordered: a 3D
    dataset with shape :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p`
    for the radial, theta, and phi dimensions respectively.

    For a given scale :math:`x_j`:

    - a single float :math:`a` yields the 2-element bracket :math:`xʹ_j`
      with :math:`xʹ_j[0] <= a < xʹ_j[1]`;
    - a tuple :math:`(a_0, a_1)` yields the *m*-element run :math:`xʹ_j`
      with :math:`xʹ_j[0] <= a_0` and :math:`xʹ_j[m-1] > a_1`;
    - ``None`` yields the entire scale :math:`x_j`.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_value
    >>> filepath = get_3d_data()

    Extract a radial slice at r=15 from a 3D cube:

    >>> f, r, t, p = read_hdf_by_value(filepath, 15, None, None)
    >>> f.shape, r.shape, t.shape, p.shape
    ((181, 100, 2), (2,), (100,), (181,))

    Extract the values between 3.2 and 6.4 (radially) with phi equal to 4.5:

    >>> f, r, t, p = read_hdf_by_value(filepath, (3.2, 6.4), None, 4.5)
    >>> f.shape, r.shape, t.shape, p.shape
    ((2, 100, 15), (15,), (100,), (2,))
    """
    if xi:
        return _dispatch_by_ext(ifile, _read_h4_by_value, _read_h5_by_value,
                                *xi, dataset_id=dataset_id, return_scales=return_scales)
    # No per-dimension selections were supplied: read everything.
    return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1184
1185
def read_hdf_by_ivalue(ifile: PathLike, /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    r"""
    Read a subset of an HDF4 (.hdf) or HDF5 (.h5) dataset by fractional index.

    Works like :func:`read_hdf_by_value` but in index space rather than
    physical coordinate space: each selection is a fractional *index*
    position (or range), and the minimal bracketing subset required for
    interpolation is returned.

    .. note::
        Each selected dimension returns at least 2 elements *e.g.* passing
        3 scalar floats for a 3D dataset yields a subset of shape
        ``(2, 2, 2)`` with index arrays of length 2.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : float | tuple[float, float] | None
        Fractional index selections, one per dimension of the
        ``n``-dimensional dataset. ``None`` selects an entire dimension;
        with no selections at all, the full dataset is returned.
    dataset_id : str | None, optional
        The identifier of the dataset to read. If ``None``, a default dataset
        is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        Default is ``None``.
    return_scales : bool, optional
        If ``True``, 0-based index arrays (generated with
        :func:`~numpy.arange`) are returned for each dimension alongside the
        data, regardless of whether the dataset has physical coordinate
        scales. Default is ``True``.

    Returns
    -------
    out : np.ndarray | tuple[np.ndarray, ...]
        The selected data array. If ``return_scales`` is ``True``, returns a
        tuple ``(data, idx_0, idx_1, ...)`` with one index array per dimension.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.

    See Also
    --------
    read_hdf_by_index : Read HDF datasets by integer index.
    read_hdf_by_value : Read HDF datasets by physical coordinate value.
    read_hdf_data : Read entire HDF datasets.

    Notes
    -----
    Dispatches on file extension to :func:`_read_h5_by_ivalue` (HDF5) or
    :func:`_read_h4_by_ivalue` (HDF4).

    For a dimension with scale :math:`x_j`, the bracketing rule is:

    - *single float* :math:`a` → returns
      :math:`x_j[\lfloor a \rfloor], x_j[\lceil a \rceil]`;
    - *(float, float)* :math:`(a_0, a_1)` → returns
      :math:`x_j[\lfloor a_0 \rfloor], \ldots, x_j[\lceil a_1 \rceil]`;
    - ``None`` → returns the entire scale :math:`x_j`.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_ivalue
    >>> filepath = get_3d_data()

    Extract a 2-element bracket around fractional radial index 2.7:

    >>> f, r_idx, t_idx, p_idx = read_hdf_by_ivalue(filepath, 2.7, None, None)
    >>> r_idx.shape
    (2,)
    """
    if xi:
        return _dispatch_by_ext(ifile, _read_h4_by_ivalue, _read_h5_by_ivalue,
                                *xi, dataset_id=dataset_id, return_scales=return_scales)
    # No selections: fall through to a full read of data and scales.
    return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1271
1272
def write_hdf_data(ifile: PathLike, /,
                   data: np.ndarray,
                   *scales: Sequence[Union[np.ndarray, None]],
                   dataset_id: Optional[str] = None,
                   sync_dtype: bool = False,
                   strict: bool = True,
                   **kwargs
                   ) -> Path:
    """
    Write data to an HDF4 (.hdf) or HDF5 (.h5) file.

    Following PSI conventions, the data array is assumed to be
    Fortran-ordered, with scales given in per-dimension order *e.g.* a 3D
    dataset of shape (Dp, Dt, Dr) has scales r, t, p for the radial, theta,
    and phi dimensions respectively.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to write.
    data : np.ndarray
        The data array to write.
    *scales : Sequence[np.ndarray | None]
        The scales (coordinate arrays) for each dimension. Fewer scales than
        dimensions is allowed; pass ``None`` for dimensions that should not
        have an attached scale. With no scales, the dataset is written
        without coordinate variables.
    dataset_id : str | None, optional
        The identifier of the dataset to write. If ``None``, a default
        dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        Default is ``None``.
    sync_dtype : bool, optional
        If ``True``, scale dtypes are cast to match the data array dtype,
        mimicking PSI's legacy HDF writers for compatibility with Fortran
        tools that require uniform precision. Default is ``False``.
    strict : bool, optional
        If ``True``, raise when a dataset attribute cannot be written to the
        target format; if ``False``, print a warning and skip it.
        Default is ``True``.
    **kwargs
        Key-value pairs of dataset attributes to attach to the dataset.

    Returns
    -------
    out : Path
        The path to the written HDF file.

    Raises
    ------
    ValueError
        If the file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 files, the data or scale dtype is not supported by
        :py:mod:`pyhdf` (see Notes).

    Notes
    -----
    Dispatches on file extension to :func:`_write_h5_data` (HDF5) or
    :func:`_write_h4_data` (HDF4).

    HDF4 dtype support comes from the :data:`DTYPE_TO_SDC` mapping; HDF5
    support comes from :mod:`h5py`. In short: HDF4 accepts ``float32``,
    ``float64``, ``int8``/``int16``/``int32``, ``uint8``/``uint16``/``uint32``
    and ``bool`` (stored as ``uint8``), while ``float16``, ``int64``,
    ``uint64`` and the complex dtypes are HDF5-only.

    See Also
    --------
    wrhdf_1d : Write 1D HDF files.
    wrhdf_2d : Write 2D HDF files.
    wrhdf_3d : Write 3D HDF files.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io import write_hdf_data, read_hdf_data
    >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
    >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
    >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
    >>> f = np.ones((30, 20, 10), dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     write_hdf_data(Path(d) / "out.h5", f, r, t, p)
    ...     data, r2, t2, p2 = read_hdf_data(Path(d) / "out.h5")
    ...     data.shape
    (30, 20, 10)
    """
    # Collect the writer options once, then let the extension dispatcher
    # pick the HDF4 or HDF5 backend.
    writer_opts = {"dataset_id": dataset_id, "sync_dtype": sync_dtype,
                   "strict": strict, **kwargs}
    return _dispatch_by_ext(ifile, _write_h4_data, _write_h5_data,
                            data, *scales, **writer_opts)
1412
1413
def convert(ifile: PathLike,
            ofile: Optional[PathLike] = None,
            strict: bool = True) -> Path:
    """
    Convert an HDF file between HDF4 (.hdf) and HDF5 (.h5) formats.

    All datasets and their attributes are preserved in the output file. The
    output format is inferred from the input (`.hdf` → `.h5` and vice versa)
    unless an explicit output path is provided.

    Parameters
    ----------
    ifile : PathLike
        The path to the source HDF file.
    ofile : PathLike, optional
        The path to the output HDF file. If ``None``, the output is written
        alongside the input with its extension swapped
        (*e.g.* ``foo.hdf`` → ``foo.h5``).
    strict : bool, optional
        If ``True``, raise when a dataset attribute cannot be written to the
        output format; if ``False``, print a warning and skip the attribute.
        Default is ``True``.

    Returns
    -------
    out : Path
        The path to the written output file.

    Raises
    ------
    ValueError
        If the input file does not have a ``.hdf`` or ``.h5`` extension.
    KeyError
        If, for HDF4 output, a data or attribute dtype is unsupported by
        :py:mod:`pyhdf` and ``strict`` is ``True`` (see
        :func:`write_hdf_data` for the dtype table).
    TypeError
        If, for HDF5 output, an attribute type cannot be stored as an HDF5
        attribute and ``strict`` is ``True``.

    See Also
    --------
    convert_psih4_to_psih5 : Specialized PSI-convention HDF4 → HDF5 converter.
    write_hdf_data : Generic HDF data writing routine.
    read_hdf_data : Generic HDF data reading routine.

    Examples
    --------
    >>> import tempfile
    >>> from pathlib import Path
    >>> from psi_io import convert, read_hdf_meta
    >>> from psi_io.data import get_3d_data
    >>> h5_path = get_3d_data(hdf=".h5")
    >>> with tempfile.TemporaryDirectory() as d:
    ...     out = convert(h5_path, Path(d) / "br.hdf")
    ...     meta = read_hdf_meta(out)
    ...     meta[0].name
    'Data'
    """
    ifile = Path(ifile)
    if ofile:
        ofile = Path(ofile)
    else:
        # Swap the extension: .h5 -> .hdf, anything else -> .h5.
        swapped = ".hdf" if ifile.suffix == ".h5" else ".h5"
        ofile = ifile.with_suffix(swapped)
    ofile.parent.mkdir(parents=True, exist_ok=True)

    # Copy every dataset (with its scales and attributes) to the new format.
    for dataset in read_hdf_meta(ifile):
        data, *scales = read_hdf_data(ifile, dataset_id=dataset.name, return_scales=True)
        write_hdf_data(ofile, data, *scales, dataset_id=dataset.name,
                       strict=strict, **dataset.attr)

    return ofile
1486
1487
def convert_psih4_to_psih5(ifile: PathLike,
                           ofile: Optional[PathLike] = None) -> Path:
    """
    Convert a PSI-convention HDF4 file to an HDF5 file.

    Unlike :func:`convert`, this routine is specialized for PSI-style HDF4
    files: it reads the primary dataset (``'Data-Set-2'``) and writes it
    under the PSI HDF5 dataset name (``'Data'``), preserving the dataset's
    attributes and scales.

    Parameters
    ----------
    ifile : PathLike
        The path to the source HDF4 (.hdf) file.
    ofile : PathLike, optional
        The path to the output HDF5 (.h5) file. If ``None``, the output is
        written alongside the input with a ``.h5`` extension
        (*e.g.* ``foo.hdf`` → ``foo.h5``).

    Returns
    -------
    out : Path
        The path to the written HDF5 file.

    Raises
    ------
    ValueError
        If ``ifile`` does not have a ``.hdf`` extension, or ``ofile`` does
        not have a ``.h5`` extension.
    ImportError
        If the :py:mod:`pyhdf` package is not available.

    See Also
    --------
    convert : General-purpose HDF4 ↔ HDF5 converter.
    write_hdf_data : Generic HDF data writing routine.

    Examples
    --------
    >>> import tempfile
    >>> from pathlib import Path
    >>> from psi_io import convert_psih4_to_psih5, read_hdf_meta
    >>> from psi_io.data import get_3d_data
    >>> hdf4_path = get_3d_data(hdf=".hdf")
    >>> with tempfile.TemporaryDirectory() as d:
    ...     out = convert_psih4_to_psih5(hdf4_path, Path(d) / "br.h5")
    ...     meta = read_hdf_meta(out)
    ...     meta[0].name
    'Data'
    """
    ifile = Path(ifile)
    ofile = Path(ofile) if ofile else ifile.with_suffix(".h5")
    # Validate both extensions before touching the filesystem.
    if ifile.suffix != ".hdf":
        raise ValueError(f"Input file must have a .hdf extension; got {ifile.suffix}")
    if ofile.suffix != ".h5":
        raise ValueError(f"Output file must have a .h5 extension; got {ofile.suffix}")
    ofile.parent.mkdir(parents=True, exist_ok=True)

    h4_id, h5_id = PSI_DATA_ID["h4"], PSI_DATA_ID["h5"]
    data, *scales = read_hdf_data(ifile, dataset_id=h4_id, return_scales=True)
    meta_data, *_ = read_hdf_meta(ifile, dataset_id=h4_id)
    write_hdf_data(ofile, data, *scales, dataset_id=h5_id, **meta_data.attr)
    return ofile
1553
def instantiate_linear_interpolator(*args, **kwargs):
    r"""
    Instantiate a linear interpolator from a data array and its scales.

    Parameters
    ----------
    *args : sequence[array_like]
        The first argument is the data array; the remaining arguments are
        the scales (coordinate arrays) for each dimension.
    **kwargs : dict
        Extra keyword arguments forwarded to
        :class:`~scipy.interpolate.RegularGridInterpolator`.

    Returns
    -------
    out : RegularGridInterpolator
        A RegularGridInterpolator built from the provided data and scales.

    Raises
    ------
    ImportError
        If the ``scipy`` package is not available.

    Notes
    -----
    The data array is transposed before being handed to
    :class:`~scipy.interpolate.RegularGridInterpolator`, so for a PSI-style
    Fortran-ordered 3D dataset the interpolator is queried with
    :math:`(r, \theta, \phi)` coordinates.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import read_hdf_by_value
    >>> from numpy import pi
    >>> filepath = get_3d_data()
    >>> data_and_scales = read_hdf_by_value(filepath, 15, None, None)
    >>> interpolator = instantiate_linear_interpolator(*data_and_scales)
    >>> interpolator((15, pi/2, pi))
    0.0012864485109423877
    """
    _except_no_scipy()
    data, *points = args
    # Transpose so the interpolator's axis order matches the scale order.
    return RegularGridInterpolator(values=data.T, points=points, **kwargs)
1609
1610
def sp_interpolate_slice_from_hdf(*xi, **kwargs):
    r"""
    Interpolate a slice from HDF data using SciPy's `RegularGridInterpolator`.

    .. note::
        Slicing reduces dimensionality: dimensions fixed by a float in
        `*xi` are removed from the output, while dimensions given as
        ``None`` are retained.

    Parameters
    ----------
    *xi : sequence
        The file path followed by positional arguments passed-through to
        :func:`read_hdf_by_value`.
    **kwargs : dict
        Keyword arguments passed-through to :func:`read_hdf_by_value`.
        **NOTE: building the interpolator requires scales, so any provided**
        ``return_scales`` **value is overridden to ensure they are read.**

    Returns
    -------
    data_slice : np.ndarray
        The interpolated data slice with fixed dimensions removed.
    scales : list[np.ndarray]
        Coordinate scale arrays for the retained (non-fixed) dimensions.

    Notes
    -----
    Reads a bracketing subset from the HDF file, builds a
    :class:`~scipy.interpolate.RegularGridInterpolator`, and evaluates it at
    the requested fixed coordinates.

    .. note::
        The returned slice is Fortran-ordered, *e.g.* radial slices have
        shape :math:`(n_\phi, n_\theta)` and :math:`\phi` slices have shape
        :math:`(n_r, n_\theta)`.

    .. note::
        :class:`~scipy.interpolate.RegularGridInterpolator` casts input data
        to ``float64`` internally, so ``float32`` PSI datasets are upcast.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import sp_interpolate_slice_from_hdf
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Fetch a 2D slice at r=15 from 3D map

    >>> slice_, theta_scale, phi_scale = sp_interpolate_slice_from_hdf(filepath, 15, None, None)
    >>> slice_.shape, theta_scale.shape, phi_scale.shape
    ((181, 100), (100,), (181,))

    Fetch a single point from 3D map

    >>> point_value, *_ = sp_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
    >>> point_value
    6.084495480971823
    """
    filepath, *fixed = xi
    kwargs.pop('return_scales', None)  # scales are mandatory here
    data, *scales = read_hdf_by_value(filepath, *fixed, **kwargs)
    interpolator = instantiate_linear_interpolator(data, *scales)
    # Evaluate on the fixed coordinate where given, the full scale otherwise.
    axes = [value if value is not None else scale
            for value, scale in zip(fixed, scales)]
    evaluated = interpolator(tuple(np.meshgrid(*axes, indexing='ij')))
    # Drop each fixed (length-1) axis and keep the scales of the free ones.
    selector = tuple(0 if value is not None else slice(None) for value in fixed)
    kept_scales = [scale for value, scale in zip(fixed, scales) if value is None]
    return evaluated[selector].T, *kept_scales
1681
1682
def np_interpolate_slice_from_hdf(ifile: PathLike, /,
                                  *xi: Union[float, Tuple[float, float], None],
                                  dataset_id: Optional[str] = None,
                                  by_index: bool = False,
                                  ):
    """
    Interpolate a slice from HDF data using linear interpolation.

    .. note::
        Slicing routines result in a dimensional reduction. The dimensions
        that are fixed (i.e. provided as float values in `*xi`) are removed
        from the output slice, while the dimensions that are not fixed
        (*i.e.* provided as `None` in `*xi`) are retained.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : sequence
        Positional arguments passed-through to the reader function.
    dataset_id : str | None, optional
        The identifier of the dataset to read. If ``None``, a default dataset
        is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
        Default is ``None``.
    by_index : bool, optional
        If ``True``, use :func:`read_hdf_by_ivalue` to read data by
        fractional index values. If ``False``, use :func:`read_hdf_by_value`
        to read by physical coordinate values. Default is ``False``.

    Returns
    -------
    data_slice : np.ndarray
        The interpolated data slice with fixed dimensions removed.
    scales : list[np.ndarray]
        Coordinate scale arrays for the retained (non-fixed) dimensions.

    Raises
    ------
    ValueError
        If the number of dimensions to interpolate over is not supported
        (only 1, 2, or 3 fixed dimensions are handled).

    Notes
    -----
    Supports linear, bilinear, and trilinear interpolation depending on the
    number of dimensions fixed in `xi`.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import np_interpolate_slice_from_hdf
    >>> from numpy import pi
    >>> filepath = get_3d_data()

    Fetch a 2D slice at r=15 from 3D map

    >>> slice_, theta_scale, phi_scale = np_interpolate_slice_from_hdf(filepath, 15, None, None)
    >>> slice_.shape, theta_scale.shape, phi_scale.shape
    ((181, 100), (100,), (181,))

    Fetch a single point from 3D map

    >>> point_value, *_ = np_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
    >>> point_value
    6.084496
    """
    reader = read_hdf_by_ivalue if by_index else read_hdf_by_value
    data, *scales = reader(ifile, *xi, dataset_id=dataset_id, return_scales=True)
    f_ = np.transpose(data)
    # Dispatch on the number of fixed dimensions; anything outside 1-3 is
    # unsupported. (Previously each branch duplicated the kept-scales
    # computation verbatim.)
    interpolators = {1: _np_linear_interpolation,
                     2: _np_bilinear_interpolation,
                     3: _np_trilinear_interpolation}
    n_fixed = sum(value is not None for value in xi)
    interpolate = interpolators.get(n_fixed)
    if interpolate is None:
        raise ValueError("Not a valid number of dimensions for supported linear interpolation methods")
    kept_scales = [scale for value, scale in zip(xi, scales) if value is None]
    return interpolate(xi, scales, f_).T, *kept_scales
1761
1762
def interpolate_positions_from_hdf(ifile, *xi, **kwargs):
    r"""
    Interpolate at a list of scale positions using SciPy's
    :class:`~scipy.interpolate.RegularGridInterpolator`.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF file to read.
    *xi : sequence[array_like]
        Coordinate values for each dimension of the ``n``-dimensional
        dataset. Each must have the same length :math:`m`; they are stacked
        into an :math:`m \times n` array of query points. Plain Python
        sequences are accepted and converted to arrays.
    **kwargs
        Keyword arguments forwarded to :func:`read_hdf_by_value`.

    Returns
    -------
    out : np.ndarray
        The interpolated values at the provided positions. Positions outside
        the dataset bounds yield NaN (``bounds_error=False``).

    Notes
    -----
    Only the bounding subset of the dataset covering the requested positions
    is read from the file *viz.* the entire dataset is never loaded into
    memory.

    Examples
    --------
    >>> from psi_io.data import get_3d_data
    >>> from psi_io import interpolate_positions_from_hdf
    >>> import numpy as np
    >>> filepath = get_3d_data()
    >>> r_vals = np.array([15, 20, 25])
    >>> theta_vals = np.array([np.pi/4, np.pi/2, 3*np.pi/4])
    >>> phi_vals = np.array([0, np.pi, 2*np.pi])
    >>> interpolate_positions_from_hdf(filepath, r_vals, theta_vals, phi_vals)
    [0.0008402743657585175, 0.000723875405654482, -0.00041033233811179216]
    """
    # Coerce to arrays so plain lists/tuples work too (the previous
    # implementation accessed `.shape` and crashed on non-ndarray input).
    xi = [np.asarray(coords) for coords in xi]
    # Read only the bounding subset of the dataset covering the positions.
    bounds = [(np.nanmin(coords), np.nanmax(coords)) for coords in xi]
    f, *scales = read_hdf_by_value(ifile, *bounds, **kwargs)
    interpolator = instantiate_linear_interpolator(f, *scales, bounds_error=False)
    # Stack coordinates along a trailing axis -> (..., ndim) points array;
    # axis=-1 is equivalent to the old axis=len(xi[0].shape).
    return interpolator(np.stack(xi, axis=-1))
1816
1817
def interpolate_point_from_1d_slice(xi, scalex, values):
    """
    Interpolate a point from a 1D slice using linear interpolation.

    Parameters
    ----------
    xi : float
        The coordinate value at which to interpolate.
    scalex : np.ndarray
        The coordinate scale array for the dimension.
    values : np.ndarray
        The 1D data array to interpolate.

    Returns
    -------
    out : np.ndarray
        The interpolated scalar value as a zero-dimensional array.

    Examples
    --------
    >>> import numpy as np
    >>> from psi_io.psi_io import interpolate_point_from_1d_slice
    >>> x = np.linspace(0.0, 1.0, 11)
    >>> f = x ** 2
    >>> float(interpolate_point_from_1d_slice(0.5, x, f))
    0.25
    """
    # The interpolation helper expects an ascending scale; flip if needed.
    if scalex[0] > scalex[-1]:
        scalex = scalex[::-1]
        values = values[::-1]
    idx = int(np.searchsorted(scalex, xi))
    # Clamp to a valid 2-element bracketing window around idx.
    window = slice(*_check_index_ranges(len(scalex), idx, idx))
    return _np_linear_interpolation([xi], [scalex[window]], values[window])
1850
1851
def interpolate_point_from_2d_slice(xi, yi, scalex, scaley, values):
    """
    Interpolate a point from a 2D slice using bilinear interpolation.

    Parameters
    ----------
    xi : float
        The coordinate value for the first dimension.
    yi : float
        The coordinate value for the second dimension.
    scalex : np.ndarray
        The coordinate scale array for the first dimension.
    scaley : np.ndarray
        The coordinate scale array for the second dimension.
    values : np.ndarray
        The 2D data array to interpolate, with shape ``(nx, ny)``.

    Returns
    -------
    out : np.ndarray
        The interpolated scalar value as a zero-dimensional array.

    Examples
    --------
    >>> import numpy as np
    >>> from psi_io.psi_io import interpolate_point_from_2d_slice
    >>> x = np.linspace(0.0, 1.0, 11)
    >>> y = np.linspace(0.0, 1.0, 11)
    >>> f = np.outer(x, y)
    >>> float(interpolate_point_from_2d_slice(0.5, 0.5, x, y, f))
    0.25
    """
    values = np.transpose(values)
    # Flip any descending axis so both scales are ascending for the
    # interpolation helper.
    if scalex[0] > scalex[-1]:
        scalex = scalex[::-1]
        values = values[::-1, :]
    if scaley[0] > scaley[-1]:
        scaley = scaley[::-1]
        values = values[:, ::-1]
    ix = int(np.searchsorted(scalex, xi))
    iy = int(np.searchsorted(scaley, yi))
    # Clamp each index to a valid 2-element bracketing window.
    sx = slice(*_check_index_ranges(len(scalex), ix, ix))
    sy = slice(*_check_index_ranges(len(scaley), iy, iy))
    return _np_bilinear_interpolation([xi, yi], [scalex[sx], scaley[sy]], values[sx, sy])
1892
1893
def _rdhdf_nd(hdf_filename: str,
              dimensionality: int
              ) -> Tuple[np.ndarray, ...]:
    """Read an n-dimensional PSI-style HDF file; shared implementation for `rdhdf_1d/2d/3d`.

    Parameters
    ----------
    hdf_filename : PathLike
        The path to the HDF5 (.h5) or HDF4 (.hdf) file to read.
    dimensionality : int
        The expected number of dimensions of the dataset.

    Returns
    -------
    out : tuple[np.ndarray, ...]
        The coordinate scales for each dimension followed by the data array.

    Raises
    ------
    ValueError
        If the dataset dimensionality does not match ``dimensionality``.

    Examples
    --------
    >>> from psi_io.psi_io import _rdhdf_nd
    >>> from psi_io.data import get_3d_data
    >>> r, t, p, f = _rdhdf_nd(get_3d_data(), dimensionality=3)
    >>> f.shape
    (181, 100, 151)
    """
    f, *scales = read_hdf_data(hdf_filename)
    if f.ndim != dimensionality:
        err = f'Expected {dimensionality}D data, got {f.ndim}D data instead.'
        raise ValueError(err)
    if not scales:
        # Scaleless files: pad with one empty placeholder scale per dimension.
        # A list, not a one-shot generator expression, so the fallback is a
        # real (reusable) sequence.
        scales = [np.empty(0) for _ in f.shape]
    return *scales, f
1930
1931
def _wrhdf_nd(hdf_filename: str,
              data: np.ndarray,
              *scales: Sequence[Union[np.ndarray, None]],
              dimensionality: int,
              sync_dtype: bool = True,
              **kwargs
              ) -> None:
    """Shared implementation behind `wrhdf_1d/2d/3d`: write an n-D PSI-style HDF file.

    Parameters
    ----------
    hdf_filename : PathLike
        Destination HDF5 (.h5) or HDF4 (.hdf) file path.
    data : np.ndarray
        The array to write; must have exactly ``dimensionality`` dimensions.
    *scales : Sequence[np.ndarray | None]
        Coordinate arrays, one per dimension.
    dimensionality : int
        The required number of dimensions of ``data``.
    sync_dtype : bool
        If True, scale dtypes are cast to match the data array dtype.
    **kwargs
        Additional keyword arguments forwarded to :func:`write_hdf_data`.

    Raises
    ------
    ValueError
        If the data dimensionality does not match ``dimensionality``.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io.psi_io import _wrhdf_nd
    >>> from psi_io import read_hdf_data
    >>> f = np.ones((10, 20, 30), dtype=np.float32)
    >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
    >>> t = np.linspace(0.0, 3.14, 20, dtype=np.float32)
    >>> p = np.linspace(0.0, 6.28, 30, dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     _wrhdf_nd(Path(d) / "out.h5", f, r, t, p, dimensionality=3)
    ...     data, *_ = read_hdf_data(Path(d) / "out.h5")
    ...     data.shape
    (10, 20, 30)
    """
    # Validate the rank up front, then delegate the actual write.
    if data.ndim != dimensionality:
        raise ValueError(
            f'Expected {dimensionality}D data, got {data.ndim}D data instead.')
    write_hdf_data(hdf_filename, data, *scales, sync_dtype=sync_dtype, **kwargs)
1981
1982
def _get_scales_nd_h5(ifile: Union[Path, str], /,
                      dimensionality: int,
                      dataset_id: Optional[str] = None,
                      ):
    """HDF5 (.h5) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF5 (.h5) file.
    dimensionality : int
        The expected number of dimensions of the dataset.
    dataset_id : str, optional
        The identifier of the dataset whose scales are read.
        If ``None``, the default PSI dataset (``'Data'``) is used.

    Returns
    -------
    out : tuple[np.ndarray, ...]
        The coordinate scale arrays for each dimension.

    Raises
    ------
    ValueError
        If the dataset dimensionality does not match ``dimensionality``,
        or if any dimension has no associated scale.

    Examples
    --------
    >>> from psi_io.psi_io import _get_scales_nd_h5
    >>> from psi_io.data import get_3d_data
    >>> r, t, p = _get_scales_nd_h5(get_3d_data(), dimensionality=3)
    >>> r.shape, t.shape, p.shape
    ((151,), (100,), (181,))
    """
    with h5.File(ifile, 'r') as hdf:
        data = hdf[dataset_id or PSI_DATA_ID['h5']]
        if data.ndim != dimensionality:
            err = f'Expected {dimensionality}D data, got {data.ndim}D data instead.'
            raise ValueError(err)
        scales = []
        for axis, dim in enumerate(data.dims):
            if not dim:
                # Name the offending axis (the old message was a generic
                # placeholder-free f-string).
                raise ValueError(f'Dimension {axis} has no scale associated with it.')
            # dim[0][:] materializes the scale so it survives closing the file.
            scales.append(dim[0][:])
    return tuple(scales)
2032
2033
def _get_scales_nd_h4(ifile: Union[ Path, str], /,
                      dimensionality: int,
                      dataset_id: Optional[str] = None,
                      ):
    """HDF4 (.hdf) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`.

    Parameters
    ----------
    ifile : PathLike
        The path to the HDF4 (.hdf) file.
    dimensionality : int
        The expected number of dimensions of the dataset.
    dataset_id : str, optional
        The identifier of the dataset whose scales are read.
        If ``None``, the default PSI dataset (``'Data-Set-2'``) is used.

    Returns
    -------
    out : tuple[np.ndarray, ...]
        The coordinate scale arrays for each dimension, one per axis.

    Raises
    ------
    ValueError
        If the dataset dimensionality does not match ``dimensionality``,
        or if any dimension lacks an associated scale.

    Examples
    --------
    >>> from psi_io.psi_io import _get_scales_nd_h4
    >>> from psi_io.data import get_3d_data
    >>> r, t, p = _get_scales_nd_h4(get_3d_data(hdf=".hdf"), dimensionality=3) # doctest: +SKIP
    >>> r.shape # doctest: +SKIP
    (151,)
    """
    sd_file = h4.SD(str(ifile))
    dataset = sd_file.select(dataset_id or PSI_DATA_ID['h4'])
    found_ndim = dataset.info()[1]
    if found_ndim != dimensionality:
        err = f'Expected {dimensionality}D data, got {found_ndim}D data instead.'
        raise ValueError(err)
    scales = []
    # Dimension entries are walked in reverse to match the HDF5 reader's axis order.
    for dim_name, dim_info in reversed(dataset.dimensions(full=1).items()):
        # dim_info[3] is the scale "type" field; 0 means no coordinate variable exists.
        if not dim_info[3]:
            raise ValueError('Dimension has no scale associated with it.')
        scales.append(sd_file.select(dim_name)[:])
    return tuple(scales)
2083
2084
def _read_h5_meta(ifile: PathLike, /,
                  dataset_id: Optional[str] = None
                  ):
    """HDF5 (.h5) version of :func:`read_hdf_meta`.

    Collect :class:`HdfDataMeta` records (name, dtype, shape, attributes and
    per-dimension scale metadata) for either one named dataset or every
    non-scale dataset in the file.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h5_meta
    >>> from psi_io.data import get_3d_data
    >>> meta = _read_h5_meta(get_3d_data())
    >>> meta[0].name
    'Data'
    """
    with h5.File(ifile, 'r') as hdf:
        # A missing ``dataset_id`` raises KeyError here, matching dict semantics.
        if dataset_id:
            selected = [(dataset_id, hdf[dataset_id])]
        else:
            selected = [(k, v) for k, v in hdf.items() if not v.is_scale]

        records = []
        for name, dset in selected:
            scale_records = []
            for dimproxy in dset.dims:
                if not dimproxy:
                    continue
                # Index the proxy once; each __getitem__ builds a fresh object.
                dim = dimproxy[0]
                if not dim:
                    continue
                scale_records.append(HdfScaleMeta(name=dimproxy.label,
                                                  type=dim.dtype,
                                                  shape=dim.shape,
                                                  imin=dim[0],
                                                  imax=dim[-1]))
            records.append(HdfDataMeta(name=name,
                                       type=dset.dtype,
                                       shape=dset.shape,
                                       attr=dict(dset.attrs),
                                       scales=scale_records))
        return records
2120
2121
def _read_h4_meta(ifile: PathLike, /,
                  dataset_id: Optional[str] = None
                  ):
    """HDF4 (.hdf) version of :func:`read_hdf_meta`.

    Collect :class:`HdfDataMeta` records (name, dtype, shape, attributes and
    per-dimension scale metadata) for either one named dataset or every
    non-coordinate-variable dataset in the file.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h4_meta
    >>> from psi_io.data import get_3d_data
    >>> meta = _read_h4_meta(get_3d_data(hdf=".hdf")) # doctest: +SKIP
    >>> meta[0].name # doctest: +SKIP
    'Data-Set-2'
    """
    hdf = h4.SD(str(ifile))
    # Raises HDF4Error if ``dataset_id`` not found
    # If ``dataset_id`` is None, get all non-scale :class:`pyhdf.SD.SDS`s
    if dataset_id:
        datasets = [(dataset_id, hdf.select(dataset_id))]
    else:
        datasets = [(k, hdf.select(k)) for k in hdf.datasets() if not hdf.select(k).iscoordvar()]

    # ``dimensions(full=1)`` returns a dict of dimension dataset identifiers (keys)
    # and tuples of dimension metadata (values). Entries for dimensions without a
    # coordinate-variable dataset still appear; the only indication they do not
    # exist is a "type" field (``v_[3]``) of 0, so those entries are skipped.
    records = []
    for name, sds in datasets:
        info = sds.info()  # hoisted: previously queried twice per dataset
        scale_records = []
        for dim_name, dim_info in sds.dimensions(full=1).items():
            if not dim_info[3]:
                continue
            # Hoisted: ``hdf.select`` was previously called twice per scale
            # (once for ``imin`` and once for ``imax``).
            scale_sds = hdf.select(dim_name)
            scale_records.append(HdfScaleMeta(name=dim_name,
                                              type=SDC_TYPE_CONVERSIONS[dim_info[3]],
                                              shape=_cast_shape_tuple(dim_info[0]),
                                              imin=scale_sds[0],
                                              imax=scale_sds[-1]))
        records.append(HdfDataMeta(name=name,
                                   type=SDC_TYPE_CONVERSIONS[info[3]],
                                   shape=_cast_shape_tuple(info[2]),
                                   attr=sds.attributes(),
                                   scales=scale_records))
    return records
2165
2166
def _read_h5_rtp(ifile: Union[ Path, str], /):
    """HDF5 (.h5) version of :func:`read_rtp_meta`.

    Returns a dict mapping each of ``'r'``, ``'t'``, ``'p'`` to a
    ``(size, first value, last value)`` tuple for the matching scale.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h5_rtp
    >>> from psi_io.data import get_3d_data
    >>> meta = _read_h5_rtp(get_3d_data())
    >>> sorted(meta.keys())
    ['p', 'r', 't']
    """
    meta = {}
    with h5.File(ifile, 'r') as hdf:
        for axis, scale_id in zip('rtp', PSI_SCALE_ID['h5']):
            scale = hdf[scale_id]
            meta[axis] = (scale.size, scale[0], scale[-1])
    return meta
2181
2182
def _read_h4_rtp(ifile: Union[ Path, str], /):
    """HDF4 (.hdf) version of :func:`read_rtp_meta`.

    Returns a dict mapping each of ``'r'``, ``'t'``, ``'p'`` to a
    ``(size, first value, last value)`` tuple for the matching scale.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h4_rtp
    >>> from psi_io.data import get_3d_data
    >>> meta = _read_h4_rtp(get_3d_data(hdf=".hdf")) # doctest: +SKIP
    >>> sorted(meta.keys()) # doctest: +SKIP
    ['p', 'r', 't']
    """
    hdf = h4.SD(str(ifile))
    meta = {}
    # NOTE: axis order is 'ptr' here, whereas the h5 reader uses 'rtp' —
    # presumably because PSI_SCALE_ID['h4'] lists the scales in reverse.
    for axis, scale_id in zip('ptr', PSI_SCALE_ID['h4']):
        scale = hdf.select(scale_id)  # hoisted: previously selected three times
        meta[axis] = (scale.info()[2], scale[0], scale[-1])
    return meta
2197
2198
def _read_h5_data(ifile: PathLike, /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_data`.

    Reads the full dataset, optionally followed by every attached scale.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h5_data
    >>> from psi_io.data import get_3d_data
    >>> data, r, t, p = _read_h5_data(get_3d_data())
    >>> data.shape
    (181, 100, 151)
    """
    with h5.File(ifile, 'r') as hdf:
        source = hdf[dataset_id or PSI_DATA_ID['h5']]
        values = source[:]
        if not return_scales:
            return values
        scales = [dimproxy[0][:] for dimproxy in source.dims if dimproxy]
        return values, *scales
2220
2221
def _read_h4_data(ifile: PathLike, /,
                  dataset_id: Optional[str] = None,
                  return_scales: bool = True,
                  ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_data`.

    Reads the full dataset, optionally followed by every dimension's
    coordinate-variable array (dimensions walked in reverse for consistency
    with the HDF5 reader).

    Examples
    --------
    >>> from psi_io.psi_io import _read_h4_data
    >>> from psi_io.data import get_3d_data
    >>> data, *_ = _read_h4_data(get_3d_data(hdf=".hdf")) # doctest: +SKIP
    >>> data.shape # doctest: +SKIP
    (181, 100, 151)
    """
    hdf = h4.SD(str(ifile))
    sds = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    values = sds[:]
    if not return_scales:
        return values
    scales = []
    for dim_name, dim_info in reversed(sds.dimensions(full=1).items()):
        # A zero "type" field marks a dimension without a coordinate variable.
        if dim_info[3]:
            scales.append(hdf.select(dim_name)[:])
    return (values, *scales)
2244
2245
def _read_h5_by_index(ifile: PathLike, /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_by_index`.

    One entry of ``xi`` per dimension (fastest-varying first); each may be an
    index, an index range, or ``None`` for the whole axis.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h5_by_index
    >>> from psi_io.data import get_3d_data
    >>> f, r, t, p = _read_h5_by_index(get_3d_data(), 0, None, None)
    >>> f.shape
    (181, 100, 1)
    """
    with h5.File(ifile, 'r') as hdf:
        source = hdf[dataset_id or PSI_DATA_ID['h5']]
        if len(xi) != source.ndim:
            raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
        slices = [_parse_index_inputs(entry) for entry in xi]
        # ``xi`` is ordered fastest-varying first, so reverse for on-disk axis order.
        subset = source[tuple(reversed(slices))]
        if not return_scales:
            return subset
        scales = [dimproxy[0][si] for si, dimproxy in zip(slices, source.dims) if dimproxy]
        return subset, *scales
2271
def _read_h4_by_index(ifile: PathLike, /,
                      *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_by_index`.

    One entry of ``xi`` per dimension (fastest-varying first); each may be an
    index, an index range, or ``None`` for the whole axis.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h4_by_index
    >>> from psi_io.data import get_3d_data
    >>> f, r, t, p = _read_h4_by_index(get_3d_data(hdf=".hdf"), 0, None, None) # doctest: +SKIP
    >>> f.shape # doctest: +SKIP
    (181, 100, 1)
    """
    hdf = h4.SD(str(ifile))
    sds = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    if len(xi) != sds.info()[1]:
        raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
    slices = [_parse_index_inputs(entry) for entry in xi]
    # ``xi`` is ordered fastest-varying first, so reverse for on-disk axis order.
    subset = sds[tuple(reversed(slices))]
    if not return_scales:
        return subset
    scales = []
    for si, (dim_name, dim_info) in zip(slices, reversed(sds.dimensions(full=1).items())):
        if dim_info[3]:
            scales.append(hdf.select(dim_name)[si])
    return subset, *scales
2298
2299
def _read_h5_by_value(ifile: PathLike, /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_by_value`.

    One entry of ``xi`` per dimension (fastest-varying first); each may be a
    scale value, a value range, or ``None`` for the whole axis.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h5_by_value
    >>> from psi_io.data import get_3d_data
    >>> f, r, t, p = _read_h5_by_value(get_3d_data(), 15, None, None)
    >>> f.shape
    (181, 100, 2)
    """
    with h5.File(ifile, 'r') as hdf:
        source = hdf[dataset_id or PSI_DATA_ID['h5']]
        if len(xi) != source.ndim:
            raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
        slices = []
        for dimproxy, target in zip(source.dims, xi):
            if dimproxy:
                slices.append(_parse_value_inputs(dimproxy[0], target))
            elif target is None:
                # No scale, but the whole axis was requested anyway.
                slices.append(slice(None))
            else:
                raise ValueError("Cannot slice by value on dimension without scales")
        # ``xi`` is ordered fastest-varying first, so reverse for on-disk axis order.
        subset = source[tuple(reversed(slices))]
        if not return_scales:
            return subset
        scales = [dimproxy[0][si] for si, dimproxy in zip(slices, source.dims) if dimproxy]
        return subset, *scales
2332
2333
def _read_h4_by_value(ifile: PathLike, /,
                      *xi: Union[float, Tuple[float, float], None],
                      dataset_id: Optional[str] = None,
                      return_scales: bool = True,
                      ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_by_value`.

    One entry of ``xi`` per dimension (fastest-varying first); each may be a
    scale value, a value range, or ``None`` for the whole axis.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h4_by_value
    >>> from psi_io.data import get_3d_data
    >>> f, r, t, p = _read_h4_by_value(get_3d_data(hdf=".hdf"), 15, None, None) # doctest: +SKIP
    >>> f.shape # doctest: +SKIP
    (181, 100, 2)
    """
    hdf = h4.SD(str(ifile))
    sds = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    if len(xi) != sds.info()[1]:
        raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
    # Hoisted: ``dimensions(full=1)`` was previously evaluated twice.
    dims = list(reversed(sds.dimensions(full=1).items()))
    slices = []
    for (dim_name, dim_info), target in zip(dims, xi):
        if dim_info[3] != 0:
            slices.append(_parse_value_inputs(hdf.select(dim_name), target))
        elif target is None:
            # No scale, but the whole axis was requested anyway.
            slices.append(slice(None))
        else:
            raise ValueError("Cannot slice by value on dimension without scales")
    # ``xi`` is ordered fastest-varying first, so reverse for on-disk axis order.
    subset = sds[tuple(reversed(slices))]
    if not return_scales:
        return subset
    scales = [hdf.select(dim_name)[si]
              for si, (dim_name, dim_info) in zip(slices, dims) if dim_info[3]]
    return subset, *scales
2367
2368
def _read_h5_by_ivalue(ifile: PathLike, /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF5 (.h5) version of :func:`read_hdf_by_ivalue`.

    One fractional-index entry of ``xi`` per dimension (fastest-varying
    first); instead of scale arrays, integer index arrays are returned.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h5_by_ivalue
    >>> from psi_io.data import get_3d_data
    >>> f, r_idx, t_idx, p_idx = _read_h5_by_ivalue(get_3d_data(), 2.7, None, None)
    >>> r_idx.shape
    (2,)
    """
    with h5.File(ifile, 'r') as hdf:
        source = hdf[dataset_id or PSI_DATA_ID['h5']]
        if len(xi) != source.ndim:
            raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
        # Pair each entry of ``xi`` with its axis length, fastest-varying first.
        sizes = tuple(reversed(source.shape))
        slices = [_parse_ivalue_inputs(size, entry) for size, entry in zip(sizes, xi)]
        subset = source[tuple(reversed(slices))]
        if not return_scales:
            return subset
        indices = [np.arange(si.start or 0, si.stop or size)
                   for si, size in zip(slices, sizes)]
        return subset, *indices
2394
2395
def _read_h4_by_ivalue(ifile: PathLike, /,
                       *xi: Union[float, Tuple[float, float], None],
                       dataset_id: Optional[str] = None,
                       return_scales: bool = True,
                       ) -> Union[np.ndarray, Tuple[np.ndarray]]:
    """HDF4 (.hdf) version of :func:`read_hdf_by_ivalue`.

    One fractional-index entry of ``xi`` per dimension (fastest-varying
    first); instead of scale arrays, integer index arrays are returned.

    Examples
    --------
    >>> from psi_io.psi_io import _read_h4_by_ivalue
    >>> from psi_io.data import get_3d_data
    >>> f, r_idx, t_idx, p_idx = _read_h4_by_ivalue(get_3d_data(hdf=".hdf"), 2.7, None, None) # doctest: +SKIP
    >>> r_idx.shape # doctest: +SKIP
    (2,)
    """
    hdf = h4.SD(str(ifile))
    sds = hdf.select(dataset_id or PSI_DATA_ID['h4'])
    info = sds.info()  # hoisted: ``info()`` was previously called twice
    ndim, shape = info[1], _cast_shape_tuple(info[2])
    if len(xi) != ndim:
        raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
    # Pair each entry of ``xi`` with its axis length, fastest-varying first.
    sizes = tuple(reversed(shape))
    slices = [_parse_ivalue_inputs(size, entry) for size, entry in zip(sizes, xi)]
    subset = sds[tuple(reversed(slices))]
    if not return_scales:
        return subset
    indices = [np.arange(si.start or 0, si.stop or size)
               for si, size in zip(slices, sizes)]
    return subset, *indices
2422
2423
def _write_h4_data(ifile: PathLike, /,
                   data: np.ndarray,
                   *scales: Sequence[np.ndarray],
                   dataset_id: Optional[str] = None,
                   sync_dtype: bool = False,
                   strict: bool = True,
                   **kwargs) -> Path:
    """HDF4 (.hdf) version of :func:`write_hdf_data`.

    Creates (truncating) the file, writes ``data`` under ``dataset_id`` (or
    the PSI default), attaches any non-``None`` scales, and stores ``kwargs``
    as dataset attributes. Returns the output path.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io.psi_io import _write_h4_data, _read_h4_data
    >>> f = np.ones((10,), dtype=np.float32)
    >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d: # doctest: +SKIP
    ...     _write_h4_data(Path(d) / "out.hdf", f, x) # doctest: +SKIP
    ...     data, *_ = _read_h4_data(Path(d) / "out.hdf") # doctest: +SKIP
    ...     data.shape # doctest: +SKIP
    (10,)
    """
    dataid = dataset_id or PSI_DATA_ID['h4']
    h4file = h4.SD(str(ifile), h4.SDC.WRITE | h4.SDC.CREATE | h4.SDC.TRUNC)
    sds = h4file.create(dataid, _dtype_to_sdc(data.dtype), data.shape)

    # Scales are attached in reverse call order (mirror image of the h5 writer).
    for axis, scale in enumerate(reversed(scales)):
        if scale is None:
            continue
        if sync_dtype:
            scale = scale.astype(data.dtype)
        sds.dim(axis).setscale(_dtype_to_sdc(scale.dtype), scale.tolist())

    for k, v in kwargs.items():
        npv = np.asarray(v)
        try:
            sds.attr(k).set(_dtype_to_sdc(npv.dtype), npv.tolist())
        except KeyError as e:
            # _dtype_to_sdc has no SDC mapping for this value's dtype.
            if strict:
                raise KeyError(f"Failed to set attribute '{k}' on dataset '{dataid}'") from e
            print(f"Warning: Failed to set attribute '{k}' on dataset '{dataid}'; skipping.")

    sds.set(data)
    sds.endaccess()
    h4file.end()

    return ifile
2474
2475
def _write_h5_data(ifile: PathLike, /,
                   data: np.ndarray,
                   *scales: Sequence[np.ndarray],
                   dataset_id: Optional[str] = None,
                   sync_dtype: bool = False,
                   strict: bool = True,
                   **kwargs) -> Path:
    """HDF5 (.h5) version of :func:`write_hdf_data`.

    Creates (truncating) the file, writes ``data`` under ``dataset_id`` (or
    the PSI default), attaches any non-``None`` scales as ``dim1``, ``dim2``,
    ..., and stores ``kwargs`` as dataset attributes. Returns the output path.

    Examples
    --------
    >>> import tempfile, numpy as np
    >>> from pathlib import Path
    >>> from psi_io.psi_io import _write_h5_data, _read_h5_data
    >>> f = np.ones((10,), dtype=np.float32)
    >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32)
    >>> with tempfile.TemporaryDirectory() as d:
    ...     _write_h5_data(Path(d) / "out.h5", f, x)
    ...     data, *_ = _read_h5_data(Path(d) / "out.h5")
    ...     data.shape
    (10,)
    """
    dataid = dataset_id or PSI_DATA_ID['h5']
    with h5.File(ifile, "w") as h5file:
        dataset = h5file.create_dataset(dataid, data=data, dtype=data.dtype, shape=data.shape)

        for axis, scale in enumerate(scales):
            if scale is None:
                continue
            if sync_dtype:
                scale = scale.astype(data.dtype)
            scale_name = f"dim{axis+1}"
            h5file.create_dataset(scale_name, data=scale, dtype=scale.dtype, shape=scale.shape)
            h5file[dataid].dims[axis].attach_scale(h5file[scale_name])
            h5file[dataid].dims[axis].label = scale_name

        for key, value in kwargs.items():
            try:
                dataset.attrs[key] = value
            except TypeError as e:
                # h5py could not map the value to an HDF5 attribute type.
                if strict:
                    raise TypeError(f"Failed to set attribute '{key}' on dataset '{dataid}'") from e
                print(f"Warning: Failed to set attribute '{key}' on dataset '{dataid}'; skipping.")

    return ifile
2522
2523
2524def _np_linear_interpolation(xi: Sequence, scales: Sequence, values: np.ndarray):
2525 """
2526 Perform linear interpolation over one dimension.
2527
2528 Parameters
2529 ----------
2530 xi : list[float | None]
2531 Target coordinate values for each dimension; ``None`` marks a
2532 "free" (not interpolated) dimension.
2533 scales : list[np.ndarray]
2534 Coordinate scale arrays, one per dimension.
2535 values : np.ndarray
2536 The data array to interpolate.
2537
2538 Returns
2539 -------
2540 out : np.ndarray
2541 The interpolated data array.
2542
2543 Examples
2544 --------
2545 >>> import numpy as np
2546 >>> from psi_io.psi_io import _np_linear_interpolation
2547 >>> scales = [np.array([0.0, 1.0])]
2548 >>> values = np.array([0.0, 2.0])
2549 >>> float(_np_linear_interpolation([0.5], scales, values))
2550 1.0
2551 """
2552 index0 = next((i for i, v in enumerate(xi) if v is not None), None)
2553 t = (xi[index0] - scales[index0][0])/(scales[index0][1] - scales[index0][0])
2554 f0 = [slice(None, None)]*values.ndim
2555 f1 = [slice(None, None)]*values.ndim
2556 f0[index0] = 0
2557 f1[index0] = 1
2558
2559 return (1 - t)*values[tuple(f0)] + t*values[tuple(f1)]
2560
2561
2562def _np_bilinear_interpolation(xi, scales, values):
2563 """
2564 Perform bilinear interpolation over two dimensions.
2565
2566 Parameters
2567 ----------
2568 xi : Sequence[float | None]
2569 Target coordinate values for each dimension; ``None`` marks a
2570 "free" (not interpolated) dimension.
2571 scales : Sequence[np.ndarray]
2572 Coordinate scale arrays, one per dimension.
2573 values : np.ndarray
2574 The data array to interpolate.
2575
2576 Returns
2577 -------
2578 out : np.ndarray
2579 The interpolated data array.
2580
2581 Examples
2582 --------
2583 >>> import numpy as np
2584 >>> from psi_io.psi_io import _np_bilinear_interpolation
2585 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0])]
2586 >>> values = np.array([[0.0, 0.0], [1.0, 2.0]])
2587 >>> float(_np_bilinear_interpolation([0.5, 0.5], scales, values))
2588 0.75
2589 """
2590 index0, index1 = [i for i, v in enumerate(xi) if v is not None]
2591 t, u = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1)]
2592
2593 f00 = [slice(None, None)]*values.ndim
2594 f10 = [slice(None, None)]*values.ndim
2595 f01 = [slice(None, None)]*values.ndim
2596 f11 = [slice(None, None)]*values.ndim
2597 f00[index0], f00[index1] = 0, 0
2598 f10[index0], f10[index1] = 1, 0
2599 f01[index0], f01[index1] = 0, 1
2600 f11[index0], f11[index1] = 1, 1
2601
2602 return (
2603 (1 - t)*(1 - u)*values[tuple(f00)] +
2604 t*(1 - u)*values[tuple(f10)] +
2605 (1 - t)*u*values[tuple(f01)] +
2606 t*u*values[tuple(f11)]
2607 )
2608
2609
2610def _np_trilinear_interpolation(xi, scales, values):
2611 """
2612 Perform trilinear interpolation over three dimensions.
2613
2614 Parameters
2615 ----------
2616 xi : list[float | None]
2617 Target coordinate values for each dimension; ``None`` marks a
2618 "free" (not interpolated) dimension.
2619 scales : list[np.ndarray]
2620 Coordinate scale arrays, one per dimension.
2621 values : np.ndarray
2622 The data array to interpolate.
2623
2624 Returns
2625 -------
2626 out : np.ndarray
2627 The interpolated data array.
2628
2629 Examples
2630 --------
2631 >>> import numpy as np
2632 >>> from psi_io.psi_io import _np_trilinear_interpolation
2633 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0]), np.array([0.0, 1.0])]
2634 >>> values = np.zeros((2, 2, 2))
2635 >>> values[1, 1, 1] = 8.0
2636 >>> float(_np_trilinear_interpolation([0.5, 0.5, 0.5], scales, values))
2637 1.0
2638 """
2639 index0, index1, index2 = [i for i, v in enumerate(xi) if v is not None]
2640 t, u, v = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1, index2)]
2641
2642 f000 = [slice(None, None)]*values.ndim
2643 f100 = [slice(None, None)]*values.ndim
2644 f010 = [slice(None, None)]*values.ndim
2645 f110 = [slice(None, None)]*values.ndim
2646 f001 = [slice(None, None)]*values.ndim
2647 f101 = [slice(None, None)]*values.ndim
2648 f011 = [slice(None, None)]*values.ndim
2649 f111 = [slice(None, None)]*values.ndim
2650
2651 f000[index0], f000[index1], f000[index2] = 0, 0, 0
2652 f100[index0], f100[index1], f100[index2] = 1, 0, 0
2653 f010[index0], f010[index1], f010[index2] = 0, 1, 0
2654 f110[index0], f110[index1], f110[index2] = 1, 1, 0
2655 f001[index0], f001[index1], f001[index2] = 0, 0, 1
2656 f101[index0], f101[index1], f101[index2] = 1, 0, 1
2657 f011[index0], f011[index1], f011[index2] = 0, 1, 1
2658 f111[index0], f111[index1], f111[index2] = 1, 1, 1
2659
2660 c00 = values[tuple(f000)]*(1 - t) + values[tuple(f100)]*t
2661 c10 = values[tuple(f010)]*(1 - t) + values[tuple(f110)]*t
2662 c01 = values[tuple(f001)]*(1 - t) + values[tuple(f101)]*t
2663 c11 = values[tuple(f011)]*(1 - t) + values[tuple(f111)]*t
2664
2665 c0 = c00*(1 - u) + c10*u
2666 c1 = c01*(1 - u) + c11*u
2667
2668 return c0*(1 - v) + c1*v
2669
2670
2671def _check_index_ranges(arr_size: int,
2672 i0: Union[int, np.integer],
2673 i1: Union[int, np.integer]
2674 ) -> Tuple[int, int]:
2675 """
2676 Adjust index ranges to ensure they cover at least two indices.
2677
2678 Parameters
2679 ----------
2680 arr_size : int
2681 The size of the array along the dimension.
2682 i0 : int
2683 The starting index.
2684 i1 : int
2685 The ending index.
2686
2687 Returns
2688 -------
2689 out : tuple[int, int]
2690 Adjusted starting and ending indices.
2691
2692 Notes
2693 -----
2694 This function ensures that the range between `i0` and `i1` includes at least
2695 two indices for interpolation purposes.
2696
2697 Examples
2698 --------
2699 >>> from psi_io.psi_io import _check_index_ranges
2700 >>> _check_index_ranges(10, 0, 0)
2701 (0, 2)
2702 >>> _check_index_ranges(10, 5, 5)
2703 (4, 6)
2704 >>> _check_index_ranges(10, 10, 10)
2705 (8, 10)
2706 """
2707 i0, i1 = int(i0), int(i1)
2708 if i0 == 0:
2709 return (i0, i1 + 2) if i1 == 0 else (i0, i1 + 1)
2710 elif i0 == arr_size:
2711 return i0 - 2, i1
2712 else:
2713 return i0 - 1, i1 + 1
2714
2715
2716def _cast_shape_tuple(input: Union[int, Sequence[int]]
2717 ) -> tuple[int, ...]:
2718 """
2719 Cast an input to a tuple of integers.
2720
2721 Parameters
2722 ----------
2723 input : int | Sequence[int]
2724 The input to cast.
2725
2726 Returns
2727 -------
2728 out : tuple[int, ...]
2729 The input cast as a tuple of integers.
2730
2731 Raises
2732 ------
2733 TypeError
2734 If the input is neither an integer nor an iterable of integers.
2735
2736 Examples
2737 --------
2738 >>> from psi_io.psi_io import _cast_shape_tuple
2739 >>> _cast_shape_tuple(5)
2740 (5,)
2741 >>> _cast_shape_tuple([3, 4, 5])
2742 (3, 4, 5)
2743 """
2744 if isinstance(input, int):
2745 return (input,)
2746 elif isinstance(input, Sequence):
2747 return tuple(int(i) for i in input)
2748 else:
2749 raise TypeError("Input must be an integer or an iterable of integers.")
2750
2751
2752def _parse_index_inputs(input: Union[int, slice, Sequence[Union[int, None]], None]
2753 ) -> slice:
2754 """
2755 Parse various slice input formats into a standard slice object.
2756
2757 Parameters
2758 ----------
2759 input : int | slice | Sequence[int | None] | None
2760 The index specification to parse:
2761
2762 - *int* — selects a single-element slice ``[n, n+1)``.
2763 - *Sequence[int | None]* — unpacked directly into :class:`slice`.
2764 - ``None`` — selects the entire dimension.
2765
2766 Returns
2767 -------
2768 slice
2769 The parsed slice object.
2770
2771 Raises
2772 ------
2773 TypeError
2774 If the input type is unsupported.
2775
2776 Examples
2777 --------
2778 >>> from psi_io.psi_io import _parse_index_inputs
2779 >>> _parse_index_inputs(3)
2780 slice(3, 4, None)
2781 >>> _parse_index_inputs((2, 7))
2782 slice(2, 7, None)
2783 >>> _parse_index_inputs(None)
2784 slice(None, None, None)
2785 """
2786 if isinstance(input, int):
2787 return slice(input, input + 1)
2788 elif isinstance(input, Sequence):
2789 return slice(*input)
2790 elif input is None:
2791 return slice(None)
2792 else:
2793 raise TypeError("Unsupported input type for slicing.")
2794
2795
def _parse_value_inputs(dimproxy,
                        value,
                        scale_exists: bool = True
                        ) -> slice:
    """Parse a scale value or value range into a slice over a dimension's coordinate array.

    Parameters
    ----------
    dimproxy : array-like
        The coordinate array (scale) for the dimension, supporting ``[:]`` indexing.
    value : float | tuple[float | None, float | None] | None
        The target value or range:

        - ``None`` — select the entire dimension.
        - *float* — find the 2-element bracket ``[a, b]`` such that ``a <= value < b``.
        - *(float, float)* — find the bracket spanning the given range.
    scale_exists : bool
        Guard flag; raises :exc:`ValueError` if ``False`` and ``value`` is not ``None``.

    Returns
    -------
    slice
        A slice object that selects the appropriate indices from the coordinate array.

    Raises
    ------
    ValueError
        If ``value`` is not ``None`` and ``scale_exists`` is ``False``.

    Examples
    --------
    >>> import numpy as np
    >>> from psi_io.psi_io import _parse_value_inputs
    >>> scale = np.linspace(0.0, 10.0, 11)
    >>> _parse_value_inputs(scale, None)
    slice(None, None, None)
    >>> _parse_value_inputs(scale, 4.5)
    slice(4, 6, None)
    >>> _parse_value_inputs(scale, (2.0, 7.0))
    slice(1, 8, None)
    """
    if value is None:
        return slice(None)
    if not scale_exists:
        raise ValueError("Cannot parse value inputs when scale does not exist.")

    scale = dimproxy[:]
    if isinstance(value, Sequence):
        # Open-ended bounds (None) become +/- infinity before the search.
        bounds = list(value)
        if bounds[0] is None:
            bounds[0] = -np.inf
        if bounds[-1] is None:
            bounds[-1] = np.inf
        endpoints = np.searchsorted(scale, bounds)
        return slice(*_check_index_ranges(scale.size, *endpoints))

    pivot = np.searchsorted(scale, value)
    return slice(*_check_index_ranges(scale.size, pivot, pivot))
2853
2854
2855def _parse_ivalue_inputs(dimsize,
2856 input: Union[Union[int, float], slice, Sequence[Union[Union[int, float], None]], None]
2857 ) -> slice:
2858 """
2859 Parse a sub-index value or range into a slice over a dimension of a given size.
2860
2861 Parameters
2862 ----------
2863 dimsize : int
2864 The size of the array along the dimension; used to clamp the resulting slice.
2865 input : int | float | Sequence[int | float | None] | None
2866 The sub-index specification to parse:
2867
2868 - ``None`` — select the entire dimension.
2869 - *int* or *float* (:math:`a`) — returns ``slice(floor(a), ceil(a))``
2870 (clamped to ``[0, dimsize - 1]``).
2871 - *Sequence* ``(a, b)`` — returns ``slice(floor(a), ceil(b))``
2872 (clamped to ``[0, dimsize - 1]``).
2873
2874 Returns
2875 -------
2876 slice
2877 The parsed slice object, guaranteed to span at least 2 elements.
2878
2879 Raises
2880 ------
2881 TypeError
2882 If the input type is unsupported.
2883
2884 Examples
2885 --------
2886 >>> from psi_io.psi_io import _parse_ivalue_inputs
2887 >>> _parse_ivalue_inputs(10, None)
2888 slice(None, None, None)
2889 >>> _parse_ivalue_inputs(10, 2.7)
2890 slice(2, 4, None)
2891 >>> _parse_ivalue_inputs(10, (1.3, 4.8))
2892 slice(1, 5, None)
2893 """
2894 if input is None:
2895 return slice(None)
2896 elif isinstance(input, (int, float)):
2897 i0, i1 = math.floor(input), math.ceil(input)
2898 elif isinstance(input, Sequence):
2899 i0, i1 = math.floor(input[0]), math.ceil(input[1])
2900 else:
2901 raise TypeError("Unsupported input type for slicing.")
2902
2903 if i0 > i1:
2904 i0, i1 = i1, i0
2905 i0, i1 = max(0, i0), min(dimsize - 1, i1)
2906 if i0 > i1:
2907 i0, i1 = i1, i0
2908 i0, i1 = max(0, i0), min(dimsize - 1, i1)
2909 if (i1 - i0) < 2:
2910 return slice(i0, i1 + 2 - (i1-i0))
2911 else:
2912 return slice(i0, i1)