1"""
2Routines for reading and writing PSI-style HDF5 and HDF4 data files.
3
4This module provides a unified interface for interacting with PSI's HDF data
5ecosystem. It handles both HDF4 (``.hdf``) and HDF5 (``.h5``) file formats,
6automatically dispatching to the appropriate backend based on the file
7extension.
8
9Key interfaces
10--------------
11Reading full datasets:
12 :func:`read_hdf_data`, :func:`rdhdf_1d`, :func:`rdhdf_2d`, :func:`rdhdf_3d`
13
14Writing full datasets:
15 :func:`write_hdf_data`, :func:`wrhdf_1d`, :func:`wrhdf_2d`, :func:`wrhdf_3d`
16
17Reading file metadata:
18 :func:`read_hdf_meta`, :func:`read_rtp_meta`
19
20Reading dataset subsets:
21 :func:`get_scales_1d`, :func:`get_scales_2d`, :func:`get_scales_3d`,
22 :func:`read_hdf_by_index`, :func:`read_hdf_by_value`, :func:`read_hdf_by_ivalue`
23
24Interpolating data:
25 :func:`np_interpolate_slice_from_hdf`, :func:`sp_interpolate_slice_from_hdf`,
26 :func:`interpolate_positions_from_hdf`
27
28Converting between formats:
29 :func:`convert`, :func:`convert_psih4_to_psih5`
30
31See Also
32--------
33:mod:`psi_data` :
34 The ``psi-data-utils`` package, which provides helpers for fetching
35 example HDF data files. See its `documentation
36 <https://predsci.com/doc/psi-data-utils/index.html>`_ for details.
37
38Examples
39--------
40Read a 3D PSI-style HDF5 file:
41
42>>> from psi_io import read_hdf_data
43>>> from psi_data import fetch_mas_data
44>>> filepath = fetch_mas_data().cor_br
45>>> data, r, t, p = read_hdf_data(filepath)
46>>> data.shape
47(299, 142, 255)
48"""
49
50from __future__ import annotations
51
52__all__ = [
53 "read_hdf_meta",
54 "read_rtp_meta",
55
56 "get_scales_1d",
57 "get_scales_2d",
58 "get_scales_3d",
59
60 "read_hdf_by_index",
61 "read_hdf_by_value",
62 "read_hdf_by_ivalue",
63
64 "np_interpolate_slice_from_hdf",
65 "sp_interpolate_slice_from_hdf",
66 "interpolate_positions_from_hdf",
67
68 "instantiate_linear_interpolator",
69 "interpolate_point_from_1d_slice",
70 "interpolate_point_from_2d_slice",
71
72 "read_hdf_data",
73 "rdhdf_1d",
74 "rdhdf_2d",
75 "rdhdf_3d",
76
77 "write_hdf_data",
78 "write_hdf_meta",
79 "wrhdf_1d",
80 "wrhdf_2d",
81 "wrhdf_3d",
82
83 "convert",
84 "convert_psih4_to_psih5"
85]
86
87import math
88from collections import namedtuple
89from pathlib import Path
90from types import MappingProxyType
91from typing import Optional, Literal, Tuple, Sequence, List, Dict, Union, Callable, Any, Mapping
92
93import numpy as np
94import h5py as h5
95
96# -----------------------------------------------------------------------------
97# Optional Imports and Import Checking
98# -----------------------------------------------------------------------------
99# These packages are needed by several functions and must be imported in the
100# module namespace.
101try:
102 import pyhdf.SD as h4
103 H4_AVAILABLE = True
104 DTYPE_TO_SDC = MappingProxyType({
105 "i": {
106 1: h4.SDC.INT8,
107 2: h4.SDC.INT16,
108 4: h4.SDC.INT32,
109 },
110 "u": {
111 1: h4.SDC.UINT8,
112 2: h4.SDC.UINT16,
113 4: h4.SDC.UINT32,
114 },
115 "f": {
116 4: h4.SDC.FLOAT32,
117 8: h4.SDC.FLOAT64,
118 },
119 "b": {
120 1: h4.SDC.UINT8
121 },
122 "U": h4.SDC.CHAR,
123 "S": h4.SDC.CHAR
124 })
125 """
126 Helper dictionary mapping :class:`~numpy.dtype` kinds to HDF4 SDC types.
127
128 The keys are :attr:`~numpy.dtype.kind`, and the values are either a direct
129 mapping (for byte-string or unicode-string types) or a nested mapping of
130 :attr:`~numpy.dtype.itemsize` to SDC type (for numeric types).
131 """
132except ImportError:
133 H4_AVAILABLE = False
134 DTYPE_TO_SDC = {}
135
136try:
137 from scipy.interpolate import RegularGridInterpolator
138 SCIPY_AVAILABLE = True
139except ImportError:
140 SCIPY_AVAILABLE = False
141
142
143# Functions to stop execution if a package doesn't exist.
144def _except_no_pyhdf():
145 """Raise :exc:`ImportError` if the ``pyhdf`` package is not available.
146
147 Raises
148 ------
149 ImportError
150 If ``pyhdf`` was not importable at module load time.
151
152 Examples
153 --------
154 >>> from psi_io.psi_io import _except_no_pyhdf, H4_AVAILABLE
155 >>> H4_AVAILABLE # doctest: +SKIP
156 True
157 >>> _except_no_pyhdf() is None # no-op when pyhdf is installed
158 True
159 """
160 if not H4_AVAILABLE:
161 raise ImportError('The pyhdf package is required to read/write HDF4 .hdf files!')
162 return
163
164
165def _except_no_scipy():
166 """Raise :exc:`ImportError` if the ``scipy`` package is not available.
167
168 Raises
169 ------
170 ImportError
171 If ``scipy`` was not importable at module load time.
172
173 Examples
174 --------
175 >>> from psi_io.psi_io import _except_no_scipy, SCIPY_AVAILABLE
176 >>> SCIPY_AVAILABLE # doctest: +SKIP
177 True
178 >>> _except_no_scipy() is None # no-op when scipy is installed
179 True
180 """
181 if not SCIPY_AVAILABLE:
182 raise ImportError('The scipy package is required for the interpolation routines!')
183 return
184
185
186SDC_TYPE_CONVERSIONS = MappingProxyType({
187 3: np.dtype("ubyte"),
188 4: np.dtype("byte"),
189 5: np.dtype("float32"),
190 6: np.dtype("float64"),
191 20: np.dtype("int8"),
192 21: np.dtype("uint8"),
193 22: np.dtype("int16"),
194 23: np.dtype("uint16"),
195 24: np.dtype("int32"),
196 25: np.dtype("uint32")
197})
198"""Helper dictionary for mapping HDF4 types to numpy dtypes"""
199
200
201PSI_DATA_ID = MappingProxyType({
202 'h4': 'Data-Set-2',
203 'h5': 'Data'
204})
205"""Mapping of PSI standard dataset names for HDF4 and HDF5 files"""
206
207
208PSI_SCALE_ID = MappingProxyType({
209 'h4': ('fakeDim0', 'fakeDim1', 'fakeDim2'),
210 'h5': ('dim1', 'dim2', 'dim3')
211})
212"""Mapping of PSI standard scale names for HDF4 and HDF5 files"""
213
214
215HDFEXT = {'.hdf', '.h5'}
216"""Set of possible HDF file extensions"""
217
218
219HdfExtType = Literal['.hdf', '.h5']
220"""Type alias for possible HDF file extensions"""
221
222
223HdfScaleMeta = namedtuple('HdfScaleMeta', ['name', 'type', 'shape', 'attr', 'imin', 'imax'])
224"""
225 Named tuple storing metadata for a single HDF scale (coordinate) dimension.
226
227 Parameters
228 ----------
229 name : str
230 The name of the scale dataset.
231 type : str
232 The data type of the scale.
233 shape : tuple[int, ...]
234 The shape of the scale array.
235 attr : dict[str, Any]
236 A dictionary of attributes associated with the dataset.
237 imin : float
238 The minimum value of the scale.
239 This assumes the scale is monotonically increasing.
240 imax : float
241 The maximum value of the scale.
242 This assumes the scale is monotonically increasing.
243"""
244
245
246HdfDataMeta = namedtuple('HdfDataMeta', ['name', 'type', 'shape', 'attr', 'scales'])
247"""
248 Named tuple for HDF dataset metadata
249
250 Parameters
251 ----------
252 name : str
253 The name of the dataset.
254 type : str
255 The data type of the dataset.
256 shape : tuple[int, ...]
257 The shape of the dataset.
258 attr : dict[str, Any]
259 A dictionary of attributes associated with the dataset.
260 scales : list[HdfScaleMeta]
261 A list of scale metadata objects corresponding to each dimension of the dataset.
262 If the dataset has no scales, this list will be empty.
263"""
264
265PathLike = Union[Path, str]
266"""Type alias for file paths, accepting either :class:`pathlib.Path` or str"""
267
268
269def _dtype_to_sdc(dtype: np.dtype):
270 """Convert a numpy dtype to the corresponding HDF4 SDC type.
271
272 Parameters
273 ----------
274 dtype : np.dtype
275 The numpy dtype to convert.
276
277 Returns
278 -------
279 out : int
280 The corresponding HDF4 SDC type constant for the given numpy dtype.
281
282 Raises
283 ------
284 ImportError
285 If the ``pyhdf`` package is not available.
286 KeyError
287 If the dtype kind or itemsize is not supported by HDF4. See
288 :func:`write_hdf_data` for the full dtype support table.
289
290 Examples
291 --------
292 >>> import numpy as np
293 >>> from psi_io.psi_io import _dtype_to_sdc
294 >>> import pyhdf.SD as h4
295 >>> _dtype_to_sdc(np.dtype("float32")) == h4.SDC.FLOAT32
296 True
297 """
298 _except_no_pyhdf()
299 if dtype.kind in {"U", "S"}:
300 return DTYPE_TO_SDC[dtype.kind]
301
302 try:
303 return DTYPE_TO_SDC[dtype.kind][dtype.itemsize]
304 except KeyError as e:
305 if dtype.kind not in DTYPE_TO_SDC:
306 msg = (f"Unsupported dtype kind '{dtype.kind}' for HDF4. "
307 f"Supported kinds are: {set(DTYPE_TO_SDC.keys())}")
308 raise KeyError(msg) from e
309 elif dtype.itemsize not in DTYPE_TO_SDC[dtype.kind]:
310 msg = (f"Unsupported itemsize '{dtype.itemsize}' for dtype kind '{dtype.kind}' in HDF4. "
311 f"Supported itemsizes are: {set(DTYPE_TO_SDC[dtype.kind].keys())}")
312 raise KeyError(msg) from e
313 raise e
314
315
316def _dispatch_by_ext(ifile: PathLike,
317 hdf4_func: Callable,
318 hdf5_func: Callable,
319 *args: Any, **kwargs: Any
320 ):
321 """
322 Dispatch function to call HDF4 or HDF5 specific functions based on file extension.
323
324 Parameters
325 ----------
326 ifile : PathLike
327 The path to the HDF file.
328 hdf4_func : Callable
329 The function to call for HDF4 files.
330 hdf5_func : Callable
331 The function to call for HDF5 files.
332 *args : Any
333 Positional arguments to pass to the selected function.
334 **kwargs : Any
335 Keyword arguments to pass to the selected function.
336
337 Returns
338 -------
339 out : Any
340 The return value of the dispatched function.
341
342 Raises
343 ------
344 ValueError
345 If the file does not have a `.hdf` or `.h5` extension.
346 ImportError
347 If the file is HDF4 and the `pyhdf` package is not available.
348
349 Examples
350 --------
351 >>> from psi_io.psi_io import _dispatch_by_ext, _read_h5_data, _read_h4_data
352 >>> from psi_data import fetch_mas_data
353 >>> filepath = fetch_mas_data().cor_br
354 >>> data, *_ = _dispatch_by_ext(filepath, _read_h4_data, _read_h5_data)
355 >>> data.shape
356 (299, 142, 255)
357 """
358 ipath = Path(ifile)
359 if ipath.suffix == '.h5':
360 return hdf5_func(ifile, *args, **kwargs)
361 if ipath.suffix == '.hdf':
362 _except_no_pyhdf()
363 return hdf4_func(ifile, *args, **kwargs)
364 raise ValueError("File must be HDF4 (.hdf) or HDF5 (.h5)")
365
366
367# -----------------------------------------------------------------------------
368# "Classic" HDF reading and writing routines adapted from psihdf.py or psi_io.py.
369# -----------------------------------------------------------------------------
370
371
[docs]
372def rdhdf_1d(hdf_filename: PathLike
373 ) -> Tuple[np.ndarray, np.ndarray]:
374 """Read a 1D PSI-style HDF5 or HDF4 file.
375
376 Parameters
377 ----------
378 hdf_filename : PathLike
379 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.
380
381 Returns
382 -------
383 x : np.ndarray
384 1D coordinate scale array.
385 f : np.ndarray
386 1D data array.
387
388 See Also
389 --------
390 read_hdf_data : Generic HDF data reading routine.
391
392 Examples
393 --------
394 >>> from psi_io import rdhdf_1d
395 >>> from psi_data import fetch_example_radial_scale
396 >>> filepath = fetch_example_radial_scale()
397 >>> x, f = rdhdf_1d(filepath)
398 >>> x.shape, f.shape
399 ((121,), (121,))
400 """
401 return _rdhdf_nd(hdf_filename, dimensionality=1)
402
403
[docs]
404def rdhdf_2d(hdf_filename: PathLike
405 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
406 """Read a 2D PSI-style HDF5 or HDF4 file.
407
408 The data in the HDF file is assumed to be ordered X, Y in Fortran order.
409 Each dimension is assumed to have a 1D scale associated with it that
410 describes the rectilinear grid coordinates.
411
412 Parameters
413 ----------
414 hdf_filename : PathLike
415 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.
416
417 Returns
418 -------
419 x : np.ndarray
420 1D coordinate scale array for the X dimension.
421 y : np.ndarray
422 1D coordinate scale array for the Y dimension.
423 f : np.ndarray
424 2D data array, C-ordered with shape ``(ny, nx)`` in Python.
425
426 Notes
427 -----
428 Because Fortran uses column-major ordering and Python uses row-major
429 ordering, the data array is returned with reversed axis order relative to
430 how it is stored on disk. The first returned scale always corresponds to
431 the *slowest* varying dimension in the data array (i.e. the last axis).
432
433 See Also
434 --------
435 read_hdf_data : Generic HDF data reading routine.
436
437 Examples
438 --------
439 >>> from psi_io import rdhdf_2d
440 >>> from psi_data import fetch_mas_quantities
441 >>> filepath = fetch_mas_quantities().ch_pm
442 >>> x, y, f = rdhdf_2d(filepath)
443 >>> f.shape == (y.shape[0], x.shape[0])
444 True
445 """
446 return _rdhdf_nd(hdf_filename, dimensionality=2)
447
448
[docs]
449def rdhdf_3d(hdf_filename: PathLike
450 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
451 """Read a 3D PSI-style HDF5 or HDF4 file.
452
453 The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order.
454 Each dimension is assumed to have a 1D scale associated with it that
455 describes the rectilinear grid coordinates.
456
457 Parameters
458 ----------
459 hdf_filename : PathLike
460 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.
461
462 Returns
463 -------
464 x : np.ndarray
465 1D coordinate scale array for the X dimension.
466 y : np.ndarray
467 1D coordinate scale array for the Y dimension.
468 z : np.ndarray
469 1D coordinate scale array for the Z dimension.
470 f : np.ndarray
471 3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.
472
473 Notes
474 -----
475 Because Fortran uses column-major ordering and Python uses row-major
476 ordering, the data array is returned with reversed axis order relative to
477 how it is stored on disk. For PSI spherical datasets the returned shapes
478 follow the convention ``f.shape == (n_phi, n_theta, n_r)`` with scales
479 returned in physical order ``(r, theta, phi)``.
480
481 See Also
482 --------
483 read_hdf_data : Generic HDF data reading routine.
484
485 Examples
486 --------
487 >>> from psi_io import rdhdf_3d
488 >>> from psi_data import fetch_mas_data
489 >>> filepath = fetch_mas_data().cor_br
490 >>> r, t, p, f = rdhdf_3d(filepath)
491 >>> f.shape == (p.shape[0], t.shape[0], r.shape[0])
492 True
493 """
494 return _rdhdf_nd(hdf_filename, dimensionality=3)
495
496
[docs]
497def wrhdf_1d(hdf_filename: PathLike,
498 x: np.ndarray,
499 f: np.ndarray,
500 **kwargs) -> None:
501 """Write a 1D PSI-style HDF5 or HDF4 file.
502
503 Parameters
504 ----------
505 hdf_filename : PathLike
506 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to write.
507 x : np.ndarray
508 1D array of scales.
509 f : np.ndarray
510 1D array of data.
511 **kwargs
512 Additional keyword arguments passed through to the underlying
513 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically:
514
515 - ``dataset_id`` : str | None
516 The identifier of the dataset to write.
517 If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
518 - ``sync_dtype``: bool
519 If True, the data type of the scales will be matched to that of the data array.
520
521 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to
522 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between
523 the dataset and scales.
524
525 Raises
526 ------
527 ValueError
528 If the file does not have a ``.hdf`` or ``.h5`` extension.
529 KeyError
530 If, for HDF4 files, the data or scale dtype is not supported by
531 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
532 support table.
533
534 Notes
535 -----
536 This routine is provided for backward compatibility with existing PSI codes that
537 expect this API signature. The underlying implementation dispatches to
538 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default
539 behavior (sync dtype, default dataset IDs) are preserved.
540
541 .. warning::
542 When called with its default arguments this routine **writes to the default PSI
543 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and
544 scales.** This behavior is required for interoperability with certain Fortran-based
545 PSI tools.
546
547 See Also
548 --------
549 write_hdf_data : Generic HDF data writing routine.
550
551 Examples
552 --------
553 >>> import tempfile, numpy as np
554 >>> from pathlib import Path
555 >>> from psi_io import wrhdf_1d, rdhdf_1d
556 >>> x = np.linspace(0.0, 1.0, 50, dtype=np.float32)
557 >>> f = np.sin(x)
558 >>> with tempfile.TemporaryDirectory() as d:
559 ... wrhdf_1d(Path(d) / "out.h5", x, f)
560 ... x2, f2 = rdhdf_1d(Path(d) / "out.h5")
561 ... np.array_equal(f, f2)
562 True
563 """
564 return _wrhdf_nd(hdf_filename, f, x, dimensionality=1, **kwargs)
565
566
[docs]
567def wrhdf_2d(hdf_filename: PathLike,
568 x: np.ndarray,
569 y: np.ndarray,
570 f: np.ndarray,
571 **kwargs) -> None:
572 """Write a 2D PSI-style HDF5 or HDF4 file.
573
574 The data in the HDF file will appear as X,Y in Fortran order.
575
576 Each dimension requires a 1D "scale" associated with it that
577 describes the rectilinear grid coordinates in each dimension.
578
579 Parameters
580 ----------
581 hdf_filename : PathLike
582 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to write.
583 x : np.ndarray
584 1D array of scales in the X dimension.
585 y : np.ndarray
586 1D array of scales in the Y dimension.
587 f : np.ndarray
588 2D data array, C-ordered with shape ``(ny, nx)`` in Python.
589 **kwargs
590 Additional keyword arguments passed through to the underlying
591 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically:
592
593 - ``dataset_id`` : str | None
594 The identifier of the dataset to write.
595 If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
596 - ``sync_dtype``: bool
597 If True, the data type of the scales will be matched to that of the data array.
598
599 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to
600 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between
601 the dataset and scales.
602
603 Raises
604 ------
605 ValueError
606 If the file does not have a ``.hdf`` or ``.h5`` extension.
607 KeyError
608 If, for HDF4 files, the data or scale dtype is not supported by
609 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
610 support table.
611
612 Notes
613 -----
614 This routine is provided for backward compatibility with existing PSI codes that
615 expect this API signature. The underlying implementation dispatches to
616 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default
617 behavior (sync dtype, default dataset IDs) are preserved.
618
619 .. warning::
620 When called with its default arguments this routine **writes to the default PSI
621 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and
622 scales.** This behavior is required for interoperability with certain Fortran-based
623 PSI tools.
624
625 See Also
626 --------
627 write_hdf_data : Generic HDF data writing routine.
628
629 Examples
630 --------
631 >>> import tempfile, numpy as np
632 >>> from pathlib import Path
633 >>> from psi_io import wrhdf_2d, rdhdf_2d
634 >>> x = np.linspace(0.0, 2*np.pi, 64, dtype=np.float32)
635 >>> y = np.linspace(0.0, np.pi, 32, dtype=np.float32)
636 >>> f = np.outer(np.sin(x), np.cos(y)).astype(np.float32)
637 >>> with tempfile.TemporaryDirectory() as d:
638 ... wrhdf_2d(Path(d) / "out.h5", x, y, f)
639 ... x2, y2, f2 = rdhdf_2d(Path(d) / "out.h5")
640 ... np.array_equal(f, f2)
641 True
642 """
643 return _wrhdf_nd(hdf_filename, f, x, y, dimensionality=2, **kwargs)
644
645
[docs]
646def wrhdf_3d(hdf_filename: PathLike,
647 x: np.ndarray,
648 y: np.ndarray,
649 z: np.ndarray,
650 f: np.ndarray,
651 **kwargs) -> None:
652 """Write a 3D PSI-style HDF5 or HDF4 file.
653
654 The data in the HDF file will appear as X,Y,Z in Fortran order.
655
656 Each dimension requires a 1D "scale" associated with it that
657 describes the rectilinear grid coordinates in each dimension.
658
659 Parameters
660 ----------
661 hdf_filename : PathLike
662 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to write.
663 x : np.ndarray
664 1D array of scales in the X dimension.
665 y : np.ndarray
666 1D array of scales in the Y dimension.
667 z : np.ndarray
668 1D array of scales in the Z dimension.
669 f : np.ndarray
670 3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.
671 **kwargs
672 Additional keyword arguments passed through to the underlying
673 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically:
674
675 - ``dataset_id`` : str | None
676 The identifier of the dataset to write.
677 If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
678 - ``sync_dtype``: bool
679 If True, the data type of the scales will be matched to that of the data array.
680
681 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to
682 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between
683 the dataset and scales.
684
685 Raises
686 ------
687 ValueError
688 If the file does not have a ``.hdf`` or ``.h5`` extension.
689 KeyError
690 If, for HDF4 files, the data or scale dtype is not supported by
691 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
692 support table.
693
694 Notes
695 -----
696 This routine is provided for backward compatibility with existing PSI codes that
697 expect this API signature. The underlying implementation dispatches to
698 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default
699 behavior (sync dtype, default dataset IDs) are preserved.
700
701 .. warning::
702 When called with its default arguments this routine **writes to the default PSI
703 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and
704 scales.** This behavior is required for interoperability with certain Fortran-based
705 PSI tools.
706
707 See Also
708 --------
709 write_hdf_data : Generic HDF data writing routine.
710
711 Examples
712 --------
713 >>> import tempfile, numpy as np
714 >>> from pathlib import Path
715 >>> from psi_io import wrhdf_3d, rdhdf_3d
716 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
717 >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
718 >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
719 >>> f = np.ones((30, 20, 10), dtype=np.float32)
720 >>> with tempfile.TemporaryDirectory() as d:
721 ... wrhdf_3d(Path(d) / "out.h5", r, t, p, f)
722 ... r2, t2, p2, f2 = rdhdf_3d(Path(d) / "out.h5")
723 ... np.array_equal(f, f2)
724 True
725 """
726 return _wrhdf_nd(hdf_filename, f, x, y, z, dimensionality=3, **kwargs)
727
728
[docs]
729def get_scales_1d(filename: PathLike
730 ) -> np.ndarray:
731 """Return the coordinate scale of a 1D PSI-style HDF5 or HDF4 dataset.
732
733 Does not load the data array, so this is efficient for large files.
734
735 Parameters
736 ----------
737 filename : PathLike
738 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.
739
740 Returns
741 -------
742 x : np.ndarray
743 1D coordinate scale array.
744
745 Examples
746 --------
747 >>> from psi_io import get_scales_1d
748 >>> from psi_data import fetch_example_radial_scale
749 >>> filepath = fetch_example_radial_scale()
750 >>> x = get_scales_1d(filepath)
751 >>> x.ndim
752 1
753 """
754 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
755 dimensionality=1)
756
757
[docs]
758def get_scales_2d(filename: PathLike
759 ) -> Tuple[np.ndarray, np.ndarray]:
760 """Return the coordinate scales of a 2D PSI-style HDF5 or HDF4 dataset.
761
762 Does not load the data array, so this is efficient for large files.
763 The data in the HDF file is assumed to be ordered X, Y in Fortran order.
764
765 Parameters
766 ----------
767 filename : PathLike
768 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.
769
770 Returns
771 -------
772 x : np.ndarray
773 1D coordinate scale array for the X dimension.
774 y : np.ndarray
775 1D coordinate scale array for the Y dimension.
776
777 Examples
778 --------
779 >>> from psi_io import get_scales_2d
780 >>> from psi_data import fetch_mas_quantities
781 >>> filepath = fetch_mas_quantities().ch_pm
782 >>> x, y = get_scales_2d(filepath)
783 >>> x.ndim, y.ndim
784 (1, 1)
785 """
786 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
787 dimensionality=2)
788
789
[docs]
790def get_scales_3d(filename: PathLike
791 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
792 """Return the coordinate scales of a 3D PSI-style HDF5 or HDF4 dataset.
793
794 Does not load the data array, so this is efficient for large files.
795 The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order.
796
797 Parameters
798 ----------
799 filename : PathLike
800 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.
801
802 Returns
803 -------
804 x : np.ndarray
805 1D coordinate scale array for the X dimension.
806 y : np.ndarray
807 1D coordinate scale array for the Y dimension.
808 z : np.ndarray
809 1D coordinate scale array for the Z dimension.
810
811 Examples
812 --------
813 >>> from psi_io import get_scales_3d
814 >>> from psi_data import fetch_mas_data
815 >>> filepath = fetch_mas_data().cor_br
816 >>> r, t, p = get_scales_3d(filepath)
817 >>> r.ndim, t.ndim, p.ndim
818 (1, 1, 1)
819 """
820 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
821 dimensionality=3)
822
823
824# -----------------------------------------------------------------------------
825# "Updated" HDF reading and slicing routines for Hdf4 and Hdf5 datasets.
826# -----------------------------------------------------------------------------
827
828
906
907
949
950
[docs]
951def read_hdf_data(ifile: PathLike, /,
952 dataset_id: Optional[str] = None,
953 return_scales: bool = True,
954 ) -> Tuple[np.ndarray]:
955 """
956 Read data from an HDF4 (.hdf) or HDF5 (.h5) file.
957
958 Parameters
959 ----------
960 ifile : PathLike
961 The path to the HDF file to read.
962 dataset_id : str | None, optional
963 The identifier of the dataset to read.
964 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
965 ``'Data'`` for HDF5). Default is ``None``.
966 return_scales : bool, optional
967 If ``True``, the coordinate scale arrays for each dimension are also
968 returned. Default is ``True``.
969
970 Returns
971 -------
972 out : np.ndarray | tuple[np.ndarray, ...]
973 The data array. If ``return_scales`` is ``True``, returns a tuple
974 ``(data, scale_0, scale_1, ...)`` with one scale per dimension.
975
976 Raises
977 ------
978 ValueError
979 If the file does not have a ``.hdf`` or ``.h5`` extension.
980
981 See Also
982 --------
983 read_hdf_by_index : Read HDF datasets by index.
984 read_hdf_by_value : Read HDF datasets by value ranges.
985 read_hdf_by_ivalue : Read HDF datasets by subindex values.
986
987 Notes
988 -----
989 This function delegates to :func:`_read_h5_data` for HDF5 files and
990 :func:`_read_h4_data` for HDF4 files based on the file extension.
991
992 Examples
993 --------
994 >>> from psi_io import read_hdf_data
995 >>> from psi_data import fetch_mas_data
996 >>> filepath = fetch_mas_data().cor_br
997 >>> data, r, t, p = read_hdf_data(filepath)
998 >>> data.shape
999 (299, 142, 255)
1000 >>> r.shape, t.shape, p.shape
1001 ((255,), (142,), (299,))
1002 """
1003 return _dispatch_by_ext(ifile, _read_h4_data, _read_h5_data,
1004 dataset_id=dataset_id, return_scales=return_scales)
1005
1006
[docs]
1007def read_hdf_by_index(ifile: PathLike, /,
1008 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
1009 dataset_id: Optional[str] = None,
1010 return_scales: bool = True,
1011 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
1012 r"""
1013 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by index.
1014
1015 .. attention::
1016 For each dimension, the *minimum* number of elements returned is 1 *e.g.*
1017 if 3 ints are passed (as positional `*xi` arguments) for a 3D dataset,
1018 the resulting subset will have a shape of (1, 1, 1,) with scales of length 1.
1019
1020 Parameters
1021 ----------
1022 ifile : PathLike
1023 The path to the HDF file to read.
1024 *xi : int | tuple[int | None, int | None] | None
1025 Indices or ranges for each dimension of the `n`-dimensional dataset.
1026 Use None for a dimension to select all indices. If no arguments are passed,
1027 the entire dataset (and its scales) will be returned – see
1028 :func:`~psi_io.psi_io.read_hdf_data`.
1029 dataset_id : str | None, optional
1030 The identifier of the dataset to read.
1031 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
1032 ``'Data'`` for HDF5). Default is ``None``.
1033 return_scales : bool, optional
1034 If ``True``, the coordinate scale arrays for each dimension are also
1035 returned. Default is ``True``.
1036
1037 Returns
1038 -------
1039 out : np.ndarray | tuple[np.ndarray, ...]
1040 The selected data array. If ``return_scales`` is ``True``, returns a
1041 tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension.
1042
1043 Raises
1044 ------
1045 ValueError
1046 If the file does not have a ``.hdf`` or ``.h5`` extension.
1047
1048 See Also
1049 --------
1050 read_hdf_by_value : Read HDF datasets by value ranges.
1051 read_hdf_by_ivalue : Read HDF datasets by subindex values.
1052 read_hdf_data : Read entire HDF datasets.
1053
1054 Notes
1055 -----
1056 This function delegates to :func:`_read_h5_by_index` for HDF5 files and
1057 :func:`_read_h4_by_index` for HDF4 files based on the file extension.
1058
1059 This function assumes Fortran (column-major) ordering for compatibility with
1060 PSI's data ecosystem. For an :math:`n`-dimensional array of shape
1061 :math:`(D_0, D_1, \ldots, D_{n-1})` the scales satisfy
1062 :math:`|x_i| = |D_{(n-1)-i}|`. For example, a 3D dataset of shape
1063 :math:`(D_\phi, D_\theta, D_r)` has scales :math:`r, \theta, \phi`.
1064
1065 Each ``*xi`` argument is forwarded to Python's built-in :class:`slice` to
1066 extract the desired subset without reading the entire dataset into memory.
1067
1068 Examples
1069 --------
1070 Import a 3D HDF5 cube.
1071
1072 >>> from psi_data import fetch_mas_data
1073 >>> from psi_io import read_hdf_by_index
1074 >>> filepath = fetch_mas_data().cor_br
1075
1076 Extract a radial slice at the first radial index from a 3D cube:
1077
1078 >>> f, r, t, p = read_hdf_by_index(filepath, 0, None, None)
1079 >>> f.shape, r.shape, t.shape, p.shape
1080 ((299, 142, 1), (1,), (142,), (299,))
1081
1082 Extract a phi slice at the 90th index from a 3D cube:
1083
1084 >>> f, r, t, p = read_hdf_by_index(filepath, None, None, 90)
1085 >>> f.shape, r.shape, t.shape, p.shape
1086 ((1, 142, 255), (255,), (142,), (1,))
1087
1088 Extract up to the 20th radial index with phi indices 10 to 25:
1089
1090 >>> f, r, t, p = read_hdf_by_index(filepath, (None, 20), None, (10, 25))
1091 >>> f.shape, r.shape, t.shape, p.shape
1092 ((15, 142, 20), (20,), (142,), (15,))
1093 """
1094 if not xi:
1095 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1096 return _dispatch_by_ext(ifile, _read_h4_by_index, _read_h5_by_index,
1097 *xi, dataset_id=dataset_id, return_scales=return_scales)
1098
1099
[docs]
1100def read_hdf_by_value(ifile: PathLike, /,
1101 *xi: Union[float, Tuple[float, float], None],
1102 dataset_id: Optional[str] = None,
1103 return_scales: bool = True,
1104 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
1105 """
1106 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by value.
1107
1108 .. note::
1109 For each dimension, the minimum number of elements returned is 2 *e.g.*
1110 if 3 floats are passed (as positional `*xi` arguments) for a 3D dataset,
1111 the resulting subset will have a shape of (2, 2, 2,) with scales of length 2.
1112
1113 Parameters
1114 ----------
1115 ifile : PathLike
1116 The path to the HDF file to read.
1117 *xi : float | tuple[float, float] | None
1118 Values or value ranges corresponding to each dimension of the `n`-dimensional
1119 dataset specified by ``dataset_id``. Pass ``None`` for a dimension to
1120 select all indices. If no arguments are passed, the entire dataset (and
1121 its scales) will be returned.
1122 dataset_id : str | None, optional
1123 The identifier of the dataset to read.
1124 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
1125 ``'Data'`` for HDF5). Default is ``None``.
1126 return_scales : bool, optional
1127 If ``True``, the coordinate scale arrays for each dimension are also
1128 returned. Default is ``True``.
1129
1130 Returns
1131 -------
1132 out : np.ndarray | tuple[np.ndarray, ...]
1133 The selected data array. If ``return_scales`` is ``True``, returns a
1134 tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension.
1135
1136 Raises
1137 ------
1138 ValueError
1139 If the file does not have a ``.hdf`` or ``.h5`` extension.
1140
1141 See Also
1142 --------
1143 read_hdf_by_index : Read HDF datasets by index.
1144 read_hdf_by_ivalue : Read HDF datasets by subindex values.
1145 read_hdf_data : Read entire HDF datasets.
1146 sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's
1147 :class:`~scipy.interpolate.RegularGridInterpolator`.
1148 np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation
1149 using vectorized NumPy routines.
1150
1151 Notes
1152 -----
1153 This function delegates to :func:`_read_h5_by_value` for HDF5 files and
1154 :func:`_read_h4_by_value` for HDF4 files based on the file extension.
1155
1156 This function assumes that the dataset is Fortran (or column-major) ordered *viz.* for
1157 compatibility with PSI's data ecosystem; as such, a given :math:`n`-dimensional array,
1158 of shape :math:`(D_0, D_1, ..., D_{n-1})`, has scales :math:`(x_0, x_1, ..., x_{n-1})`,
1159 such that :math:`| x_i | = | D_{(n-1)-i} |`. For example, a 3D dataset with shape
1160 :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p` corresponding to the radial, theta,
1161 and phi dimensions respectively.
1162
1163 This function extracts a subset of the given dataset/scales without reading the
1164 entire data into memory. For a given scale :math:`x_j`, if:
1165
1166 - *i)* a single float is provided (:math:`a`), the function will return a 2-element
1167 subset of the scale (:math:`xʹ_j`) such that :math:`xʹ_j[0] <= a < xʹ_j[1]`.
1168 - *ii)* a (float, float) tuple is provided (:math:`a_0, a_1`), the function will return an
1169 *m*-element subset of the scale (:math:`xʹ_j`) where
1170 :math:`xʹ_j[0] <= a_0` and :math:`xʹ_j[m-1] > a_1`.
1171 - *iii)* a **None** value is provided, the function will return the entire scale :math:`x_j`
1172
1173 The returned subset can then be passed to a linear interpolation routine to extract the
1174 "slice" at the desired fixed dimensions.
1175
1176 Examples
1177 --------
1178 Import a 3D HDF5 cube.
1179
1180 >>> from psi_data import fetch_mas_data
1181 >>> from psi_io import read_hdf_by_value
1182 >>> filepath = fetch_mas_data().cor_br
1183
1184 Extract a radial slice at r=15 from a 3D cube:
1185
1186 >>> f, r, t, p = read_hdf_by_value(filepath, 15, None, None)
1187 >>> f.shape, r.shape, t.shape, p.shape
1188 ((299, 142, 2), (2,), (142,), (299,))
1189
1190 Extract a phi slice at p=1.57 from a 3D cube:
1191
1192 >>> f, r, t, p = read_hdf_by_value(filepath, None, None, 1.57)
1193 >>> f.shape, r.shape, t.shape, p.shape
1194 ((2, 142, 255), (255,), (142,), (2,))
1195
1196 Extract the values between 3.2 and 6.4 (in the radial dimension) and with
1197 phi equal to 4.5
1198
1199 >>> f, r, t, p = read_hdf_by_value(filepath, (3.2, 6.4), None, 4.5)
1200 >>> f.shape, r.shape, t.shape, p.shape
1201 ((2, 142, 35), (35,), (142,), (2,))
1202 """
1203 if not xi:
1204 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1205 return _dispatch_by_ext(ifile, _read_h4_by_value, _read_h5_by_value,
1206 *xi, dataset_id=dataset_id, return_scales=return_scales)
1207
1208
[docs]
1209def read_hdf_by_ivalue(ifile: PathLike, /,
1210 *xi: Union[float, Tuple[float, float], None],
1211 dataset_id: Optional[str] = None,
1212 return_scales: bool = True,
1213 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
1214 r"""
1215 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by subindex value.
1216
1217 Unlike :func:`read_hdf_by_value` (which works in physical coordinate space),
1218 this function accepts fractional *index* positions and returns the minimal
1219 bracketing subset required for interpolation.
1220
1221 .. note::
1222 For each dimension the minimum number of elements returned is 2. For
1223 example, passing 3 scalar floats for a 3D dataset yields a subset of
1224 shape ``(2, 2, 2)`` with index arrays of length 2.
1225
1226 Parameters
1227 ----------
1228 ifile : PathLike
1229 The path to the HDF file to read.
1230 *xi : float | tuple[float, float] | None
1231 Fractional index values or ranges for each dimension of the
1232 ``n``-dimensional dataset. Pass ``None`` to select an entire dimension.
1233 If no arguments are passed, the full dataset is returned.
1234 dataset_id : str | None, optional
1235 The identifier of the dataset to read. If ``None``, a default dataset
1236 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
1237 Default is ``None``.
1238 return_scales : bool, optional
1239 If ``True``, 0-based index arrays (generated with :func:`~numpy.arange`)
1240 are returned for each dimension alongside the data. These are always
1241 index-space arrays regardless of whether the dataset has physical
1242 coordinate scales. Default is ``True``.
1243
1244 Returns
1245 -------
1246 out : np.ndarray | tuple[np.ndarray, ...]
1247 The selected data array. If ``return_scales`` is ``True``, returns a
1248 tuple ``(data, idx_0, idx_1, ...)`` with one index array per dimension.
1249
1250 Raises
1251 ------
1252 ValueError
1253 If the file does not have a ``.hdf`` or ``.h5`` extension.
1254
1255 See Also
1256 --------
1257 read_hdf_by_index : Read HDF datasets by integer index.
1258 read_hdf_by_value : Read HDF datasets by physical coordinate value.
1259 read_hdf_data : Read entire HDF datasets.
1260 sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's
1261 :class:`~scipy.interpolate.RegularGridInterpolator`.
1262 np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation
1263 using vectorized NumPy routines.
1264
1265 Notes
1266 -----
1267 This function delegates to :func:`_read_h5_by_ivalue` for HDF5 files and
1268 :func:`_read_h4_by_ivalue` for HDF4 files based on the file extension.
1269
1270 For a given dimension with scale :math:`x_j`, the bracketing rule is:
1271
1272 - *single float* :math:`a` → returns
1273 :math:`x_j[\lfloor a \rfloor], x_j[\lceil a \rceil]`.
1274 - *(float, float)* :math:`(a_0, a_1)` → returns
1275 :math:`x_j[\lfloor a_0 \rfloor], \ldots, x_j[\lceil a_1 \rceil]`.
1276 - ``None`` → returns the entire scale :math:`x_j`.
1277
1278 Examples
1279 --------
1280 >>> from psi_data import fetch_mas_data
1281 >>> from psi_io import read_hdf_by_ivalue
1282 >>> filepath = fetch_mas_data().cor_br
1283
1284 Extract a 2-element bracket around fractional radial index 2.7:
1285
1286 >>> f, r_idx, t_idx, p_idx = read_hdf_by_ivalue(filepath, 2.7, None, None)
1287 >>> r_idx.shape
1288 (2,)
1289 """
1290 if not xi:
1291 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1292 return _dispatch_by_ext(ifile, _read_h4_by_ivalue, _read_h5_by_ivalue,
1293 *xi, dataset_id=dataset_id, return_scales=return_scales)
1294
1295
[docs]
1296def write_hdf_data(ifile: PathLike, /,
1297 data: np.ndarray,
1298 *scales: Sequence[Union[np.ndarray, None]],
1299 dataset_id: Optional[str] = None,
1300 sync_dtype: bool = False,
1301 strict: bool = True,
1302 **kwargs
1303 ) -> Path:
1304 """
1305 Write data to an HDF4 (.hdf) or HDF5 (.h5) file.
1306
1307 Following PSI conventions, the data array is assumed to be Fortran-ordered,
1308 with the scales provided in the order corresponding to each dimension *e.g.* a
1309 3D dataset with shape (Dp, Dt, Dr) has scales r, t, p corresponding to the
1310 radial, theta, and phi dimensions respectively.
1311
1312 Parameters
1313 ----------
1314 ifile : PathLike
1315 The path to the HDF file to write.
1316 data : np.ndarray
1317 The data array to write.
1318 *scales : Sequence[np.ndarray | None]
1319 The scales (coordinate arrays) for each dimension.
1320 dataset_id : str | None, optional
1321 The identifier of the dataset to write. If ``None``, a default dataset
1322 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
1323 Default is ``None``.
1324 sync_dtype : bool, optional
1325 If ``True``, the scale dtypes are cast to match the data array dtype.
1326 This mimics the behavior of PSI's legacy HDF writing routines and ensures
1327 compatibility with Fortran tools that require uniform precision between
1328 datasets and their scales. Default is ``False``.
1329 strict : bool, optional
1330 If ``True``, raise an error if any dataset attribute cannot be written to
1331 the target format. If ``False``, a warning is printed and the attribute
1332 is skipped. Default is ``True``.
1333 **kwargs
1334 Key-value pairs of dataset attributes to attach to the dataset.
1335
1336 Returns
1337 -------
1338 out : Path
1339 The path to the written HDF file.
1340
1341 Raises
1342 ------
1343 ValueError
1344 If the file does not have a ``.hdf`` or ``.h5`` extension.
1345 KeyError
1346 If, for HDF4 files, the data or scale dtype is not supported by
1347 :py:mod:`pyhdf`. See the dtype support table in the Notes section.
1348
1349 Notes
1350 -----
1351 This function delegates to :func:`_write_h5_data` for HDF5 files and
1352 :func:`_write_h4_data` for HDF4 files based on the file extension.
1353
1354 If no scales are provided the dataset is written without coordinate variables.
1355 The number of scales may be less than or equal to the number of dimensions;
1356 pass ``None`` for dimensions that should not have an attached scale.
1357
1358 The table below summarizes dtype support across formats. HDF4 support is
1359 determined by the :data:`DTYPE_TO_SDC` mapping; HDF5 support is provided
1360 by :mod:`h5py`.
1361
1362 .. list-table:: NumPy dtype support by format
1363 :header-rows: 1
1364 :widths: 20 20 20
1365
1366 * - dtype
1367 - HDF4 (``.hdf``)
1368 - HDF5 (``.h5``)
1369 * - ``float16``
1370 - No
1371 - Yes
1372 * - ``float32``
1373 - Yes
1374 - Yes
1375 * - ``float64``
1376 - Yes
1377 - Yes
1378 * - ``int8``
1379 - Yes
1380 - Yes
1381 * - ``int16``
1382 - Yes
1383 - Yes
1384 * - ``int32``
1385 - Yes
1386 - Yes
1387 * - ``int64``
1388 - No
1389 - Yes
1390 * - ``uint8``
1391 - Yes
1392 - Yes
1393 * - ``uint16``
1394 - Yes
1395 - Yes
1396 * - ``uint32``
1397 - Yes
1398 - Yes
1399 * - ``uint64``
1400 - No
1401 - Yes
1402 * - ``complex64``
1403 - No
1404 - Yes
1405 * - ``complex128``
1406 - No
1407 - Yes
1408 * - ``bool``
1409 - Yes (stored as ``uint8``)
1410 - Yes
1411
1412 See Also
1413 --------
1414 wrhdf_1d : Write 1D HDF files.
1415 wrhdf_2d : Write 2D HDF files.
1416 wrhdf_3d : Write 3D HDF files.
1417
1418 Examples
1419 --------
1420 >>> import tempfile, numpy as np
1421 >>> from pathlib import Path
1422 >>> from psi_io import write_hdf_data, read_hdf_data
1423 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
1424 >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
1425 >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
1426 >>> f = np.ones((30, 20, 10), dtype=np.float32)
1427 >>> with tempfile.TemporaryDirectory() as d:
1428 ... _ = write_hdf_data(Path(d) / "out.h5", f, r, t, p)
1429 ... data, r2, t2, p2 = read_hdf_data(Path(d) / "out.h5")
1430 ... data.shape
1431 (30, 20, 10)
1432 """
1433 return _dispatch_by_ext(ifile, _write_h4_data, _write_h5_data, data,
1434 *scales, dataset_id=dataset_id, sync_dtype=sync_dtype, strict=strict, **kwargs)
1435
1436
1501
1502
[docs]
1503def convert(ifile: PathLike,
1504 ofile: Optional[PathLike] = None,
1505 strict: bool = True) -> Path:
1506 """
1507 Convert an HDF file between HDF4 (.hdf) and HDF5 (.h5) formats.
1508
1509 All datasets and their attributes are preserved in the output file.
1510 The output format is inferred from the input: `.hdf` → `.h5` and vice versa,
1511 unless an explicit output path is provided.
1512
1513 Parameters
1514 ----------
1515 ifile : PathLike
1516 The path to the source HDF file.
1517 ofile : PathLike, optional
1518 The path to the output HDF file.
1519 If ``None``, the output file is written alongside the input file
1520 with its extension swapped (*e.g.* ``foo.hdf`` → ``foo.h5``).
1521 strict : bool, optional
1522 If ``True``, raise an error if any dataset attribute cannot be written
1523 to the output format. If ``False``, a warning is printed and the
1524 attribute is skipped. Default is ``True``.
1525
1526 Returns
1527 -------
1528 out : Path
1529 The path to the written output file.
1530
1531 Raises
1532 ------
1533 ValueError
1534 If the input file does not have a ``.hdf`` or ``.h5`` extension.
1535 KeyError
1536 If, for HDF4 output files, the data or an attribute value has a dtype
1537 not supported by :py:mod:`pyhdf` and ``strict`` is ``True``. See
1538 :func:`write_hdf_data` for the full dtype support table.
1539 TypeError
1540 If, for HDF5 output files, a dataset attribute has a type that cannot be
1541 stored as an HDF5 attribute and ``strict`` is ``True``.
1542
1543 See Also
1544 --------
1545 convert_psih4_to_psih5 : Specialized PSI-convention HDF4 → HDF5 converter.
1546 write_hdf_data : Generic HDF data writing routine.
1547 read_hdf_data : Generic HDF data reading routine.
1548
1549 Examples
1550 --------
1551 >>> import tempfile
1552 >>> from pathlib import Path
1553 >>> from psi_io import convert, read_hdf_meta
1554 >>> from psi_data import fetch_mas_data
1555 >>> h5_path = fetch_mas_data(hdf=5).cor_br
1556 >>> with tempfile.TemporaryDirectory() as d:
1557 ... out = convert(h5_path, Path(d) / "br.hdf")
1558 ... meta = read_hdf_meta(out)
1559 ... meta[0].name
1560 'Data'
1561 """
1562 ifile = Path(ifile)
1563 if not ofile:
1564 ofile = ifile.with_suffix(".hdf") if ifile.suffix == ".h5" else ifile.with_suffix(".h5")
1565 else:
1566 ofile = Path(ofile)
1567 ofile.parent.mkdir(parents=True, exist_ok=True)
1568
1569 meta_data = read_hdf_meta(ifile)
1570 for dataset in meta_data:
1571 data, *scales = read_hdf_data(ifile, dataset_id=dataset.name, return_scales=True)
1572 write_hdf_data(ofile, data, *scales, dataset_id=dataset.name, strict=strict, **dataset.attr)
1573
1574 return ofile
1575
1576
[docs]
1577def convert_psih4_to_psih5(ifile: PathLike,
1578 ofile: Optional[PathLike] = None) -> Path:
1579 """
1580 Convert a PSI-convention HDF4 file to an HDF5 file.
1581
1582 Unlike :func:`convert`, this function is specialized for PSI-style HDF4 files:
1583 it reads the primary dataset (``'Data-Set-2'``) and writes it under the PSI HDF5
1584 dataset name (``'Data'``), preserving the dataset's attributes and scales.
1585
1586 Parameters
1587 ----------
1588 ifile : PathLike
1589 The path to the source HDF4 (.hdf) file.
1590 ofile : PathLike, optional
1591 The path to the output HDF5 (.h5) file.
1592 If ``None``, the output file is written alongside the input file with a
1593 ``.h5`` extension (*e.g.* ``foo.hdf`` → ``foo.h5``).
1594
1595 Returns
1596 -------
1597 out : Path
1598 The path to the written HDF5 file.
1599
1600 Raises
1601 ------
1602 ValueError
1603 If ``ifile`` does not have a ``.hdf`` extension.
1604 ValueError
1605 If ``ofile`` does not have a ``.h5`` extension.
1606 ImportError
1607 If the :py:mod:`pyhdf` package is not available.
1608
1609 See Also
1610 --------
1611 convert : General-purpose HDF4 ↔ HDF5 converter.
1612 write_hdf_data : Generic HDF data writing routine.
1613
1614 Examples
1615 --------
1616 >>> import tempfile
1617 >>> from pathlib import Path
1618 >>> from psi_io import convert_psih4_to_psih5, read_hdf_meta
1619 >>> from psi_data import fetch_mas_data
1620 >>> hdf4_path = fetch_mas_data(hdf=4).cor_br
1621 >>> with tempfile.TemporaryDirectory() as d:
1622 ... out = convert_psih4_to_psih5(hdf4_path, Path(d) / "br.h5")
1623 ... meta = read_hdf_meta(out)
1624 ... meta[0].name
1625 'Data'
1626 """
1627 ifile = Path(ifile)
1628 ofile = ifile.with_suffix(".h5") if not ofile else Path(ofile)
1629 if ifile.suffix != ".hdf":
1630 raise ValueError(f"Input file must have a .hdf extension; got {ifile.suffix}")
1631 if ofile.suffix != ".h5":
1632 raise ValueError(f"Output file must have a .h5 extension; got {ofile.suffix}")
1633 ofile.parent.mkdir(parents=True, exist_ok=True)
1634
1635
1636 data, *scales = read_hdf_data(ifile, dataset_id=PSI_DATA_ID["h4"], return_scales=True)
1637 meta_data, *_ = read_hdf_meta(ifile, dataset_id=PSI_DATA_ID["h4"])
1638
1639 write_hdf_data(ofile, data, *scales,
1640 dataset_id=PSI_DATA_ID["h5"], **meta_data.attr)
1641 return ofile
1642
[docs]
1643def instantiate_linear_interpolator(*args, **kwargs):
1644 r"""
1645 Instantiate a linear interpolator using the provided data and scales.
1646
1647 Parameters
1648 ----------
1649 *args : sequence[array_like]
1650 The first argument is the data array.
1651 Subsequent arguments are the scales (coordinate arrays) for each dimension.
1652 **kwargs : dict
1653 Additional keyword arguments to pass to
1654 :class:`~scipy.interpolate.RegularGridInterpolator`.
1655
1656 Returns
1657 -------
1658 out : RegularGridInterpolator
1659 An instance of RegularGridInterpolator initialized
1660 with the provided data and scales.
1661
1662 Raises
1663 ------
1664 ImportError
1665 If the ``scipy`` package is not available.
1666
1667 Notes
1668 -----
1669 This function transposes the data array and passes it along with the scales
1670 to :class:`~scipy.interpolate.RegularGridInterpolator`. Given a PSI-style
1671 Fortran-ordered 3D dataset, the resulting interpolator can be queried using
1672 :math:`(r, \theta, \phi)` coordinates.
1673
1674 Examples
1675 --------
1676 Import a 3D HDF5 cube.
1677
1678 >>> from psi_data import fetch_mas_data
1679 >>> from psi_io import read_hdf_by_value
1680 >>> from numpy import pi
1681 >>> filepath = fetch_mas_data().cor_br
1682
1683 Read the dataset by value (at 15 R_sun in the radial dimension).
1684
1685 >>> data_and_scales = read_hdf_by_value(filepath, 15, None, None)
1686 >>> interpolator = instantiate_linear_interpolator(*data_and_scales)
1687
1688 Interpolate at a specific position.
1689
1690 >>> interpolator((15, pi/2, pi))
1691 array(-0.00117191)
1692 """
1693 _except_no_scipy()
1694 return RegularGridInterpolator(
1695 values=args[0].T,
1696 points=args[1:],
1697 **kwargs)
1698
1699
[docs]
1700def sp_interpolate_slice_from_hdf(*xi, **kwargs):
1701 r"""
1702 Interpolate a slice from HDF data using SciPy's `RegularGridInterpolator`.
1703
1704 .. note::
1705 Slicing routines result in a dimensional reduction. The dimensions
1706 that are fixed (i.e. provided as float values in `*xi`) are removed
1707 from the output slice, while the dimensions that are not fixed
1708 (*i.e.* provided as None in `*xi`) are retained.
1709
1710 Parameters
1711 ----------
1712 *xi : sequence
1713 Positional arguments passed-through to :func:`read_hdf_by_value`.
1714 **kwargs : dict
1715 Keyword arguments passed-through to :func:`read_hdf_by_value`.
1716 **NOTE: Instantiating a linear interpolator requires the** ``return_scales``
1717 **keyword argument to be set to True; this function overrides
1718 any provided value for** ``return_scales`` **to ensure this behavior.**
1719
1720 Returns
1721 -------
1722 data_slice : np.ndarray
1723 The interpolated data slice with fixed dimensions removed.
1724 scales : list[np.ndarray]
1725 Coordinate scale arrays for the retained (non-fixed) dimensions.
1726
1727 Notes
1728 -----
1729 This function reads data from an HDF file, builds a
1730 :class:`~scipy.interpolate.RegularGridInterpolator`, and evaluates it at
1731 the requested fixed coordinates to produce the slice.
1732
1733 .. note::
1734 The returned slice is Fortran-ordered, *e.g.* radial slices have shape
1735 :math:`(n_\phi, n_\theta)`, and :math:`\phi` slices have shape
1736 :math:`(n_r, n_\theta)`.
1737
1738 .. note::
1739 :class:`~scipy.interpolate.RegularGridInterpolator` casts all input data
1740 to ``float64`` internally. PSI HDF datasets stored as ``float32`` will
1741 therefore be upcast during interpolation.
1742
1743 Examples
1744 --------
1745 >>> from psi_data import fetch_mas_data
1746 >>> from psi_io import sp_interpolate_slice_from_hdf
1747 >>> from numpy import pi
1748 >>> filepath = fetch_mas_data().cor_br
1749
1750 Fetch a 2D slice at r=15 from 3D map
1751
1752 >>> slice_, theta_scale, phi_scale = sp_interpolate_slice_from_hdf(filepath, 15, None, None)
1753 >>> slice_.shape, theta_scale.shape, phi_scale.shape
1754 ((299, 142), (142,), (299,))
1755
1756 Fetch a single point from 3D map
1757
1758 >>> point_value, *_ = sp_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
1759 >>> point_value
1760 np.float64(-0.20146621106799947)
1761 """
1762 filepath, *args = xi
1763 kwargs.pop('return_scales', None)
1764 result = read_hdf_by_value(filepath, *args, **kwargs)
1765 interpolator = instantiate_linear_interpolator(*result)
1766 grid = [yi[0] if yi[0] is not None else yi[1] for yi in zip(args, result[1:])]
1767 slice_ = interpolator(tuple(np.meshgrid(*grid, indexing='ij')))
1768 indices = [0 if yi is not None else slice(None, None) for yi in args]
1769 return slice_[tuple(indices)].T, *[yi[1] for yi in zip(args, result[1:]) if yi[0] is None]
1770
1771
[docs]
1772def np_interpolate_slice_from_hdf(ifile: PathLike, /,
1773 *xi: Union[float, Tuple[float, float], None],
1774 dataset_id: Optional[str] = None,
1775 by_index: bool = False,
1776 ):
1777 """
1778 Interpolate a slice from HDF data using linear interpolation.
1779
1780 .. note::
1781 Slicing routines result in a dimensional reduction. The dimensions
1782 that are fixed (i.e. provided as float values in `*xi`) are removed
1783 from the output slice, while the dimensions that are not fixed
1784 (*i.e.* provided as `None` in `*xi`) are retained.
1785
1786 Parameters
1787 ----------
1788 ifile : PathLike
1789 The path to the HDF file to read.
1790 *xi : sequence
1791 Positional arguments passed-through to reader function.
1792 dataset_id : str | None, optional
1793 The identifier of the dataset to read. If ``None``, a default dataset
1794 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
1795 Default is ``None``.
1796 by_index : bool, optional
1797 If ``True``, use :func:`read_hdf_by_ivalue` to read data by fractional
1798 index values. If ``False``, use :func:`read_hdf_by_value` to read by
1799 physical coordinate values. Default is ``False``.
1800
1801 Returns
1802 -------
1803 data_slice : np.ndarray
1804 The interpolated data slice with fixed dimensions removed.
1805 scales : list[np.ndarray]
1806 Coordinate scale arrays for the retained (non-fixed) dimensions.
1807
1808 Raises
1809 ------
1810 ValueError
1811 If the number of dimensions to interpolate over is not supported.
1812
1813 Notes
1814 -----
1815 This function supports linear, bilinear, and trilinear interpolation
1816 depending on the number of dimensions fixed in `xi`.
1817
1818 Examples
1819 --------
1820 >>> from psi_data import fetch_mas_data
1821 >>> from psi_io import np_interpolate_slice_from_hdf
1822 >>> from numpy import pi
1823 >>> filepath = fetch_mas_data().cor_br
1824
1825 Fetch a 2D slice at r=15 from 3D map
1826
1827 >>> slice_, theta_scale, phi_scale = np_interpolate_slice_from_hdf(filepath, 15, None, None)
1828 >>> slice_.shape, theta_scale.shape, phi_scale.shape
1829 ((299, 142), (142,), (299,))
1830
1831 Fetch a single point from 3D map
1832
1833 >>> point_value, *_ = np_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
1834 >>> point_value
1835 np.float32(-0.2014672)
1836
1837 """
1838 reader = read_hdf_by_value if not by_index else read_hdf_by_ivalue
1839 data, *scales = reader(ifile, *xi, dataset_id=dataset_id, return_scales=True)
1840 f_ = np.transpose(data)
1841 slice_type = sum([yi is not None for yi in xi])
1842 if slice_type == 1:
1843 return _np_linear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
1844 elif slice_type == 2:
1845 return _np_bilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
1846 elif slice_type == 3:
1847 return _np_trilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
1848 else:
1849 raise ValueError("Not a valid number of dimensions for supported linear interpolation methods")
1850
1851
[docs]
1852def interpolate_positions_from_hdf(ifile, *xi, **kwargs):
1853 r"""
1854 Interpolate at a list of scale positions using SciPy's
1855 :class:`~scipy.interpolate.RegularGridInterpolator`.
1856
1857 Parameters
1858 ----------
1859 ifile : PathLike
1860 The path to the HDF file to read.
1861 *xi : sequence[np.ndarray]
1862 Coordinate values for each dimension of the ``n``-dimensional dataset.
1863 Each array must have the same length :math:`m`; the function assembles
1864 them into an :math:`m \times n` column stack for interpolation.
1865 **kwargs
1866 Keyword arguments forwarded to :func:`read_hdf_by_value`.
1867
1868 Returns
1869 -------
1870 out : np.ndarray
1871 The interpolated values at the provided positions.
1872
1873 Notes
1874 -----
1875 This function reads data from an HDF file, creates a linear interpolator,
1876 and interpolates at the provided scale values. For each dimension, the
1877 minimum and maximum values from the provided arrays are used to read
1878 the necessary subset of data from the HDF file *viz.* to avoid loading
1879 the entire dataset into memory.
1880
1881 Examples
1882 --------
1883 Import a 3D HDF5 cube.
1884
1885 >>> from psi_data import fetch_mas_data
1886 >>> from psi_io import interpolate_positions_from_hdf
1887 >>> import numpy as np
1888 >>> filepath = fetch_mas_data().cor_br
1889
1890 Set up positions to interpolate.
1891
1892 >>> r_vals = np.array([15, 20, 25])
1893 >>> theta_vals = np.array([np.pi/4, np.pi/2, 3*np.pi/4])
1894 >>> phi_vals = np.array([0, np.pi, 2*np.pi])
1895
1896 Interpolate at the specified positions.
1897
1898 >>> interpolate_positions_from_hdf(filepath, r_vals, theta_vals, phi_vals)
1899 array([-1.44383936e-03, -6.70081301e-04, 8.56632460e-05])
1900 """
1901 xi_ = [(np.nanmin(i), np.nanmax(i)) for i in xi]
1902 f, *scales = read_hdf_by_value(ifile, *xi_, **kwargs)
1903 interpolator = instantiate_linear_interpolator(f, *scales, bounds_error=False)
1904 return interpolator(np.stack(xi, axis=len(xi[0].shape)))
1905
1906
[docs]
1907def interpolate_point_from_1d_slice(xi, scalex, values):
1908 """
1909 Interpolate a point from a 1D slice using linear interpolation.
1910
1911 Parameters
1912 ----------
1913 xi : float
1914 The coordinate value at which to interpolate.
1915 scalex : np.ndarray
1916 The coordinate scale array for the dimension.
1917 values : np.ndarray
1918 The 1D data array to interpolate.
1919
1920 Returns
1921 -------
1922 out : np.ndarray
1923 The interpolated scalar value as a zero-dimensional array.
1924
1925 Examples
1926 --------
1927 >>> import numpy as np
1928 >>> from psi_io.psi_io import interpolate_point_from_1d_slice
1929 >>> x = np.linspace(0.0, 1.0, 11)
1930 >>> f = x ** 2
1931 >>> float(interpolate_point_from_1d_slice(0.5, x, f))
1932 0.25
1933 """
1934 if scalex[0] > scalex[-1]:
1935 scalex, values = scalex[::-1], values[::-1]
1936 xi_ = int(np.searchsorted(scalex, xi))
1937 sx_ = slice(*_check_index_ranges(len(scalex), xi_, xi_))
1938 return _np_linear_interpolation([xi], [scalex[sx_]], values[sx_])
1939
1940
[docs]
1941def interpolate_point_from_2d_slice(xi, yi, scalex, scaley, values):
1942 """
1943 Interpolate a point from a 2D slice using bilinear interpolation.
1944
1945 Parameters
1946 ----------
1947 xi : float
1948 The coordinate value for the first dimension.
1949 yi : float
1950 The coordinate value for the second dimension.
1951 scalex : np.ndarray
1952 The coordinate scale array for the first dimension.
1953 scaley : np.ndarray
1954 The coordinate scale array for the second dimension.
1955 values : np.ndarray
1956 The 2D data array to interpolate, with shape ``(nx, ny)``.
1957
1958 Returns
1959 -------
1960 out : np.ndarray
1961 The interpolated scalar value as a zero-dimensional array.
1962
1963 Examples
1964 --------
1965 >>> import numpy as np
1966 >>> from psi_io.psi_io import interpolate_point_from_2d_slice
1967 >>> x = np.linspace(0.0, 1.0, 11)
1968 >>> y = np.linspace(0.0, 1.0, 11)
1969 >>> f = np.outer(x, y)
1970 >>> float(interpolate_point_from_2d_slice(0.5, 0.5, x, y, f))
1971 0.25
1972 """
1973 values = np.transpose(values)
1974 if scalex[0] > scalex[-1]:
1975 scalex, values = scalex[::-1], values[::-1, :]
1976 if scaley[0] > scaley[-1]:
1977 scaley, values = scaley[::-1], values[:, ::-1]
1978 xi_, yi_ = int(np.searchsorted(scalex, xi)), int(np.searchsorted(scaley, yi))
1979 sx_, sy_ = slice(*_check_index_ranges(len(scalex), xi_, xi_)), slice(*_check_index_ranges(len(scaley), yi_, yi_))
1980 return _np_bilinear_interpolation([xi, yi], [scalex[sx_], scaley[sy_]], values[(sx_, sy_)])
1981
1982
1983def _rdhdf_nd(hdf_filename: str,
1984 dimensionality: int
1985 ) -> Tuple[np.ndarray, ...]:
1986 """Read an n-dimensional PSI-style HDF file; shared implementation for `rdhdf_1d/2d/3d`.
1987
1988 Parameters
1989 ----------
1990 hdf_filename : PathLike
1991 The path to the HDF5 (.h5) or HDF4 (.hdf) file to read.
1992 dimensionality : int
1993 The expected number of dimensions of the dataset.
1994
1995 Returns
1996 -------
1997 out : tuple[np.ndarray, ...]
1998 The coordinate scales for each dimension followed by the data array.
1999
2000 Raises
2001 ------
2002 ValueError
2003 If the dataset dimensionality does not match ``dimensionality``.
2004
2005 Examples
2006 --------
2007 >>> from psi_io.psi_io import _rdhdf_nd
2008 >>> from psi_data import fetch_mas_data
2009 >>> r, t, p, f = _rdhdf_nd(fetch_mas_data().cor_br, dimensionality=3)
2010 >>> f.shape
2011 (299, 142, 255)
2012 """
2013 f, *scales = read_hdf_data(hdf_filename)
2014 if f.ndim != dimensionality:
2015 err = f'Expected {dimensionality}D data, got {f.ndim}D data instead.'
2016 raise ValueError(err)
2017 scales = scales or (np.empty(0) for _ in f.shape)
2018 return *scales, f
2019
2020
2021def _wrhdf_nd(hdf_filename: str,
2022 data: np.ndarray,
2023 *scales: Sequence[Union[np.ndarray, None]],
2024 dimensionality: int,
2025 sync_dtype: bool = True,
2026 **kwargs
2027 ) -> None:
2028 """Write an n-dimensional PSI-style HDF file; shared implementation for `wrhdf_1d/2d/3d`.
2029
2030 Parameters
2031 ----------
2032 hdf_filename : PathLike
2033 The path to the HDF5 (.h5) or HDF4 (.hdf) file to write.
2034 data : np.ndarray
2035 The data array to write.
2036 *scales : Sequence[np.ndarray | None]
2037 The scales (coordinate arrays) for each dimension.
2038 dimensionality : int
2039 The expected number of dimensions of the data array.
2040 sync_dtype : bool
2041 If True, scale dtypes are cast to match the data array dtype.
2042 **kwargs
2043 Additional keyword arguments forwarded to :func:`write_hdf_data`.
2044
2045 Raises
2046 ------
2047 ValueError
2048 If the data dimensionality does not match ``dimensionality``.
2049
2050 Examples
2051 --------
2052 >>> import tempfile, numpy as np
2053 >>> from pathlib import Path
2054 >>> from psi_io.psi_io import _wrhdf_nd
2055 >>> from psi_io import read_hdf_data
2056 >>> f = np.ones((10, 20, 30), dtype=np.float32)
2057 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
2058 >>> t = np.linspace(0.0, 3.14, 20, dtype=np.float32)
2059 >>> p = np.linspace(0.0, 6.28, 30, dtype=np.float32)
2060 >>> with tempfile.TemporaryDirectory() as d:
2061 ... _wrhdf_nd(Path(d) / "out.h5", f, r, t, p, dimensionality=3)
2062 ... data, *_ = read_hdf_data(Path(d) / "out.h5")
2063 ... data.shape
2064 (10, 20, 30)
2065 """
2066 if data.ndim != dimensionality:
2067 err = f'Expected {dimensionality}D data, got {data.ndim}D data instead.'
2068 raise ValueError(err)
2069 write_hdf_data(hdf_filename, data, *scales, sync_dtype=sync_dtype, **kwargs)
2070
2071
2072def _get_scales_nd_h5(ifile: Union[ Path, str], /,
2073 dimensionality: int,
2074 dataset_id: Optional[str] = None,
2075 ):
2076 """HDF5 (.h5) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`.
2077
2078 Parameters
2079 ----------
2080 ifile : PathLike
2081 The path to the HDF5 (.h5) file.
2082 dimensionality : int
2083 The expected number of dimensions of the dataset.
2084 dataset_id : str, optional
2085 The identifier of the dataset whose scales are read.
2086 If ``None``, the default PSI dataset (``'Data'``) is used.
2087
2088 Returns
2089 -------
2090 out : tuple[np.ndarray, ...]
2091 The coordinate scale arrays for each dimension.
2092
2093 Raises
2094 ------
2095 ValueError
2096 If the dataset dimensionality does not match ``dimensionality``.
2097 ValueError
2098 If any dimension has no associated scale.
2099
2100 Examples
2101 --------
2102 >>> from psi_io.psi_io import _get_scales_nd_h5
2103 >>> from psi_data import fetch_mas_data
2104 >>> r, t, p = _get_scales_nd_h5(fetch_mas_data().cor_br, dimensionality=3)
2105 >>> r.shape, t.shape, p.shape
2106 ((255,), (142,), (299,))
2107 """
2108 with h5.File(ifile, 'r') as hdf:
2109 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2110 ndim = data.ndim
2111 if ndim != dimensionality:
2112 err = f'Expected {dimensionality}D data, got {ndim}D data instead.'
2113 raise ValueError(err)
2114 scales = []
2115 for dim in data.dims:
2116 if dim:
2117 scales.append(dim[0][:])
2118 else:
2119 raise ValueError(f'Dimension has no scale associated with it.')
2120 return tuple(scales)
2121
2122
2123def _get_scales_nd_h4(ifile: Union[ Path, str], /,
2124 dimensionality: int,
2125 dataset_id: Optional[str] = None,
2126 ):
2127 """HDF4 (.hdf) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`.
2128
2129 Parameters
2130 ----------
2131 ifile : PathLike
2132 The path to the HDF4 (.hdf) file.
2133 dimensionality : int
2134 The expected number of dimensions of the dataset.
2135 dataset_id : str, optional
2136 The identifier of the dataset whose scales are read.
2137 If ``None``, the default PSI dataset (``'Data-Set-2'``) is used.
2138
2139 Returns
2140 -------
2141 out : tuple[np.ndarray, ...]
2142 The coordinate scale arrays for each dimension.
2143
2144 Raises
2145 ------
2146 ValueError
2147 If the dataset dimensionality does not match ``dimensionality``.
2148 ValueError
2149 If any dimension has no associated scale.
2150
2151 Examples
2152 --------
2153 >>> from psi_io.psi_io import _get_scales_nd_h4
2154 >>> from psi_data import fetch_mas_data
2155 >>> path = fetch_mas_data(hdf=4).cor_br # doctest: +SKIP
2156 >>> r, t, p = _get_scales_nd_h4(path, dimensionality=3) # doctest: +SKIP
2157 >>> r.shape # doctest: +SKIP
2158 (151,)
2159 """
2160 hdf = h4.SD(str(ifile))
2161 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2162 ndim = data.info()[1]
2163 if ndim != dimensionality:
2164 err = f'Expected {dimensionality}D data, got {ndim}D data instead.'
2165 raise ValueError(err)
2166 scales = []
2167 for k_, v_ in reversed(data.dimensions(full=1).items()):
2168 if v_[3]:
2169 scales.append(hdf.select(k_)[:])
2170 else:
2171 raise ValueError('Dimension has no scale associated with it.')
2172 return tuple(scales)
2173
2174
2175def _read_h5_meta(ifile: PathLike, /,
2176 dataset_id: Optional[str] = None
2177 ):
2178 """HDF5 (.h5) version of :func:`read_hdf_meta`.
2179
2180 Examples
2181 --------
2182 >>> from psi_io.psi_io import _read_h5_meta
2183 >>> from psi_data import fetch_mas_data
2184 >>> meta = _read_h5_meta(fetch_mas_data().cor_br)
2185 >>> meta[0].name
2186 'Data'
2187 """
2188 with h5.File(ifile, 'r') as hdf:
2189 # Raises KeyError if ``dataset_id`` not found
2190 # If ``dataset_id`` is None, get all non-scale :class:`h5.Dataset`s
2191 if dataset_id:
2192 datasets = (dataset_id, hdf[dataset_id]),
2193 else:
2194 datasets = ((k, v) for k, v in hdf.items() if not v.is_scale)
2195
2196 # One should avoid multiple calls to ``dimproxy[0]`` – *e.g.* ``dimproxy[0].dtype`` and
2197 # ``dimproxy[0].shape`` – because the __getitem__ method creates and returns a new
2198 # :class:`~h5.DimensionProxy` object each time it is called. [Does this matter? Probably not.]
2199 return [HdfDataMeta(name=k,
2200 type=v.dtype,
2201 shape=v.shape,
2202 # Exclude HDF5 dimension-scale bookkeeping attributes
2203 # (``DIMENSION_LIST`` / ``DIMENSION_LABELS``). These are
2204 # managed automatically by ``attach_scale`` and hold object
2205 # references that are not real user attributes; surfacing
2206 # them breaks round-trips such as :func:`convert` to HDF4.
2207 attr={ak: av for ak, av in v.attrs.items()
2208 if not ak.startswith("DIMENSION")},
2209 scales=[HdfScaleMeta(name=dimproxy.label,
2210 type=dim.dtype,
2211 shape=dim.shape,
2212 attr=dict(dim.attrs),
2213 imin=dim[0],
2214 imax=dim[-1])
2215 for dimproxy in v.dims if dimproxy and (dim := dimproxy[0])])
2216 for k, v in datasets]
2217
2218
2219def _read_h4_meta(ifile: PathLike, /,
2220 dataset_id: Optional[str] = None
2221 ):
2222 """HDF4 (.hdf) version of :func:`read_hdf_meta`.
2223
2224 Examples
2225 --------
2226 >>> from psi_io.psi_io import _read_h4_meta
2227 >>> from psi_data import fetch_mas_data
2228 >>> meta = _read_h4_meta(fetch_mas_data(hdf=4).cor_br) # doctest: +SKIP
2229 >>> meta[0].name # doctest: +SKIP
2230 'Data-Set-2'
2231 """
2232 hdf = h4.SD(str(ifile))
2233 # Raises HDF4Error if ``dataset_id`` not found
2234 # If ``dataset_id`` is None, get all non-scale :class:`pyhdf.SD.SDS`s
2235 if dataset_id:
2236 datasets = (dataset_id, hdf.select(dataset_id)),
2237 else:
2238 datasets = ((k, hdf.select(k)) for k in hdf.datasets().keys() if not hdf.select(k).iscoordvar())
2239
2240 # The inner list comprehension differs in approach from the HDF5 version because calling
2241 # ``dimensions(full=1)`` on an :class:`~pyhdf.SD.SDS` returns a dictionary of dimension
2242 # dataset identifiers (keys) and tuples containing dimension metadata (values). Even if no
2243 # coordinate-variable datasets are defined, this dictionary is still returned; the only
2244 # indication that the datasets returned do not exist is that the "type" field (within the
2245 # tuple of dimension metadata) is set to 0.
2246
2247 # Also, one cannot avoid multiple calls to ``hdf.select(k_)`` within the inner list comprehension
2248 # because :class:`~pyhdf.SD.SDS` objects do not define a ``__bool__`` method, and the fallback
2249 # behavior of Python is to assess if the __len__ method returns a non-zero value (which, in
2250 # this case, always returns 0).
2251 return [HdfDataMeta(name=k,
2252 type=SDC_TYPE_CONVERSIONS[v.info()[3]],
2253 shape=_cast_shape_tuple(v.info()[2]),
2254 attr=v.attributes(),
2255 scales=[HdfScaleMeta(name=k_,
2256 type=SDC_TYPE_CONVERSIONS[v_[3]],
2257 shape=_cast_shape_tuple(v_[0]),
2258 attr=hdf.select(k_).attributes(),
2259 imin=hdf.select(k_)[0],
2260 imax=hdf.select(k_)[-1])
2261 for k_, v_ in v.dimensions(full=1).items() if v_[3]])
2262 for k, v in datasets]
2263
2264
2265def _read_h5_rtp(ifile: Union[ Path, str], /):
2266 """HDF5 (.h5) version of :func:`read_rtp_meta`.
2267
2268 Examples
2269 --------
2270 >>> from psi_io.psi_io import _read_h5_rtp
2271 >>> from psi_data import fetch_mas_data
2272 >>> meta = _read_h5_rtp(fetch_mas_data().cor_br)
2273 >>> sorted(meta.keys())
2274 ['p', 'r', 't']
2275 """
2276 with h5.File(ifile, 'r') as hdf:
2277 return {k: (hdf[v].size, hdf[v][0], hdf[v][-1])
2278 for k, v in zip('rtp', PSI_SCALE_ID['h5'])}
2279
2280
2281def _read_h4_rtp(ifile: Union[ Path, str], /):
2282 """HDF4 (.hdf) version of :func:`read_rtp_meta`.
2283
2284 Examples
2285 --------
2286 >>> from psi_io.psi_io import _read_h4_rtp
2287 >>> from psi_data import fetch_mas_data
2288 >>> meta = _read_h4_rtp(fetch_mas_data(hdf=4).cor_br) # doctest: +SKIP
2289 >>> sorted(meta.keys()) # doctest: +SKIP
2290 ['p', 'r', 't']
2291 """
2292 hdf = h4.SD(str(ifile))
2293 return {k: (hdf.select(v).info()[2], hdf.select(v)[0], hdf.select(v)[-1])
2294 for k, v in zip('ptr', PSI_SCALE_ID['h4'])}
2295
2296
2297def _read_h5_data(ifile: PathLike, /,
2298 dataset_id: Optional[str] = None,
2299 return_scales: bool = True,
2300 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2301 """HDF5 (.h5) version of :func:`read_hdf_data`.
2302
2303 Examples
2304 --------
2305 >>> from psi_io.psi_io import _read_h5_data
2306 >>> from psi_data import fetch_mas_data
2307 >>> data, r, t, p = _read_h5_data(fetch_mas_data().cor_br)
2308 >>> data.shape
2309 (299, 142, 255)
2310 """
2311 with h5.File(ifile, 'r') as hdf:
2312 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2313 dataset = data[:]
2314 if return_scales:
2315 scales = [dim[0][:] for dim in data.dims if dim]
2316 return dataset, *scales
2317 return dataset
2318
2319
2320def _read_h4_data(ifile: PathLike, /,
2321 dataset_id: Optional[str] = None,
2322 return_scales: bool = True,
2323 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2324 """HDF4 (.hdf) version of :func:`read_hdf_data`.
2325
2326 Examples
2327 --------
2328 >>> from psi_io.psi_io import _read_h4_data
2329 >>> from psi_data import fetch_mas_data
2330 >>> data, *_ = _read_h4_data(fetch_mas_data(hdf=4).cor_br) # doctest: +SKIP
2331 >>> data.shape # doctest: +SKIP
2332 (299, 142, 255)
2333 """
2334 hdf = h4.SD(str(ifile))
2335 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2336 if return_scales:
2337 out = (data[:],
2338 *[hdf.select(k_)[:] for k_, v_ in reversed(data.dimensions(full=1).items()) if v_[3]])
2339 else:
2340 out = data[:]
2341 return out
2342
2343
2344def _read_h5_by_index(ifile: PathLike, /,
2345 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
2346 dataset_id: Optional[str] = None,
2347 return_scales: bool = True,
2348 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2349 """HDF5 (.h5) version of :func:`read_hdf_by_index`.
2350
2351 Examples
2352 --------
2353 >>> from psi_io.psi_io import _read_h5_by_index
2354 >>> from psi_data import fetch_mas_data
2355 >>> f, r, t, p = _read_h5_by_index(fetch_mas_data().cor_br, 0, None, None)
2356 >>> f.shape
2357 (299, 142, 1)
2358 """
2359 with h5.File(ifile, 'r') as hdf:
2360 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2361 if len(xi) != data.ndim:
2362 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2363 slices = [_parse_index_inputs(slice_input) for slice_input in xi]
2364 dataset = data[tuple(reversed(slices))]
2365 if return_scales:
2366 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim]
2367 return dataset, *scales
2368 return dataset
2369
2370def _read_h4_by_index(ifile: PathLike, /,
2371 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
2372 dataset_id: Optional[str] = None,
2373 return_scales: bool = True,
2374 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2375 """HDF4 (.hdf) version of :func:`read_hdf_by_index`.
2376
2377 Examples
2378 --------
2379 >>> from psi_io.psi_io import _read_h4_by_index
2380 >>> from psi_data import fetch_mas_data
2381 >>> path = fetch_mas_data(hdf=4).cor_br # doctest: +SKIP
2382 >>> f, r, t, p = _read_h4_by_index(path, 0, None, None) # doctest: +SKIP
2383 >>> f.shape # doctest: +SKIP
2384 (299, 142, 1)
2385 """
2386 hdf = h4.SD(str(ifile))
2387 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2388 ndim = data.info()[1]
2389 if len(xi) != ndim:
2390 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2391 slices = [_parse_index_inputs(slice_input) for slice_input in xi]
2392 dataset = data[tuple(reversed(slices))]
2393 if return_scales:
2394 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]]
2395 return dataset, *scales
2396 return dataset
2397
2398
2399def _read_h5_by_value(ifile: PathLike, /,
2400 *xi: Union[float, Tuple[float, float], None],
2401 dataset_id: Optional[str] = None,
2402 return_scales: bool = True,
2403 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2404 """HDF5 (.h5) version of :func:`read_hdf_by_value`.
2405
2406 Examples
2407 --------
2408 >>> from psi_io.psi_io import _read_h5_by_value
2409 >>> from psi_data import fetch_mas_data
2410 >>> f, r, t, p = _read_h5_by_value(fetch_mas_data().cor_br, 15, None, None)
2411 >>> f.shape
2412 (299, 142, 2)
2413 """
2414 with h5.File(ifile, 'r') as hdf:
2415 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2416 if len(xi) != data.ndim:
2417 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2418 slices = []
2419 for dimproxy, value in zip(data.dims, xi):
2420 if dimproxy:
2421 slices.append(_parse_value_inputs(dimproxy[0], value))
2422 elif value is None:
2423 slices.append(slice(None))
2424 else:
2425 raise ValueError("Cannot slice by value on dimension without scales")
2426 dataset = data[tuple(reversed(slices))]
2427 if return_scales:
2428 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim]
2429 return dataset, *scales
2430 return dataset
2431
2432
2433def _read_h4_by_value(ifile: PathLike, /,
2434 *xi: Union[float, Tuple[float, float], None],
2435 dataset_id: Optional[str] = None,
2436 return_scales: bool = True,
2437 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2438 """HDF4 (.hdf) version of :func:`read_hdf_by_value`.
2439
2440 Examples
2441 --------
2442 >>> from psi_io.psi_io import _read_h4_by_value
2443 >>> from psi_data import fetch_mas_data
2444 >>> path = fetch_mas_data(hdf=4).cor_br # doctest: +SKIP
2445 >>> f, r, t, p = _read_h4_by_value(path, 15, None, None) # doctest: +SKIP
2446 >>> f.shape # doctest: +SKIP
2447 (299, 142, 2)
2448 """
2449 hdf = h4.SD(str(ifile))
2450 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2451 ndim = data.info()[1]
2452 if len(xi) != ndim:
2453 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2454 slices = []
2455 for (k_, v_), value in zip(reversed(data.dimensions(full=1).items()), xi):
2456 if v_[3] != 0:
2457 slices.append(_parse_value_inputs(hdf.select(k_), value))
2458 elif value is None:
2459 slices.append(slice(None))
2460 else:
2461 raise ValueError("Cannot slice by value on dimension without scales")
2462 dataset = data[tuple(reversed(slices))]
2463 if return_scales:
2464 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]]
2465 return dataset, *scales
2466 return dataset
2467
2468
2469def _read_h5_by_ivalue(ifile: PathLike, /,
2470 *xi: Union[float, Tuple[float, float], None],
2471 dataset_id: Optional[str] = None,
2472 return_scales: bool = True,
2473 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2474 """HDF5 (.h5) version of :func:`read_hdf_by_ivalue`.
2475
2476 Examples
2477 --------
2478 >>> from psi_io.psi_io import _read_h5_by_ivalue
2479 >>> from psi_data import fetch_mas_data
2480 >>> f, r_idx, t_idx, p_idx = _read_h5_by_ivalue(fetch_mas_data().cor_br, 2.7, None, None)
2481 >>> r_idx.shape
2482 (2,)
2483 """
2484 with h5.File(ifile, 'r') as hdf:
2485 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2486 if len(xi) != data.ndim:
2487 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2488 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(data.shape), xi)]
2489 dataset = data[tuple(reversed(slices))]
2490 if return_scales:
2491 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(data.shape))]
2492 return dataset, *scales
2493 return dataset
2494
2495
2496def _read_h4_by_ivalue(ifile: PathLike, /,
2497 *xi: Union[float, Tuple[float, float], None],
2498 dataset_id: Optional[str] = None,
2499 return_scales: bool = True,
2500 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2501 """HDF4 (.hdf) version of :func:`read_hdf_by_ivalue`.
2502
2503 Examples
2504 --------
2505 >>> from psi_io.psi_io import _read_h4_by_ivalue
2506 >>> from psi_data import fetch_mas_data
2507 >>> path = fetch_mas_data(hdf=4).cor_br # doctest: +SKIP
2508 >>> f, r_idx, t_idx, p_idx = _read_h4_by_ivalue(path, 2.7, None, None) # doctest: +SKIP
2509 >>> r_idx.shape # doctest: +SKIP
2510 (2,)
2511 """
2512 hdf = h4.SD(str(ifile))
2513 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2514 ndim, shape = data.info()[1], _cast_shape_tuple(data.info()[2])
2515 if len(xi) != ndim:
2516 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2517 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(shape), xi)]
2518 dataset = data[tuple(reversed(slices))]
2519 if return_scales:
2520 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(shape))]
2521 return dataset, *scales
2522 return dataset
2523
2524
2525def _write_h4_data(ifile: PathLike, /,
2526 data: np.ndarray,
2527 *scales: Sequence[np.ndarray],
2528 dataset_id: Optional[str] = None,
2529 sync_dtype: bool = False,
2530 strict: bool = True,
2531 **kwargs) -> Path:
2532 """HDF4 (.hdf) version of :func:`write_hdf_data`.
2533
2534 Examples
2535 --------
2536 >>> import tempfile, numpy as np
2537 >>> from pathlib import Path
2538 >>> from psi_io.psi_io import _write_h4_data, _read_h4_data
2539 >>> f = np.ones((10,), dtype=np.float32)
2540 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32)
2541 >>> with tempfile.TemporaryDirectory() as d: # doctest: +SKIP
2542 ... _write_h4_data(Path(d) / "out.hdf", f, x) # doctest: +SKIP
2543 ... data, *_ = _read_h4_data(Path(d) / "out.hdf") # doctest: +SKIP
2544 ... data.shape # doctest: +SKIP
2545 (10,)
2546 """
2547 dataid = dataset_id or PSI_DATA_ID['h4']
2548 h4file = h4.SD(str(ifile), h4.SDC.WRITE | h4.SDC.CREATE | h4.SDC.TRUNC)
2549 sds_id = h4file.create(dataid, _dtype_to_sdc(data.dtype), data.shape)
2550
2551 if scales:
2552 for i, scale in enumerate(reversed(scales)):
2553 if scale is not None:
2554 if sync_dtype:
2555 scale = scale.astype(data.dtype)
2556 sds_id.dim(i).setscale(_dtype_to_sdc(scale.dtype), scale.tolist())
2557
2558 if kwargs:
2559 for k, v in kwargs.items():
2560 npv = np.asarray(v)
2561 attr_ = sds_id.attr(k)
2562 try:
2563 val = npv.tolist()
2564 if isinstance(val, bytes):
2565 val = val.decode('latin-1')
2566 attr_.set(_dtype_to_sdc(npv.dtype), val)
2567 except KeyError as e:
2568 if strict:
2569 raise KeyError(f"Failed to set attribute '{k}' on dataset '{dataid}'") from e
2570 else:
2571 print(f"Warning: Failed to set attribute '{k}' on dataset '{dataid}'; skipping.")
2572
2573 sds_id.set(data)
2574 sds_id.endaccess()
2575 h4file.end()
2576
2577 return ifile
2578
2579
2580def _write_h5_data(ifile: PathLike, /,
2581 data: np.ndarray,
2582 *scales: Sequence[np.ndarray],
2583 dataset_id: Optional[str] = None,
2584 sync_dtype: bool = False,
2585 strict: bool = True,
2586 **kwargs) -> Path:
2587 """HDF5 (.h5) version of :func:`write_hdf_data`.
2588
2589 Examples
2590 --------
2591 >>> import tempfile, numpy as np
2592 >>> from pathlib import Path
2593 >>> from psi_io.psi_io import _write_h5_data, _read_h5_data
2594 >>> f = np.ones((10,), dtype=np.float32)
2595 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32)
2596 >>> with tempfile.TemporaryDirectory() as d:
2597 ... _ = _write_h5_data(Path(d) / "out.h5", f, x)
2598 ... data, *_ = _read_h5_data(Path(d) / "out.h5")
2599 ... data.shape
2600 (10,)
2601 """
2602 dataid = dataset_id or PSI_DATA_ID['h5']
2603 with h5.File(ifile, "w") as h5file:
2604 dataset = h5file.create_dataset(dataid, data=data, dtype=data.dtype, shape=data.shape)
2605
2606 if scales:
2607 for i, scale in enumerate(scales):
2608 if scale is not None:
2609 if sync_dtype:
2610 scale = scale.astype(data.dtype)
2611 h5file.create_dataset(f"dim{i+1}", data=scale, dtype=scale.dtype, shape=scale.shape)
2612 h5file[dataid].dims[i].attach_scale(h5file[f"dim{i+1}"])
2613 h5file[dataid].dims[i].label = f"dim{i+1}"
2614
2615 if kwargs:
2616 for key, value in kwargs.items():
2617 if key.startswith('DIMENSION'):
2618 # Skip HDF5 dimension-scale bookkeeping attributes —
2619 # these are managed by attach_scale above and must not
2620 # be overwritten with stale object references.
2621 continue
2622 try:
2623 dataset.attrs[key] = value
2624 except TypeError as e:
2625 if strict:
2626 raise TypeError(f"Failed to set attribute '{key}' on dataset '{dataid}'") from e
2627 else:
2628 print(f"Warning: Failed to set attribute '{key}' on dataset '{dataid}'; skipping.")
2629
2630 return ifile
2631
2632
2633def _write_h4_meta(ifile: PathLike, /,
2634 meta: Optional[Mapping[str, Mapping[str, Any]]] = None,
2635 **kwargs) -> Path:
2636 """HDF4 (.hdf) version of :func:`write_hdf_meta`."""
2637 metadata = dict(meta or {})
2638 h4file = h4.SD(str(ifile), h4.SDC.READ | h4.SDC.WRITE)
2639
2640 for k, v in kwargs.items():
2641 npv = np.asarray(v)
2642 attr_ = h4file.attr(k)
2643 val = npv.tolist()
2644 if isinstance(val, bytes):
2645 val = val.decode('latin-1')
2646 attr_.set(_dtype_to_sdc(npv.dtype), val)
2647 for key, value in metadata.items():
2648 sds_id = h4file.select(key)
2649 for k, v in dict(value).items():
2650 npv = np.asarray(v)
2651 attr_ = sds_id.attr(k)
2652 val = npv.tolist()
2653 if isinstance(val, bytes):
2654 val = val.decode('latin-1')
2655 attr_.set(_dtype_to_sdc(npv.dtype), val)
2656 h4file.end()
2657
2658 return ifile
2659
2660
2661def _write_h5_meta(ifile: PathLike, /,
2662 meta: Optional[Mapping[str, Mapping[str, Any]]] = None,
2663 **kwargs) -> Path:
2664 """HDF5 (.h5) version of :func:`write_hdf_meta`."""
2665 metadata = dict(meta or {})
2666 with h5.File(ifile, "r+") as h5file:
2667 if kwargs:
2668 h5file.attrs.update(**kwargs)
2669 for key, value in metadata.items():
2670 h5file[key].attrs.update(**dict(value))
2671 return ifile
2672
2673
2674def _np_linear_interpolation(xi: Sequence, scales: Sequence, values: np.ndarray):
2675 """
2676 Perform linear interpolation over one dimension.
2677
2678 Parameters
2679 ----------
2680 xi : list[float | None]
2681 Target coordinate values for each dimension; ``None`` marks a
2682 "free" (not interpolated) dimension.
2683 scales : list[np.ndarray]
2684 Coordinate scale arrays, one per dimension.
2685 values : np.ndarray
2686 The data array to interpolate.
2687
2688 Returns
2689 -------
2690 out : np.ndarray
2691 The interpolated data array.
2692
2693 Examples
2694 --------
2695 >>> import numpy as np
2696 >>> from psi_io.psi_io import _np_linear_interpolation
2697 >>> scales = [np.array([0.0, 1.0])]
2698 >>> values = np.array([0.0, 2.0])
2699 >>> float(_np_linear_interpolation([0.5], scales, values))
2700 1.0
2701 """
2702 index0 = next((i for i, v in enumerate(xi) if v is not None), None)
2703 t = (xi[index0] - scales[index0][0])/(scales[index0][1] - scales[index0][0])
2704 f0 = [slice(None, None)]*values.ndim
2705 f1 = [slice(None, None)]*values.ndim
2706 f0[index0] = 0
2707 f1[index0] = 1
2708
2709 return (1 - t)*values[tuple(f0)] + t*values[tuple(f1)]
2710
2711
2712def _np_bilinear_interpolation(xi, scales, values):
2713 """
2714 Perform bilinear interpolation over two dimensions.
2715
2716 Parameters
2717 ----------
2718 xi : Sequence[float | None]
2719 Target coordinate values for each dimension; ``None`` marks a
2720 "free" (not interpolated) dimension.
2721 scales : Sequence[np.ndarray]
2722 Coordinate scale arrays, one per dimension.
2723 values : np.ndarray
2724 The data array to interpolate.
2725
2726 Returns
2727 -------
2728 out : np.ndarray
2729 The interpolated data array.
2730
2731 Examples
2732 --------
2733 >>> import numpy as np
2734 >>> from psi_io.psi_io import _np_bilinear_interpolation
2735 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0])]
2736 >>> values = np.array([[0.0, 0.0], [1.0, 2.0]])
2737 >>> float(_np_bilinear_interpolation([0.5, 0.5], scales, values))
2738 0.75
2739 """
2740 index0, index1 = [i for i, v in enumerate(xi) if v is not None]
2741 t, u = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1)]
2742
2743 f00 = [slice(None, None)]*values.ndim
2744 f10 = [slice(None, None)]*values.ndim
2745 f01 = [slice(None, None)]*values.ndim
2746 f11 = [slice(None, None)]*values.ndim
2747 f00[index0], f00[index1] = 0, 0
2748 f10[index0], f10[index1] = 1, 0
2749 f01[index0], f01[index1] = 0, 1
2750 f11[index0], f11[index1] = 1, 1
2751
2752 return (
2753 (1 - t)*(1 - u)*values[tuple(f00)] +
2754 t*(1 - u)*values[tuple(f10)] +
2755 (1 - t)*u*values[tuple(f01)] +
2756 t*u*values[tuple(f11)]
2757 )
2758
2759
2760def _np_trilinear_interpolation(xi, scales, values):
2761 """
2762 Perform trilinear interpolation over three dimensions.
2763
2764 Parameters
2765 ----------
2766 xi : list[float | None]
2767 Target coordinate values for each dimension; ``None`` marks a
2768 "free" (not interpolated) dimension.
2769 scales : list[np.ndarray]
2770 Coordinate scale arrays, one per dimension.
2771 values : np.ndarray
2772 The data array to interpolate.
2773
2774 Returns
2775 -------
2776 out : np.ndarray
2777 The interpolated data array.
2778
2779 Examples
2780 --------
2781 >>> import numpy as np
2782 >>> from psi_io.psi_io import _np_trilinear_interpolation
2783 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0]), np.array([0.0, 1.0])]
2784 >>> values = np.zeros((2, 2, 2))
2785 >>> values[1, 1, 1] = 8.0
2786 >>> float(_np_trilinear_interpolation([0.5, 0.5, 0.5], scales, values))
2787 1.0
2788 """
2789 index0, index1, index2 = [i for i, v in enumerate(xi) if v is not None]
2790 t, u, v = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1, index2)]
2791
2792 f000 = [slice(None, None)]*values.ndim
2793 f100 = [slice(None, None)]*values.ndim
2794 f010 = [slice(None, None)]*values.ndim
2795 f110 = [slice(None, None)]*values.ndim
2796 f001 = [slice(None, None)]*values.ndim
2797 f101 = [slice(None, None)]*values.ndim
2798 f011 = [slice(None, None)]*values.ndim
2799 f111 = [slice(None, None)]*values.ndim
2800
2801 f000[index0], f000[index1], f000[index2] = 0, 0, 0
2802 f100[index0], f100[index1], f100[index2] = 1, 0, 0
2803 f010[index0], f010[index1], f010[index2] = 0, 1, 0
2804 f110[index0], f110[index1], f110[index2] = 1, 1, 0
2805 f001[index0], f001[index1], f001[index2] = 0, 0, 1
2806 f101[index0], f101[index1], f101[index2] = 1, 0, 1
2807 f011[index0], f011[index1], f011[index2] = 0, 1, 1
2808 f111[index0], f111[index1], f111[index2] = 1, 1, 1
2809
2810 c00 = values[tuple(f000)]*(1 - t) + values[tuple(f100)]*t
2811 c10 = values[tuple(f010)]*(1 - t) + values[tuple(f110)]*t
2812 c01 = values[tuple(f001)]*(1 - t) + values[tuple(f101)]*t
2813 c11 = values[tuple(f011)]*(1 - t) + values[tuple(f111)]*t
2814
2815 c0 = c00*(1 - u) + c10*u
2816 c1 = c01*(1 - u) + c11*u
2817
2818 return c0*(1 - v) + c1*v
2819
2820
2821def _check_index_ranges(arr_size: int,
2822 i0: Union[int, np.integer],
2823 i1: Union[int, np.integer]
2824 ) -> Tuple[int, int]:
2825 """
2826 Adjust index ranges to ensure they cover at least two indices.
2827
2828 Parameters
2829 ----------
2830 arr_size : int
2831 The size of the array along the dimension.
2832 i0 : int
2833 The starting index.
2834 i1 : int
2835 The ending index.
2836
2837 Returns
2838 -------
2839 out : tuple[int, int]
2840 Adjusted starting and ending indices.
2841
2842 Notes
2843 -----
2844 This function ensures that the range between `i0` and `i1` includes at least
2845 two indices for interpolation purposes.
2846
2847 Examples
2848 --------
2849 >>> from psi_io.psi_io import _check_index_ranges
2850 >>> _check_index_ranges(10, 0, 0)
2851 (0, 2)
2852 >>> _check_index_ranges(10, 5, 5)
2853 (4, 6)
2854 >>> _check_index_ranges(10, 10, 10)
2855 (8, 10)
2856 """
2857 i0, i1 = int(i0), int(i1)
2858 if i0 == 0:
2859 return (i0, i1 + 2) if i1 == 0 else (i0, i1 + 1)
2860 elif i0 == arr_size:
2861 return i0 - 2, i1
2862 else:
2863 return i0 - 1, i1 + 1
2864
2865
2866def _cast_shape_tuple(input: Union[int, Sequence[int]]
2867 ) -> tuple[int, ...]:
2868 """
2869 Cast an input to a tuple of integers.
2870
2871 Parameters
2872 ----------
2873 input : int | Sequence[int]
2874 The input to cast.
2875
2876 Returns
2877 -------
2878 out : tuple[int, ...]
2879 The input cast as a tuple of integers.
2880
2881 Raises
2882 ------
2883 TypeError
2884 If the input is neither an integer nor an iterable of integers.
2885
2886 Examples
2887 --------
2888 >>> from psi_io.psi_io import _cast_shape_tuple
2889 >>> _cast_shape_tuple(5)
2890 (5,)
2891 >>> _cast_shape_tuple([3, 4, 5])
2892 (3, 4, 5)
2893 """
2894 if isinstance(input, int):
2895 return (input,)
2896 elif isinstance(input, Sequence):
2897 return tuple(int(i) for i in input)
2898 else:
2899 raise TypeError("Input must be an integer or an iterable of integers.")
2900
2901
2902def _parse_index_inputs(input: Union[int, slice, Sequence[Union[int, None]], None]
2903 ) -> slice:
2904 """
2905 Parse various slice input formats into a standard slice object.
2906
2907 Parameters
2908 ----------
2909 input : int | slice | Sequence[int | None] | None
2910 The index specification to parse:
2911
2912 - *int* — selects a single-element slice ``[n, n+1)``.
2913 - *Sequence[int | None]* — unpacked directly into :class:`slice`.
2914 - ``None`` — selects the entire dimension.
2915
2916 Returns
2917 -------
2918 slice
2919 The parsed slice object.
2920
2921 Raises
2922 ------
2923 TypeError
2924 If the input type is unsupported.
2925
2926 Examples
2927 --------
2928 >>> from psi_io.psi_io import _parse_index_inputs
2929 >>> _parse_index_inputs(3)
2930 slice(3, 4, None)
2931 >>> _parse_index_inputs((2, 7))
2932 slice(2, 7, None)
2933 >>> _parse_index_inputs(None)
2934 slice(None, None, None)
2935 """
2936 if isinstance(input, int):
2937 return slice(input, input + 1)
2938 elif isinstance(input, Sequence):
2939 return slice(*input)
2940 elif input is None:
2941 return slice(None)
2942 else:
2943 raise TypeError("Unsupported input type for slicing.")
2944
2945
2946def _parse_value_inputs(dimproxy,
2947 value,
2948 scale_exists: bool = True
2949 ) -> slice:
2950 """Parse a scale value or value range into a slice over a dimension's coordinate array.
2951
2952 Parameters
2953 ----------
2954 dimproxy : array-like
2955 The coordinate array (scale) for the dimension, supporting ``[:]`` indexing.
2956 value : float | tuple[float | None, float | None] | None
2957 The target value or range:
2958
2959 - ``None`` — select the entire dimension.
2960 - *float* — find the 2-element bracket ``[a, b]`` such that ``a <= value < b``.
2961 - *(float, float)* — find the bracket spanning the given range.
2962 scale_exists : bool
2963 Guard flag; raises :exc:`ValueError` if ``False`` and ``value`` is not ``None``.
2964
2965 Returns
2966 -------
2967 slice
2968 A slice object that selects the appropriate indices from the coordinate array.
2969
2970 Raises
2971 ------
2972 ValueError
2973 If ``value`` is not ``None`` and ``scale_exists`` is ``False``.
2974
2975 Examples
2976 --------
2977 >>> import numpy as np
2978 >>> from psi_io.psi_io import _parse_value_inputs
2979 >>> scale = np.linspace(0.0, 10.0, 11)
2980 >>> _parse_value_inputs(scale, None)
2981 slice(None, None, None)
2982 >>> _parse_value_inputs(scale, 4.5)
2983 slice(4, 6, None)
2984 >>> _parse_value_inputs(scale, (2.0, 7.0))
2985 slice(1, 8, None)
2986 """
2987 if value is None:
2988 return slice(None)
2989 if not scale_exists:
2990 raise ValueError("Cannot parse value inputs when scale does not exist.")
2991 dim = dimproxy[:]
2992 if not isinstance(value, Sequence):
2993 insert_index = np.searchsorted(dim, value)
2994 return slice(*_check_index_ranges(dim.size, insert_index, insert_index))
2995 else:
2996 temp_range = list(value)
2997 if temp_range[0] is None:
2998 temp_range[0] = -np.inf
2999 if temp_range[-1] is None:
3000 temp_range[-1] = np.inf
3001 insert_indices = np.searchsorted(dim, temp_range)
3002 return slice(*_check_index_ranges(dim.size, *insert_indices))
3003
3004
3005def _parse_ivalue_inputs(dimsize,
3006 input: Union[Union[int, float], slice, Sequence[Union[Union[int, float], None]], None]
3007 ) -> slice:
3008 """
3009 Parse a sub-index value or range into a slice over a dimension of a given size.
3010
3011 Parameters
3012 ----------
3013 dimsize : int
3014 The size of the array along the dimension; used to clamp the resulting slice.
3015 input : int | float | Sequence[int | float | None] | None
3016 The sub-index specification to parse:
3017
3018 - ``None`` — select the entire dimension.
3019 - *int* or *float* (:math:`a`) — returns ``slice(floor(a), ceil(a))``
3020 (clamped to ``[0, dimsize - 1]``).
3021 - *Sequence* ``(a, b)`` — returns ``slice(floor(a), ceil(b))``
3022 (clamped to ``[0, dimsize - 1]``).
3023
3024 Returns
3025 -------
3026 slice
3027 The parsed slice object, guaranteed to span at least 2 elements.
3028
3029 Raises
3030 ------
3031 TypeError
3032 If the input type is unsupported.
3033
3034 Examples
3035 --------
3036 >>> from psi_io.psi_io import _parse_ivalue_inputs
3037 >>> _parse_ivalue_inputs(10, None)
3038 slice(None, None, None)
3039 >>> _parse_ivalue_inputs(10, 2.7)
3040 slice(2, 4, None)
3041 >>> _parse_ivalue_inputs(10, (1.3, 4.8))
3042 slice(1, 5, None)
3043 """
3044 if input is None:
3045 return slice(None)
3046 elif isinstance(input, (int, float)):
3047 i0, i1 = math.floor(input), math.ceil(input)
3048 elif isinstance(input, Sequence):
3049 i0, i1 = math.floor(input[0]), math.ceil(input[1])
3050 else:
3051 raise TypeError("Unsupported input type for slicing.")
3052
3053 if i0 > i1:
3054 i0, i1 = i1, i0
3055 i0, i1 = max(0, i0), min(dimsize - 1, i1)
3056 if i0 > i1:
3057 i0, i1 = i1, i0
3058 i0, i1 = max(0, i0), min(dimsize - 1, i1)
3059 if (i1 - i0) < 2:
3060 return slice(i0, i1 + 2 - (i1-i0))
3061 else:
3062 return slice(i0, i1)