1"""
2Routines for reading and writing PSI-style HDF5 and HDF4 data files.
3
4This module provides a unified interface for interacting with PSI's HDF data
5ecosystem. It handles both HDF4 (``.hdf``) and HDF5 (``.h5``) file formats,
6automatically dispatching to the appropriate backend based on the file
7extension.
8
9Key interfaces
10--------------
11Reading full datasets:
12 :func:`read_hdf_data`, :func:`rdhdf_1d`, :func:`rdhdf_2d`, :func:`rdhdf_3d`
13
14Writing full datasets:
15 :func:`write_hdf_data`, :func:`wrhdf_1d`, :func:`wrhdf_2d`, :func:`wrhdf_3d`
16
17Reading file metadata:
18 :func:`read_hdf_meta`, :func:`read_rtp_meta`
19
20Reading dataset subsets:
21 :func:`get_scales_1d`, :func:`get_scales_2d`, :func:`get_scales_3d`,
22 :func:`read_hdf_by_index`, :func:`read_hdf_by_value`, :func:`read_hdf_by_ivalue`
23
24Interpolating data:
25 :func:`np_interpolate_slice_from_hdf`, :func:`sp_interpolate_slice_from_hdf`,
26 :func:`interpolate_positions_from_hdf`
27
28Converting between formats:
29 :func:`convert`, :func:`convert_psih4_to_psih5`
30
31See Also
32--------
33:mod:`psi_io.data` :
34 Helpers for fetching example HDF data files.
35
36Examples
37--------
38Read a 3D PSI-style HDF5 file:
39
40>>> from psi_io import read_hdf_data
41>>> from psi_io.data import get_3d_data
42>>> filepath = get_3d_data()
43>>> data, r, t, p = read_hdf_data(filepath)
44>>> data.shape
45(181, 100, 151)
46"""
47
48from __future__ import annotations
49
50__all__ = [
51 "read_hdf_meta",
52 "read_rtp_meta",
53
54 "get_scales_1d",
55 "get_scales_2d",
56 "get_scales_3d",
57
58 "read_hdf_by_index",
59 "read_hdf_by_value",
60 "read_hdf_by_ivalue",
61
62 "np_interpolate_slice_from_hdf",
63 "sp_interpolate_slice_from_hdf",
64 "interpolate_positions_from_hdf",
65
66 "instantiate_linear_interpolator",
67 "interpolate_point_from_1d_slice",
68 "interpolate_point_from_2d_slice",
69
70 "read_hdf_data",
71 "rdhdf_1d",
72 "rdhdf_2d",
73 "rdhdf_3d",
74
75 "write_hdf_data",
76 "write_hdf_meta",
77 "wrhdf_1d",
78 "wrhdf_2d",
79 "wrhdf_3d",
80
81 "convert",
82 "convert_psih4_to_psih5"
83]
84
85import math
86from collections import namedtuple
87from pathlib import Path
88from types import MappingProxyType
89from typing import Optional, Literal, Tuple, Sequence, List, Dict, Union, Callable, Any, Mapping
90
91import numpy as np
92import h5py as h5
93
94# -----------------------------------------------------------------------------
95# Optional Imports and Import Checking
96# -----------------------------------------------------------------------------
97# These packages are needed by several functions and must be imported in the
98# module namespace.
99try:
100 import pyhdf.SD as h4
101 H4_AVAILABLE = True
102 DTYPE_TO_SDC = MappingProxyType({
103 "i": {
104 1: h4.SDC.INT8,
105 2: h4.SDC.INT16,
106 4: h4.SDC.INT32,
107 },
108 "u": {
109 1: h4.SDC.UINT8,
110 2: h4.SDC.UINT16,
111 4: h4.SDC.UINT32,
112 },
113 "f": {
114 4: h4.SDC.FLOAT32,
115 8: h4.SDC.FLOAT64,
116 },
117 "b": {
118 1: h4.SDC.UINT8
119 },
120 "U": h4.SDC.CHAR,
121 "S": h4.SDC.CHAR
122 })
123 """
124 Helper dictionary mapping :class:`~numpy.dtype` kinds to HDF4 SDC types.
125
126 The keys are :attr:`~numpy.dtype.kind`, and the values are either a direct
127 mapping (for byte-string or unicode-string types) or a nested mapping of
128 :attr:`~numpy.dtype.itemsize` to SDC type (for numeric types).
129 """
130except ImportError:
131 H4_AVAILABLE = False
132 DTYPE_TO_SDC = {}
133
134try:
135 from scipy.interpolate import RegularGridInterpolator
136 SCIPY_AVAILABLE = True
137except ImportError:
138 SCIPY_AVAILABLE = False
139
140
141# Functions to stop execution if a package doesn't exist.
142def _except_no_pyhdf():
143 """Raise :exc:`ImportError` if the ``pyhdf`` package is not available.
144
145 Raises
146 ------
147 ImportError
148 If ``pyhdf`` was not importable at module load time.
149
150 Examples
151 --------
152 >>> from psi_io.psi_io import _except_no_pyhdf, H4_AVAILABLE
153 >>> H4_AVAILABLE # doctest: +SKIP
154 True
155 >>> _except_no_pyhdf() is None # no-op when pyhdf is installed
156 True
157 """
158 if not H4_AVAILABLE:
159 raise ImportError('The pyhdf package is required to read/write HDF4 .hdf files!')
160 return
161
162
163def _except_no_scipy():
164 """Raise :exc:`ImportError` if the ``scipy`` package is not available.
165
166 Raises
167 ------
168 ImportError
169 If ``scipy`` was not importable at module load time.
170
171 Examples
172 --------
173 >>> from psi_io.psi_io import _except_no_scipy, SCIPY_AVAILABLE
174 >>> SCIPY_AVAILABLE # doctest: +SKIP
175 True
176 >>> _except_no_scipy() is None # no-op when scipy is installed
177 True
178 """
179 if not SCIPY_AVAILABLE:
180 raise ImportError('The scipy package is required for the interpolation routines!')
181 return
182
183
184SDC_TYPE_CONVERSIONS = MappingProxyType({
185 3: np.dtype("ubyte"),
186 4: np.dtype("byte"),
187 5: np.dtype("float32"),
188 6: np.dtype("float64"),
189 20: np.dtype("int8"),
190 21: np.dtype("uint8"),
191 22: np.dtype("int16"),
192 23: np.dtype("uint16"),
193 24: np.dtype("int32"),
194 25: np.dtype("uint32")
195})
196"""Helper dictionary for mapping HDF4 types to numpy dtypes"""
197
198
199PSI_DATA_ID = MappingProxyType({
200 'h4': 'Data-Set-2',
201 'h5': 'Data'
202})
203"""Mapping of PSI standard dataset names for HDF4 and HDF5 files"""
204
205
206PSI_SCALE_ID = MappingProxyType({
207 'h4': ('fakeDim0', 'fakeDim1', 'fakeDim2'),
208 'h5': ('dim1', 'dim2', 'dim3')
209})
210"""Mapping of PSI standard scale names for HDF4 and HDF5 files"""
211
212
213HDFEXT = {'.hdf', '.h5'}
214"""Set of possible HDF file extensions"""
215
216
217HdfExtType = Literal['.hdf', '.h5']
218"""Type alias for possible HDF file extensions"""
219
220
221HdfScaleMeta = namedtuple('HdfScaleMeta', ['name', 'type', 'shape', 'attr', 'imin', 'imax'])
222"""
223 Named tuple storing metadata for a single HDF scale (coordinate) dimension.
224
225 Parameters
226 ----------
227 name : str
228 The name of the scale dataset.
229 type : str
230 The data type of the scale.
231 shape : tuple[int, ...]
232 The shape of the scale array.
233 attr : dict[str, Any]
234 A dictionary of attributes associated with the dataset.
235 imin : float
236 The minimum value of the scale.
237 This assumes the scale is monotonically increasing.
238 imax : float
239 The maximum value of the scale.
240 This assumes the scale is monotonically increasing.
241"""
242
243
244HdfDataMeta = namedtuple('HdfDataMeta', ['name', 'type', 'shape', 'attr', 'scales'])
245"""
246 Named tuple for HDF dataset metadata
247
248 Parameters
249 ----------
250 name : str
251 The name of the dataset.
252 type : str
253 The data type of the dataset.
254 shape : tuple[int, ...]
255 The shape of the dataset.
256 attr : dict[str, Any]
257 A dictionary of attributes associated with the dataset.
258 scales : list[HdfScaleMeta]
259 A list of scale metadata objects corresponding to each dimension of the dataset.
260 If the dataset has no scales, this list will be empty.
261"""
262
263PathLike = Union[Path, str]
264"""Type alias for file paths, accepting either :class:`pathlib.Path` or str"""
265
266
267def _dtype_to_sdc(dtype: np.dtype):
268 """Convert a numpy dtype to the corresponding HDF4 SDC type.
269
270 Parameters
271 ----------
272 dtype : np.dtype
273 The numpy dtype to convert.
274
275 Returns
276 -------
277 out : int
278 The corresponding HDF4 SDC type constant for the given numpy dtype.
279
280 Raises
281 ------
282 ImportError
283 If the ``pyhdf`` package is not available.
284 KeyError
285 If the dtype kind or itemsize is not supported by HDF4. See
286 :func:`write_hdf_data` for the full dtype support table.
287
288 Examples
289 --------
290 >>> import numpy as np
291 >>> from psi_io.psi_io import _dtype_to_sdc
292 >>> import pyhdf.SD as h4
293 >>> _dtype_to_sdc(np.dtype("float32")) == h4.SDC.FLOAT32
294 True
295 """
296 _except_no_pyhdf()
297 if dtype.kind in {"U", "S"}:
298 return DTYPE_TO_SDC[dtype.kind]
299
300 try:
301 return DTYPE_TO_SDC[dtype.kind][dtype.itemsize]
302 except KeyError as e:
303 if dtype.kind not in DTYPE_TO_SDC:
304 msg = (f"Unsupported dtype kind '{dtype.kind}' for HDF4. "
305 f"Supported kinds are: {set(DTYPE_TO_SDC.keys())}")
306 raise KeyError(msg) from e
307 elif dtype.itemsize not in DTYPE_TO_SDC[dtype.kind]:
308 msg = (f"Unsupported itemsize '{dtype.itemsize}' for dtype kind '{dtype.kind}' in HDF4. "
309 f"Supported itemsizes are: {set(DTYPE_TO_SDC[dtype.kind].keys())}")
310 raise KeyError(msg) from e
311 raise e
312
313
314def _dispatch_by_ext(ifile: PathLike,
315 hdf4_func: Callable,
316 hdf5_func: Callable,
317 *args: Any, **kwargs: Any
318 ):
319 """
320 Dispatch function to call HDF4 or HDF5 specific functions based on file extension.
321
322 Parameters
323 ----------
324 ifile : PathLike
325 The path to the HDF file.
326 hdf4_func : Callable
327 The function to call for HDF4 files.
328 hdf5_func : Callable
329 The function to call for HDF5 files.
330 *args : Any
331 Positional arguments to pass to the selected function.
332 **kwargs : Any
333 Keyword arguments to pass to the selected function.
334
335 Returns
336 -------
337 out : Any
338 The return value of the dispatched function.
339
340 Raises
341 ------
342 ValueError
343 If the file does not have a `.hdf` or `.h5` extension.
344 ImportError
345 If the file is HDF4 and the `pyhdf` package is not available.
346
347 Examples
348 --------
349 >>> from psi_io.psi_io import _dispatch_by_ext, _read_h5_data, _read_h4_data
350 >>> from psi_io.data import get_3d_data
351 >>> filepath = get_3d_data()
352 >>> data, *_ = _dispatch_by_ext(filepath, _read_h4_data, _read_h5_data)
353 >>> data.shape
354 (181, 100, 151)
355 """
356 ipath = Path(ifile)
357 if ipath.suffix == '.h5':
358 return hdf5_func(ifile, *args, **kwargs)
359 if ipath.suffix == '.hdf':
360 _except_no_pyhdf()
361 return hdf4_func(ifile, *args, **kwargs)
362 raise ValueError("File must be HDF4 (.hdf) or HDF5 (.h5)")
363
364
365# -----------------------------------------------------------------------------
366# "Classic" HDF reading and writing routines adapted from psihdf.py or psi_io.py.
367# -----------------------------------------------------------------------------
368
369
[docs]
370def rdhdf_1d(hdf_filename: PathLike
371 ) -> Tuple[np.ndarray, np.ndarray]:
372 """Read a 1D PSI-style HDF5 or HDF4 file.
373
374 Parameters
375 ----------
376 hdf_filename : PathLike
377 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.
378
379 Returns
380 -------
381 x : np.ndarray
382 1D coordinate scale array.
383 f : np.ndarray
384 1D data array.
385
386 See Also
387 --------
388 read_hdf_data : Generic HDF data reading routine.
389
390 Examples
391 --------
392 >>> from psi_io import rdhdf_1d
393 >>> from psi_io.data import get_1d_data
394 >>> filepath = get_1d_data()
395 >>> x, f = rdhdf_1d(filepath)
396 >>> x.shape, f.shape
397 ((151,), (151,))
398 """
399 return _rdhdf_nd(hdf_filename, dimensionality=1)
400
401
[docs]
402def rdhdf_2d(hdf_filename: PathLike
403 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
404 """Read a 2D PSI-style HDF5 or HDF4 file.
405
406 The data in the HDF file is assumed to be ordered X, Y in Fortran order.
407 Each dimension is assumed to have a 1D scale associated with it that
408 describes the rectilinear grid coordinates.
409
410 Parameters
411 ----------
412 hdf_filename : PathLike
413 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.
414
415 Returns
416 -------
417 x : np.ndarray
418 1D coordinate scale array for the X dimension.
419 y : np.ndarray
420 1D coordinate scale array for the Y dimension.
421 f : np.ndarray
422 2D data array, C-ordered with shape ``(ny, nx)`` in Python.
423
424 Notes
425 -----
426 Because Fortran uses column-major ordering and Python uses row-major
427 ordering, the data array is returned with reversed axis order relative to
428 how it is stored on disk. The first returned scale always corresponds to
429 the *slowest* varying dimension in the data array (i.e. the last axis).
430
431 See Also
432 --------
433 read_hdf_data : Generic HDF data reading routine.
434
435 Examples
436 --------
437 >>> from psi_io import rdhdf_2d
438 >>> from psi_io.data import get_2d_data
439 >>> filepath = get_2d_data()
440 >>> x, y, f = rdhdf_2d(filepath)
441 >>> f.shape == (y.shape[0], x.shape[0])
442 True
443 """
444 return _rdhdf_nd(hdf_filename, dimensionality=2)
445
446
[docs]
447def rdhdf_3d(hdf_filename: PathLike
448 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
449 """Read a 3D PSI-style HDF5 or HDF4 file.
450
451 The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order.
452 Each dimension is assumed to have a 1D scale associated with it that
453 describes the rectilinear grid coordinates.
454
455 Parameters
456 ----------
457 hdf_filename : PathLike
458 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.
459
460 Returns
461 -------
462 x : np.ndarray
463 1D coordinate scale array for the X dimension.
464 y : np.ndarray
465 1D coordinate scale array for the Y dimension.
466 z : np.ndarray
467 1D coordinate scale array for the Z dimension.
468 f : np.ndarray
469 3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.
470
471 Notes
472 -----
473 Because Fortran uses column-major ordering and Python uses row-major
474 ordering, the data array is returned with reversed axis order relative to
475 how it is stored on disk. For PSI spherical datasets the returned shapes
476 follow the convention ``f.shape == (n_phi, n_theta, n_r)`` with scales
477 returned in physical order ``(r, theta, phi)``.
478
479 See Also
480 --------
481 read_hdf_data : Generic HDF data reading routine.
482
483 Examples
484 --------
485 >>> from psi_io import rdhdf_3d
486 >>> from psi_io.data import get_3d_data
487 >>> filepath = get_3d_data()
488 >>> r, t, p, f = rdhdf_3d(filepath)
489 >>> f.shape == (p.shape[0], t.shape[0], r.shape[0])
490 True
491 """
492 return _rdhdf_nd(hdf_filename, dimensionality=3)
493
494
[docs]
495def wrhdf_1d(hdf_filename: PathLike,
496 x: np.ndarray,
497 f: np.ndarray,
498 **kwargs) -> None:
499 """Write a 1D PSI-style HDF5 or HDF4 file.
500
501 Parameters
502 ----------
503 hdf_filename : PathLike
504 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to write.
505 x : np.ndarray
506 1D array of scales.
507 f : np.ndarray
508 1D array of data.
509 **kwargs
510 Additional keyword arguments passed through to the underlying
511 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically:
512
513 - ``dataset_id`` : str | None
514 The identifier of the dataset to write.
515 If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5).
516 - ``sync_dtype``: bool
517 If True, the data type of the scales will be matched to that of the data array.
518
519 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to
520 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between
521 the dataset and scales.
522
523 Raises
524 ------
525 ValueError
526 If the file does not have a ``.hdf`` or ``.h5`` extension.
527 KeyError
528 If, for HDF4 files, the data or scale dtype is not supported by
529 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
530 support table.
531
532 Notes
533 -----
534 This routine is provided for backward compatibility with existing PSI codes that
535 expect this API signature. The underlying implementation dispatches to
536 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default
537 behavior (sync dtype, default dataset IDs) are preserved.
538
539 .. warning::
540 When called with its default arguments this routine **writes to the default PSI
541 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and
542 scales.** This behavior is required for interoperability with certain Fortran-based
543 PSI tools.
544
545 See Also
546 --------
547 write_hdf_data : Generic HDF data writing routine.
548
549 Examples
550 --------
551 >>> import tempfile, numpy as np
552 >>> from pathlib import Path
553 >>> from psi_io import wrhdf_1d, rdhdf_1d
554 >>> x = np.linspace(0.0, 1.0, 50, dtype=np.float32)
555 >>> f = np.sin(x)
556 >>> with tempfile.TemporaryDirectory() as d:
557 ... wrhdf_1d(Path(d) / "out.h5", x, f)
558 ... x2, f2 = rdhdf_1d(Path(d) / "out.h5")
559 ... np.array_equal(f, f2)
560 True
561 """
562 return _wrhdf_nd(hdf_filename, f, x, dimensionality=1, **kwargs)
563
564
[docs]
565def wrhdf_2d(hdf_filename: PathLike,
566 x: np.ndarray,
567 y: np.ndarray,
568 f: np.ndarray,
569 **kwargs) -> None:
570 """Write a 2D PSI-style HDF5 or HDF4 file.
571
572 The data in the HDF file will appear as X,Y in Fortran order.
573
574 Each dimension requires a 1D "scale" associated with it that
575 describes the rectilinear grid coordinates in each dimension.
576
577 Parameters
578 ----------
579 hdf_filename : PathLike
580 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to write.
581 x : np.ndarray
582 1D array of scales in the X dimension.
583 y : np.ndarray
584 1D array of scales in the Y dimension.
585 f : np.ndarray
586 2D data array, C-ordered with shape ``(ny, nx)`` in Python.
587 **kwargs
588 Additional keyword arguments passed through to the underlying
589 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically:
590
591 - ``dataset_id`` : str | None
592 The identifier of the dataset to write.
593 If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
594 - ``sync_dtype``: bool
595 If True, the data type of the scales will be matched to that of the data array.
596
597 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to
598 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between
599 the dataset and scales.
600
601 Raises
602 ------
603 ValueError
604 If the file does not have a ``.hdf`` or ``.h5`` extension.
605 KeyError
606 If, for HDF4 files, the data or scale dtype is not supported by
607 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
608 support table.
609
610 Notes
611 -----
612 This routine is provided for backward compatibility with existing PSI codes that
613 expect this API signature. The underlying implementation dispatches to
614 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default
615 behavior (sync dtype, default dataset IDs) are preserved.
616
617 .. warning::
618 When called with its default arguments this routine **writes to the default PSI
619 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and
620 scales.** This behavior is required for interoperability with certain Fortran-based
621 PSI tools.
622
623 See Also
624 --------
625 write_hdf_data : Generic HDF data writing routine.
626
627 Examples
628 --------
629 >>> import tempfile, numpy as np
630 >>> from pathlib import Path
631 >>> from psi_io import wrhdf_2d, rdhdf_2d
632 >>> x = np.linspace(0.0, 2*np.pi, 64, dtype=np.float32)
633 >>> y = np.linspace(0.0, np.pi, 32, dtype=np.float32)
634 >>> f = np.outer(np.sin(x), np.cos(y)).astype(np.float32)
635 >>> with tempfile.TemporaryDirectory() as d:
636 ... wrhdf_2d(Path(d) / "out.h5", x, y, f)
637 ... x2, y2, f2 = rdhdf_2d(Path(d) / "out.h5")
638 ... np.array_equal(f, f2)
639 True
640 """
641 return _wrhdf_nd(hdf_filename, f, x, y, dimensionality=2, **kwargs)
642
643
[docs]
644def wrhdf_3d(hdf_filename: PathLike,
645 x: np.ndarray,
646 y: np.ndarray,
647 z: np.ndarray,
648 f: np.ndarray,
649 **kwargs) -> None:
650 """Write a 3D PSI-style HDF5 or HDF4 file.
651
652 The data in the HDF file will appear as X,Y,Z in Fortran order.
653
654 Each dimension requires a 1D "scale" associated with it that
655 describes the rectilinear grid coordinates in each dimension.
656
657 Parameters
658 ----------
659 hdf_filename : PathLike
660 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to write.
661 x : np.ndarray
662 1D array of scales in the X dimension.
663 y : np.ndarray
664 1D array of scales in the Y dimension.
665 z : np.ndarray
666 1D array of scales in the Z dimension.
667 f : np.ndarray
668 3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python.
669 **kwargs
670 Additional keyword arguments passed through to the underlying
671 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically:
672
673 - ``dataset_id`` : str | None
674 The identifier of the dataset to write.
675 If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
676 - ``sync_dtype``: bool
677 If True, the data type of the scales will be matched to that of the data array.
678
679 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to
680 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between
681 the dataset and scales.
682
683 Raises
684 ------
685 ValueError
686 If the file does not have a ``.hdf`` or ``.h5`` extension.
687 KeyError
688 If, for HDF4 files, the data or scale dtype is not supported by
689 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype
690 support table.
691
692 Notes
693 -----
694 This routine is provided for backward compatibility with existing PSI codes that
695 expect this API signature. The underlying implementation dispatches to
696 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default
697 behavior (sync dtype, default dataset IDs) are preserved.
698
699 .. warning::
700 When called with its default arguments this routine **writes to the default PSI
701 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and
702 scales.** This behavior is required for interoperability with certain Fortran-based
703 PSI tools.
704
705 See Also
706 --------
707 write_hdf_data : Generic HDF data writing routine.
708
709 Examples
710 --------
711 >>> import tempfile, numpy as np
712 >>> from pathlib import Path
713 >>> from psi_io import wrhdf_3d, rdhdf_3d
714 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
715 >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
716 >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
717 >>> f = np.ones((30, 20, 10), dtype=np.float32)
718 >>> with tempfile.TemporaryDirectory() as d:
719 ... wrhdf_3d(Path(d) / "out.h5", r, t, p, f)
720 ... r2, t2, p2, f2 = rdhdf_3d(Path(d) / "out.h5")
721 ... np.array_equal(f, f2)
722 True
723 """
724 return _wrhdf_nd(hdf_filename, f, x, y, z, dimensionality=3, **kwargs)
725
726
[docs]
727def get_scales_1d(filename: PathLike
728 ) -> np.ndarray:
729 """Return the coordinate scale of a 1D PSI-style HDF5 or HDF4 dataset.
730
731 Does not load the data array, so this is efficient for large files.
732
733 Parameters
734 ----------
735 filename : PathLike
736 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read.
737
738 Returns
739 -------
740 x : np.ndarray
741 1D coordinate scale array.
742
743 Examples
744 --------
745 >>> from psi_io import get_scales_1d
746 >>> from psi_io.data import get_1d_data
747 >>> filepath = get_1d_data()
748 >>> x = get_scales_1d(filepath)
749 >>> x.ndim
750 1
751 """
752 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
753 dimensionality=1)
754
755
[docs]
756def get_scales_2d(filename: PathLike
757 ) -> Tuple[np.ndarray, np.ndarray]:
758 """Return the coordinate scales of a 2D PSI-style HDF5 or HDF4 dataset.
759
760 Does not load the data array, so this is efficient for large files.
761 The data in the HDF file is assumed to be ordered X, Y in Fortran order.
762
763 Parameters
764 ----------
765 filename : PathLike
766 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read.
767
768 Returns
769 -------
770 x : np.ndarray
771 1D coordinate scale array for the X dimension.
772 y : np.ndarray
773 1D coordinate scale array for the Y dimension.
774
775 Examples
776 --------
777 >>> from psi_io import get_scales_2d
778 >>> from psi_io.data import get_2d_data
779 >>> filepath = get_2d_data()
780 >>> x, y = get_scales_2d(filepath)
781 >>> x.ndim, y.ndim
782 (1, 1)
783 """
784 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
785 dimensionality=2)
786
787
[docs]
788def get_scales_3d(filename: PathLike
789 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
790 """Return the coordinate scales of a 3D PSI-style HDF5 or HDF4 dataset.
791
792 Does not load the data array, so this is efficient for large files.
793 The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order.
794
795 Parameters
796 ----------
797 filename : PathLike
798 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read.
799
800 Returns
801 -------
802 x : np.ndarray
803 1D coordinate scale array for the X dimension.
804 y : np.ndarray
805 1D coordinate scale array for the Y dimension.
806 z : np.ndarray
807 1D coordinate scale array for the Z dimension.
808
809 Examples
810 --------
811 >>> from psi_io import get_scales_3d
812 >>> from psi_io.data import get_3d_data
813 >>> filepath = get_3d_data()
814 >>> r, t, p = get_scales_3d(filepath)
815 >>> r.ndim, t.ndim, p.ndim
816 (1, 1, 1)
817 """
818 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5,
819 dimensionality=3)
820
821
822# -----------------------------------------------------------------------------
823# "Updated" HDF reading and slicing routines for Hdf4 and Hdf5 datasets.
824# -----------------------------------------------------------------------------
825
826
904
905
947
948
[docs]
949def read_hdf_data(ifile: PathLike, /,
950 dataset_id: Optional[str] = None,
951 return_scales: bool = True,
952 ) -> Tuple[np.ndarray]:
953 """
954 Read data from an HDF4 (.hdf) or HDF5 (.h5) file.
955
956 Parameters
957 ----------
958 ifile : PathLike
959 The path to the HDF file to read.
960 dataset_id : str | None, optional
961 The identifier of the dataset to read.
962 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
963 ``'Data'`` for HDF5). Default is ``None``.
964 return_scales : bool, optional
965 If ``True``, the coordinate scale arrays for each dimension are also
966 returned. Default is ``True``.
967
968 Returns
969 -------
970 out : np.ndarray | tuple[np.ndarray, ...]
971 The data array. If ``return_scales`` is ``True``, returns a tuple
972 ``(data, scale_0, scale_1, ...)`` with one scale per dimension.
973
974 Raises
975 ------
976 ValueError
977 If the file does not have a ``.hdf`` or ``.h5`` extension.
978
979 See Also
980 --------
981 read_hdf_by_index : Read HDF datasets by index.
982 read_hdf_by_value : Read HDF datasets by value ranges.
983 read_hdf_by_ivalue : Read HDF datasets by subindex values.
984
985 Notes
986 -----
987 This function delegates to :func:`_read_h5_data` for HDF5 files and
988 :func:`_read_h4_data` for HDF4 files based on the file extension.
989
990 Examples
991 --------
992 >>> from psi_io import read_hdf_data
993 >>> from psi_io.data import get_3d_data
994 >>> filepath = get_3d_data()
995 >>> data, r, t, p = read_hdf_data(filepath)
996 >>> data.shape
997 (181, 100, 151)
998 >>> r.shape, t.shape, p.shape
999 ((151,), (100,), (181,))
1000 """
1001 return _dispatch_by_ext(ifile, _read_h4_data, _read_h5_data,
1002 dataset_id=dataset_id, return_scales=return_scales)
1003
1004
[docs]
1005def read_hdf_by_index(ifile: PathLike, /,
1006 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
1007 dataset_id: Optional[str] = None,
1008 return_scales: bool = True,
1009 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
1010 r"""
1011 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by index.
1012
1013 .. attention::
1014 For each dimension, the *minimum* number of elements returned is 1 *e.g.*
1015 if 3 ints are passed (as positional `*xi` arguments) for a 3D dataset,
1016 the resulting subset will have a shape of (1, 1, 1,) with scales of length 1.
1017
1018 Parameters
1019 ----------
1020 ifile : PathLike
1021 The path to the HDF file to read.
1022 *xi : int | tuple[int | None, int | None] | None
1023 Indices or ranges for each dimension of the `n`-dimensional dataset.
1024 Use None for a dimension to select all indices. If no arguments are passed,
1025 the entire dataset (and its scales) will be returned – see
1026 :func:`~psi_io.psi_io.read_hdf_data`.
1027 dataset_id : str | None, optional
1028 The identifier of the dataset to read.
1029 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
1030 ``'Data'`` for HDF5). Default is ``None``.
1031 return_scales : bool, optional
1032 If ``True``, the coordinate scale arrays for each dimension are also
1033 returned. Default is ``True``.
1034
1035 Returns
1036 -------
1037 out : np.ndarray | tuple[np.ndarray, ...]
1038 The selected data array. If ``return_scales`` is ``True``, returns a
1039 tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension.
1040
1041 Raises
1042 ------
1043 ValueError
1044 If the file does not have a ``.hdf`` or ``.h5`` extension.
1045
1046 See Also
1047 --------
1048 read_hdf_by_value : Read HDF datasets by value ranges.
1049 read_hdf_by_ivalue : Read HDF datasets by subindex values.
1050 read_hdf_data : Read entire HDF datasets.
1051
1052 Notes
1053 -----
1054 This function delegates to :func:`_read_h5_by_index` for HDF5 files and
1055 :func:`_read_h4_by_index` for HDF4 files based on the file extension.
1056
1057 This function assumes Fortran (column-major) ordering for compatibility with
1058 PSI's data ecosystem. For an :math:`n`-dimensional array of shape
1059 :math:`(D_0, D_1, \ldots, D_{n-1})` the scales satisfy
1060 :math:`|x_i| = |D_{(n-1)-i}|`. For example, a 3D dataset of shape
1061 :math:`(D_\phi, D_\theta, D_r)` has scales :math:`r, \theta, \phi`.
1062
1063 Each ``*xi`` argument is forwarded to Python's built-in :class:`slice` to
1064 extract the desired subset without reading the entire dataset into memory.
1065
1066 Examples
1067 --------
1068 Import a 3D HDF5 cube.
1069
1070 >>> from psi_io.data import get_3d_data
1071 >>> from psi_io import read_hdf_by_index
1072 >>> filepath = get_3d_data()
1073
1074 Extract a radial slice at the first radial index from a 3D cube:
1075
1076 >>> f, r, t, p = read_hdf_by_index(filepath, 0, None, None)
1077 >>> f.shape, r.shape, t.shape, p.shape
1078 ((181, 100, 1), (1,), (100,), (181,))
1079
1080 Extract a phi slice at the 90th index from a 3D cube:
1081
1082 >>> f, r, t, p = read_hdf_by_index(filepath, None, None, 90)
1083 >>> f.shape, r.shape, t.shape, p.shape
1084 ((1, 100, 151), (151,), (100,), (1,))
1085
1086 Extract up to the 20th radial index with phi indices 10 to 25:
1087
1088 >>> f, r, t, p = read_hdf_by_index(filepath, (None, 20), None, (10, 25))
1089 >>> f.shape, r.shape, t.shape, p.shape
1090 ((15, 100, 20), (20,), (100,), (15,))
1091 """
1092 if not xi:
1093 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1094 return _dispatch_by_ext(ifile, _read_h4_by_index, _read_h5_by_index,
1095 *xi, dataset_id=dataset_id, return_scales=return_scales)
1096
1097
[docs]
1098def read_hdf_by_value(ifile: PathLike, /,
1099 *xi: Union[float, Tuple[float, float], None],
1100 dataset_id: Optional[str] = None,
1101 return_scales: bool = True,
1102 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
1103 """
1104 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by value.
1105
1106 .. note::
1107 For each dimension, the minimum number of elements returned is 2 *e.g.*
1108 if 3 floats are passed (as positional `*xi` arguments) for a 3D dataset,
1109 the resulting subset will have a shape of (2, 2, 2,) with scales of length 2.
1110
1111 Parameters
1112 ----------
1113 ifile : PathLike
1114 The path to the HDF file to read.
1115 *xi : float | tuple[float, float] | None
1116 Values or value ranges corresponding to each dimension of the `n`-dimensional
1117 dataset specified by ``dataset_id``. Pass ``None`` for a dimension to
1118 select all indices. If no arguments are passed, the entire dataset (and
1119 its scales) will be returned.
1120 dataset_id : str | None, optional
1121 The identifier of the dataset to read.
1122 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and
1123 ``'Data'`` for HDF5). Default is ``None``.
1124 return_scales : bool, optional
1125 If ``True``, the coordinate scale arrays for each dimension are also
1126 returned. Default is ``True``.
1127
1128 Returns
1129 -------
1130 out : np.ndarray | tuple[np.ndarray, ...]
1131 The selected data array. If ``return_scales`` is ``True``, returns a
1132 tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension.
1133
1134 Raises
1135 ------
1136 ValueError
1137 If the file does not have a ``.hdf`` or ``.h5`` extension.
1138
1139 See Also
1140 --------
1141 read_hdf_by_index : Read HDF datasets by index.
1142 read_hdf_by_ivalue : Read HDF datasets by subindex values.
1143 read_hdf_data : Read entire HDF datasets.
1144 sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's
1145 :class:`~scipy.interpolate.RegularGridInterpolator`.
1146 np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation
1147 using vectorized NumPy routines.
1148
1149 Notes
1150 -----
1151 This function delegates to :func:`_read_h5_by_value` for HDF5 files and
1152 :func:`_read_h4_by_value` for HDF4 files based on the file extension.
1153
1154 This function assumes that the dataset is Fortran (or column-major) ordered *viz.* for
1155 compatibility with PSI's data ecosystem; as such, a given :math:`n`-dimensional array,
1156 of shape :math:`(D_0, D_1, ..., D_{n-1})`, has scales :math:`(x_0, x_1, ..., x_{n-1})`,
1157 such that :math:`| x_i | = | D_{(n-1)-i} |`. For example, a 3D dataset with shape
1158 :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p` corresponding to the radial, theta,
1159 and phi dimensions respectively.
1160
1161 This function extracts a subset of the given dataset/scales without reading the
1162 entire data into memory. For a given scale :math:`x_j`, if:
1163
1164 - *i)* a single float is provided (:math:`a`), the function will return a 2-element
1165 subset of the scale (:math:`xʹ_j`) such that :math:`xʹ_j[0] <= a < xʹ_j[1]`.
1166 - *ii)* a (float, float) tuple is provided (:math:`a_0, a_1`), the function will return an
1167 *m*-element subset of the scale (:math:`xʹ_j`) where
1168 :math:`xʹ_j[0] <= a_0` and :math:`xʹ_j[m-1] > a_1`.
1169 - *iii)* a **None** value is provided, the function will return the entire scale :math:`x_j`
1170
1171 The returned subset can then be passed to a linear interpolation routine to extract the
1172 "slice" at the desired fixed dimensions.
1173
1174 Examples
1175 --------
1176 Import a 3D HDF5 cube.
1177
1178 >>> from psi_io.data import get_3d_data
1179 >>> from psi_io import read_hdf_by_value
1180 >>> filepath = get_3d_data()
1181
1182 Extract a radial slice at r=15 from a 3D cube:
1183
1184 >>> f, r, t, p = read_hdf_by_value(filepath, 15, None, None)
1185 >>> f.shape, r.shape, t.shape, p.shape
1186 ((181, 100, 2), (2,), (100,), (181,))
1187
1188 Extract a phi slice at p=1.57 from a 3D cube:
1189
1190 >>> f, r, t, p = read_hdf_by_value(filepath, None, None, 1.57)
1191 >>> f.shape, r.shape, t.shape, p.shape
1192 ((2, 100, 151), (151,), (100,), (2,))
1193
1194 Extract the values between 3.2 and 6.4 (in the radial dimension) and with
1195 phi equal to 4.5
1196
1197 >>> f, r, t, p = read_hdf_by_value(filepath, (3.2, 6.4), None, 4.5)
1198 >>> f.shape, r.shape, t.shape, p.shape
1199 ((2, 100, 15), (15,), (100,), (2,))
1200 """
1201 if not xi:
1202 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1203 return _dispatch_by_ext(ifile, _read_h4_by_value, _read_h5_by_value,
1204 *xi, dataset_id=dataset_id, return_scales=return_scales)
1205
1206
[docs]
1207def read_hdf_by_ivalue(ifile: PathLike, /,
1208 *xi: Union[float, Tuple[float, float], None],
1209 dataset_id: Optional[str] = None,
1210 return_scales: bool = True,
1211 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
1212 r"""
1213 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by subindex value.
1214
1215 Unlike :func:`read_hdf_by_value` (which works in physical coordinate space),
1216 this function accepts fractional *index* positions and returns the minimal
1217 bracketing subset required for interpolation.
1218
1219 .. note::
1220 For each dimension the minimum number of elements returned is 2. For
1221 example, passing 3 scalar floats for a 3D dataset yields a subset of
1222 shape ``(2, 2, 2)`` with index arrays of length 2.
1223
1224 Parameters
1225 ----------
1226 ifile : PathLike
1227 The path to the HDF file to read.
1228 *xi : float | tuple[float, float] | None
1229 Fractional index values or ranges for each dimension of the
1230 ``n``-dimensional dataset. Pass ``None`` to select an entire dimension.
1231 If no arguments are passed, the full dataset is returned.
1232 dataset_id : str | None, optional
1233 The identifier of the dataset to read. If ``None``, a default dataset
1234 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
1235 Default is ``None``.
1236 return_scales : bool, optional
1237 If ``True``, 0-based index arrays (generated with :func:`~numpy.arange`)
1238 are returned for each dimension alongside the data. These are always
1239 index-space arrays regardless of whether the dataset has physical
1240 coordinate scales. Default is ``True``.
1241
1242 Returns
1243 -------
1244 out : np.ndarray | tuple[np.ndarray, ...]
1245 The selected data array. If ``return_scales`` is ``True``, returns a
1246 tuple ``(data, idx_0, idx_1, ...)`` with one index array per dimension.
1247
1248 Raises
1249 ------
1250 ValueError
1251 If the file does not have a ``.hdf`` or ``.h5`` extension.
1252
1253 See Also
1254 --------
1255 read_hdf_by_index : Read HDF datasets by integer index.
1256 read_hdf_by_value : Read HDF datasets by physical coordinate value.
1257 read_hdf_data : Read entire HDF datasets.
1258 sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's
1259 :class:`~scipy.interpolate.RegularGridInterpolator`.
1260 np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation
1261 using vectorized NumPy routines.
1262
1263 Notes
1264 -----
1265 This function delegates to :func:`_read_h5_by_ivalue` for HDF5 files and
1266 :func:`_read_h4_by_ivalue` for HDF4 files based on the file extension.
1267
1268 For a given dimension with scale :math:`x_j`, the bracketing rule is:
1269
1270 - *single float* :math:`a` → returns
1271 :math:`x_j[\lfloor a \rfloor], x_j[\lceil a \rceil]`.
1272 - *(float, float)* :math:`(a_0, a_1)` → returns
1273 :math:`x_j[\lfloor a_0 \rfloor], \ldots, x_j[\lceil a_1 \rceil]`.
1274 - ``None`` → returns the entire scale :math:`x_j`.
1275
1276 Examples
1277 --------
1278 >>> from psi_io.data import get_3d_data
1279 >>> from psi_io import read_hdf_by_ivalue
1280 >>> filepath = get_3d_data()
1281
1282 Extract a 2-element bracket around fractional radial index 2.7:
1283
1284 >>> f, r_idx, t_idx, p_idx = read_hdf_by_ivalue(filepath, 2.7, None, None)
1285 >>> r_idx.shape
1286 (2,)
1287 """
1288 if not xi:
1289 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales)
1290 return _dispatch_by_ext(ifile, _read_h4_by_ivalue, _read_h5_by_ivalue,
1291 *xi, dataset_id=dataset_id, return_scales=return_scales)
1292
1293
[docs]
1294def write_hdf_data(ifile: PathLike, /,
1295 data: np.ndarray,
1296 *scales: Sequence[Union[np.ndarray, None]],
1297 dataset_id: Optional[str] = None,
1298 sync_dtype: bool = False,
1299 strict: bool = True,
1300 **kwargs
1301 ) -> Path:
1302 """
1303 Write data to an HDF4 (.hdf) or HDF5 (.h5) file.
1304
1305 Following PSI conventions, the data array is assumed to be Fortran-ordered,
1306 with the scales provided in the order corresponding to each dimension *e.g.* a
1307 3D dataset with shape (Dp, Dt, Dr) has scales r, t, p corresponding to the
1308 radial, theta, and phi dimensions respectively.
1309
1310 Parameters
1311 ----------
1312 ifile : PathLike
1313 The path to the HDF file to write.
1314 data : np.ndarray
1315 The data array to write.
1316 *scales : Sequence[np.ndarray | None]
1317 The scales (coordinate arrays) for each dimension.
1318 dataset_id : str | None, optional
1319 The identifier of the dataset to write. If ``None``, a default dataset
1320 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
1321 Default is ``None``.
1322 sync_dtype : bool, optional
1323 If ``True``, the scale dtypes are cast to match the data array dtype.
1324 This mimics the behavior of PSI's legacy HDF writing routines and ensures
1325 compatibility with Fortran tools that require uniform precision between
1326 datasets and their scales. Default is ``False``.
1327 strict : bool, optional
1328 If ``True``, raise an error if any dataset attribute cannot be written to
1329 the target format. If ``False``, a warning is printed and the attribute
1330 is skipped. Default is ``True``.
1331 **kwargs
1332 Key-value pairs of dataset attributes to attach to the dataset.
1333
1334 Returns
1335 -------
1336 out : Path
1337 The path to the written HDF file.
1338
1339 Raises
1340 ------
1341 ValueError
1342 If the file does not have a ``.hdf`` or ``.h5`` extension.
1343 KeyError
1344 If, for HDF4 files, the data or scale dtype is not supported by
1345 :py:mod:`pyhdf`. See the dtype support table in the Notes section.
1346
1347 Notes
1348 -----
1349 This function delegates to :func:`_write_h5_data` for HDF5 files and
1350 :func:`_write_h4_data` for HDF4 files based on the file extension.
1351
1352 If no scales are provided the dataset is written without coordinate variables.
1353 The number of scales may be less than or equal to the number of dimensions;
1354 pass ``None`` for dimensions that should not have an attached scale.
1355
1356 The table below summarizes dtype support across formats. HDF4 support is
1357 determined by the :data:`DTYPE_TO_SDC` mapping; HDF5 support is provided
1358 by :mod:`h5py`.
1359
1360 .. list-table:: NumPy dtype support by format
1361 :header-rows: 1
1362 :widths: 20 20 20
1363
1364 * - dtype
1365 - HDF4 (``.hdf``)
1366 - HDF5 (``.h5``)
1367 * - ``float16``
1368 - No
1369 - Yes
1370 * - ``float32``
1371 - Yes
1372 - Yes
1373 * - ``float64``
1374 - Yes
1375 - Yes
1376 * - ``int8``
1377 - Yes
1378 - Yes
1379 * - ``int16``
1380 - Yes
1381 - Yes
1382 * - ``int32``
1383 - Yes
1384 - Yes
1385 * - ``int64``
1386 - No
1387 - Yes
1388 * - ``uint8``
1389 - Yes
1390 - Yes
1391 * - ``uint16``
1392 - Yes
1393 - Yes
1394 * - ``uint32``
1395 - Yes
1396 - Yes
1397 * - ``uint64``
1398 - No
1399 - Yes
1400 * - ``complex64``
1401 - No
1402 - Yes
1403 * - ``complex128``
1404 - No
1405 - Yes
1406 * - ``bool``
1407 - Yes (stored as ``uint8``)
1408 - Yes
1409
1410 See Also
1411 --------
1412 wrhdf_1d : Write 1D HDF files.
1413 wrhdf_2d : Write 2D HDF files.
1414 wrhdf_3d : Write 3D HDF files.
1415
1416 Examples
1417 --------
1418 >>> import tempfile, numpy as np
1419 >>> from pathlib import Path
1420 >>> from psi_io import write_hdf_data, read_hdf_data
1421 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
1422 >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32)
1423 >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32)
1424 >>> f = np.ones((30, 20, 10), dtype=np.float32)
1425 >>> with tempfile.TemporaryDirectory() as d:
1426 ... write_hdf_data(Path(d) / "out.h5", f, r, t, p)
1427 ... data, r2, t2, p2 = read_hdf_data(Path(d) / "out.h5")
1428 ... data.shape
1429 (30, 20, 10)
1430 """
1431 return _dispatch_by_ext(ifile, _write_h4_data, _write_h5_data, data,
1432 *scales, dataset_id=dataset_id, sync_dtype=sync_dtype, strict=strict, **kwargs)
1433
1434
1499
1500
[docs]
1501def convert(ifile: PathLike,
1502 ofile: Optional[PathLike] = None,
1503 strict: bool = True) -> Path:
1504 """
1505 Convert an HDF file between HDF4 (.hdf) and HDF5 (.h5) formats.
1506
1507 All datasets and their attributes are preserved in the output file.
1508 The output format is inferred from the input: `.hdf` → `.h5` and vice versa,
1509 unless an explicit output path is provided.
1510
1511 Parameters
1512 ----------
1513 ifile : PathLike
1514 The path to the source HDF file.
1515 ofile : PathLike, optional
1516 The path to the output HDF file.
1517 If ``None``, the output file is written alongside the input file
1518 with its extension swapped (*e.g.* ``foo.hdf`` → ``foo.h5``).
1519 strict : bool, optional
1520 If ``True``, raise an error if any dataset attribute cannot be written
1521 to the output format. If ``False``, a warning is printed and the
1522 attribute is skipped. Default is ``True``.
1523
1524 Returns
1525 -------
1526 out : Path
1527 The path to the written output file.
1528
1529 Raises
1530 ------
1531 ValueError
1532 If the input file does not have a ``.hdf`` or ``.h5`` extension.
1533 KeyError
1534 If, for HDF4 output files, the data or an attribute value has a dtype
1535 not supported by :py:mod:`pyhdf` and ``strict`` is ``True``. See
1536 :func:`write_hdf_data` for the full dtype support table.
1537 TypeError
1538 If, for HDF5 output files, a dataset attribute has a type that cannot be
1539 stored as an HDF5 attribute and ``strict`` is ``True``.
1540
1541 See Also
1542 --------
1543 convert_psih4_to_psih5 : Specialized PSI-convention HDF4 → HDF5 converter.
1544 write_hdf_data : Generic HDF data writing routine.
1545 read_hdf_data : Generic HDF data reading routine.
1546
1547 Examples
1548 --------
1549 >>> import tempfile
1550 >>> from pathlib import Path
1551 >>> from psi_io import convert, read_hdf_meta
1552 >>> from psi_io.data import get_3d_data
1553 >>> h5_path = get_3d_data(hdf=".h5")
1554 >>> with tempfile.TemporaryDirectory() as d:
1555 ... out = convert(h5_path, Path(d) / "br.hdf")
1556 ... meta = read_hdf_meta(out)
1557 ... meta[0].name
1558 'Data'
1559 """
1560 ifile = Path(ifile)
1561 if not ofile:
1562 ofile = ifile.with_suffix(".hdf") if ifile.suffix == ".h5" else ifile.with_suffix(".h5")
1563 else:
1564 ofile = Path(ofile)
1565 ofile.parent.mkdir(parents=True, exist_ok=True)
1566
1567 meta_data = read_hdf_meta(ifile)
1568 for dataset in meta_data:
1569 data, *scales = read_hdf_data(ifile, dataset_id=dataset.name, return_scales=True)
1570 write_hdf_data(ofile, data, *scales, dataset_id=dataset.name, strict=strict, **dataset.attr)
1571
1572 return ofile
1573
1574
[docs]
1575def convert_psih4_to_psih5(ifile: PathLike,
1576 ofile: Optional[PathLike] = None) -> Path:
1577 """
1578 Convert a PSI-convention HDF4 file to an HDF5 file.
1579
1580 Unlike :func:`convert`, this function is specialized for PSI-style HDF4 files:
1581 it reads the primary dataset (``'Data-Set-2'``) and writes it under the PSI HDF5
1582 dataset name (``'Data'``), preserving the dataset's attributes and scales.
1583
1584 Parameters
1585 ----------
1586 ifile : PathLike
1587 The path to the source HDF4 (.hdf) file.
1588 ofile : PathLike, optional
1589 The path to the output HDF5 (.h5) file.
1590 If ``None``, the output file is written alongside the input file with a
1591 ``.h5`` extension (*e.g.* ``foo.hdf`` → ``foo.h5``).
1592
1593 Returns
1594 -------
1595 out : Path
1596 The path to the written HDF5 file.
1597
1598 Raises
1599 ------
1600 ValueError
1601 If ``ifile`` does not have a ``.hdf`` extension.
1602 ValueError
1603 If ``ofile`` does not have a ``.h5`` extension.
1604 ImportError
1605 If the :py:mod:`pyhdf` package is not available.
1606
1607 See Also
1608 --------
1609 convert : General-purpose HDF4 ↔ HDF5 converter.
1610 write_hdf_data : Generic HDF data writing routine.
1611
1612 Examples
1613 --------
1614 >>> import tempfile
1615 >>> from pathlib import Path
1616 >>> from psi_io import convert_psih4_to_psih5, read_hdf_meta
1617 >>> from psi_io.data import get_3d_data
1618 >>> hdf4_path = get_3d_data(hdf=".hdf")
1619 >>> with tempfile.TemporaryDirectory() as d:
1620 ... out = convert_psih4_to_psih5(hdf4_path, Path(d) / "br.h5")
1621 ... meta = read_hdf_meta(out)
1622 ... meta[0].name
1623 'Data'
1624 """
1625 ifile = Path(ifile)
1626 ofile = ifile.with_suffix(".h5") if not ofile else Path(ofile)
1627 if ifile.suffix != ".hdf":
1628 raise ValueError(f"Input file must have a .hdf extension; got {ifile.suffix}")
1629 if ofile.suffix != ".h5":
1630 raise ValueError(f"Output file must have a .h5 extension; got {ofile.suffix}")
1631 ofile.parent.mkdir(parents=True, exist_ok=True)
1632
1633
1634 data, *scales = read_hdf_data(ifile, dataset_id=PSI_DATA_ID["h4"], return_scales=True)
1635 meta_data, *_ = read_hdf_meta(ifile, dataset_id=PSI_DATA_ID["h4"])
1636
1637 write_hdf_data(ofile, data, *scales,
1638 dataset_id=PSI_DATA_ID["h5"], **meta_data.attr)
1639 return ofile
1640
[docs]
1641def instantiate_linear_interpolator(*args, **kwargs):
1642 r"""
1643 Instantiate a linear interpolator using the provided data and scales.
1644
1645 Parameters
1646 ----------
1647 *args : sequence[array_like]
1648 The first argument is the data array.
1649 Subsequent arguments are the scales (coordinate arrays) for each dimension.
1650 **kwargs : dict
1651 Additional keyword arguments to pass to
1652 :class:`~scipy.interpolate.RegularGridInterpolator`.
1653
1654 Returns
1655 -------
1656 out : RegularGridInterpolator
1657 An instance of RegularGridInterpolator initialized
1658 with the provided data and scales.
1659
1660 Raises
1661 ------
1662 ImportError
1663 If the ``scipy`` package is not available.
1664
1665 Notes
1666 -----
1667 This function transposes the data array and passes it along with the scales
1668 to :class:`~scipy.interpolate.RegularGridInterpolator`. Given a PSI-style
1669 Fortran-ordered 3D dataset, the resulting interpolator can be queried using
1670 :math:`(r, \theta, \phi)` coordinates.
1671
1672 Examples
1673 --------
1674 Import a 3D HDF5 cube.
1675
1676 >>> from psi_io.data import get_3d_data
1677 >>> from psi_io import read_hdf_by_value
1678 >>> from numpy import pi
1679 >>> filepath = get_3d_data()
1680
1681 Read the dataset by value (at 15 R_sun in the radial dimension).
1682
1683 >>> data_and_scales = read_hdf_by_value(filepath, 15, None, None)
1684 >>> interpolator = instantiate_linear_interpolator(*data_and_scales)
1685
1686 Interpolate at a specific position.
1687
1688 >>> interpolator((15, pi/2, pi))
1689 0.0012864485109423877
1690 """
1691 _except_no_scipy()
1692 return RegularGridInterpolator(
1693 values=args[0].T,
1694 points=args[1:],
1695 **kwargs)
1696
1697
[docs]
1698def sp_interpolate_slice_from_hdf(*xi, **kwargs):
1699 r"""
1700 Interpolate a slice from HDF data using SciPy's `RegularGridInterpolator`.
1701
1702 .. note::
1703 Slicing routines result in a dimensional reduction. The dimensions
1704 that are fixed (i.e. provided as float values in `*xi`) are removed
1705 from the output slice, while the dimensions that are not fixed
1706 (*i.e.* provided as None in `*xi`) are retained.
1707
1708 Parameters
1709 ----------
1710 *xi : sequence
1711 Positional arguments passed-through to :func:`read_hdf_by_value`.
1712 **kwargs : dict
1713 Keyword arguments passed-through to :func:`read_hdf_by_value`.
1714 **NOTE: Instantiating a linear interpolator requires the** ``return_scales``
1715 **keyword argument to be set to True; this function overrides
1716 any provided value for** ``return_scales`` **to ensure this behavior.**
1717
1718 Returns
1719 -------
1720 data_slice : np.ndarray
1721 The interpolated data slice with fixed dimensions removed.
1722 scales : list[np.ndarray]
1723 Coordinate scale arrays for the retained (non-fixed) dimensions.
1724
1725 Notes
1726 -----
1727 This function reads data from an HDF file, builds a
1728 :class:`~scipy.interpolate.RegularGridInterpolator`, and evaluates it at
1729 the requested fixed coordinates to produce the slice.
1730
1731 .. note::
1732 The returned slice is Fortran-ordered, *e.g.* radial slices have shape
1733 :math:`(n_\phi, n_\theta)`, and :math:`\phi` slices have shape
1734 :math:`(n_r, n_\theta)`.
1735
1736 .. note::
1737 :class:`~scipy.interpolate.RegularGridInterpolator` casts all input data
1738 to ``float64`` internally. PSI HDF datasets stored as ``float32`` will
1739 therefore be upcast during interpolation.
1740
1741 Examples
1742 --------
1743 >>> from psi_io.data import get_3d_data
1744 >>> from psi_io import sp_interpolate_slice_from_hdf
1745 >>> from numpy import pi
1746 >>> filepath = get_3d_data()
1747
1748 Fetch a 2D slice at r=15 from 3D map
1749
1750 >>> slice_, theta_scale, phi_scale = sp_interpolate_slice_from_hdf(filepath, 15, None, None)
1751 >>> slice_.shape, theta_scale.shape, phi_scale.shape
1752 ((181, 100), (100,), (181,))
1753
1754 Fetch a single point from 3D map
1755
1756 >>> point_value, *_ = sp_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
1757 >>> point_value
1758 6.084495480971823
1759 """
1760 filepath, *args = xi
1761 kwargs.pop('return_scales', None)
1762 result = read_hdf_by_value(filepath, *args, **kwargs)
1763 interpolator = instantiate_linear_interpolator(*result)
1764 grid = [yi[0] if yi[0] is not None else yi[1] for yi in zip(args, result[1:])]
1765 slice_ = interpolator(tuple(np.meshgrid(*grid, indexing='ij')))
1766 indices = [0 if yi is not None else slice(None, None) for yi in args]
1767 return slice_[tuple(indices)].T, *[yi[1] for yi in zip(args, result[1:]) if yi[0] is None]
1768
1769
[docs]
1770def np_interpolate_slice_from_hdf(ifile: PathLike, /,
1771 *xi: Union[float, Tuple[float, float], None],
1772 dataset_id: Optional[str] = None,
1773 by_index: bool = False,
1774 ):
1775 """
1776 Interpolate a slice from HDF data using linear interpolation.
1777
1778 .. note::
1779 Slicing routines result in a dimensional reduction. The dimensions
1780 that are fixed (i.e. provided as float values in `*xi`) are removed
1781 from the output slice, while the dimensions that are not fixed
1782 (*i.e.* provided as `None` in `*xi`) are retained.
1783
1784 Parameters
1785 ----------
1786 ifile : PathLike
1787 The path to the HDF file to read.
1788 *xi : sequence
1789 Positional arguments passed-through to reader function.
1790 dataset_id : str | None, optional
1791 The identifier of the dataset to read. If ``None``, a default dataset
1792 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5).
1793 Default is ``None``.
1794 by_index : bool, optional
1795 If ``True``, use :func:`read_hdf_by_ivalue` to read data by fractional
1796 index values. If ``False``, use :func:`read_hdf_by_value` to read by
1797 physical coordinate values. Default is ``False``.
1798
1799 Returns
1800 -------
1801 data_slice : np.ndarray
1802 The interpolated data slice with fixed dimensions removed.
1803 scales : list[np.ndarray]
1804 Coordinate scale arrays for the retained (non-fixed) dimensions.
1805
1806 Raises
1807 ------
1808 ValueError
1809 If the number of dimensions to interpolate over is not supported.
1810
1811 Notes
1812 -----
1813 This function supports linear, bilinear, and trilinear interpolation
1814 depending on the number of dimensions fixed in `xi`.
1815
1816 Examples
1817 --------
1818 >>> from psi_io.data import get_3d_data
1819 >>> from psi_io import np_interpolate_slice_from_hdf
1820 >>> from numpy import pi
1821 >>> filepath = get_3d_data()
1822
1823 Fetch a 2D slice at r=15 from 3D map
1824
1825 >>> slice_, theta_scale, phi_scale = np_interpolate_slice_from_hdf(filepath, 15, None, None)
1826 >>> slice_.shape, theta_scale.shape, phi_scale.shape
1827 ((181, 100), (100,), (181,))
1828
1829 Fetch a single point from 3D map
1830
1831 >>> point_value, *_ = np_interpolate_slice_from_hdf(filepath, 1, pi/2, pi)
1832 >>> point_value
1833 6.084496
1834
1835 """
1836 reader = read_hdf_by_value if not by_index else read_hdf_by_ivalue
1837 data, *scales = reader(ifile, *xi, dataset_id=dataset_id, return_scales=True)
1838 f_ = np.transpose(data)
1839 slice_type = sum([yi is not None for yi in xi])
1840 if slice_type == 1:
1841 return _np_linear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
1842 elif slice_type == 2:
1843 return _np_bilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
1844 elif slice_type == 3:
1845 return _np_trilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None]
1846 else:
1847 raise ValueError("Not a valid number of dimensions for supported linear interpolation methods")
1848
1849
[docs]
1850def interpolate_positions_from_hdf(ifile, *xi, **kwargs):
1851 r"""
1852 Interpolate at a list of scale positions using SciPy's
1853 :class:`~scipy.interpolate.RegularGridInterpolator`.
1854
1855 Parameters
1856 ----------
1857 ifile : PathLike
1858 The path to the HDF file to read.
1859 *xi : sequence[np.ndarray]
1860 Coordinate values for each dimension of the ``n``-dimensional dataset.
1861 Each array must have the same length :math:`m`; the function assembles
1862 them into an :math:`m \times n` column stack for interpolation.
1863 **kwargs
1864 Keyword arguments forwarded to :func:`read_hdf_by_value`.
1865
1866 Returns
1867 -------
1868 out : np.ndarray
1869 The interpolated values at the provided positions.
1870
1871 Notes
1872 -----
1873 This function reads data from an HDF file, creates a linear interpolator,
1874 and interpolates at the provided scale values. For each dimension, the
1875 minimum and maximum values from the provided arrays are used to read
1876 the necessary subset of data from the HDF file *viz.* to avoid loading
1877 the entire dataset into memory.
1878
1879 Examples
1880 --------
1881 Import a 3D HDF5 cube.
1882
1883 >>> from psi_io.data import get_3d_data
1884 >>> from psi_io import interpolate_positions_from_hdf
1885 >>> import numpy as np
1886 >>> filepath = get_3d_data()
1887
1888 Set up positions to interpolate.
1889
1890 >>> r_vals = np.array([15, 20, 25])
1891 >>> theta_vals = np.array([np.pi/4, np.pi/2, 3*np.pi/4])
1892 >>> phi_vals = np.array([0, np.pi, 2*np.pi])
1893
1894 Interpolate at the specified positions.
1895
1896 >>> interpolate_positions_from_hdf(filepath, r_vals, theta_vals, phi_vals)
1897 [0.0008402743657585175, 0.000723875405654482, -0.00041033233811179216]
1898 """
1899 xi_ = [(np.nanmin(i), np.nanmax(i)) for i in xi]
1900 f, *scales = read_hdf_by_value(ifile, *xi_, **kwargs)
1901 interpolator = instantiate_linear_interpolator(f, *scales, bounds_error=False)
1902 return interpolator(np.stack(xi, axis=len(xi[0].shape)))
1903
1904
[docs]
1905def interpolate_point_from_1d_slice(xi, scalex, values):
1906 """
1907 Interpolate a point from a 1D slice using linear interpolation.
1908
1909 Parameters
1910 ----------
1911 xi : float
1912 The coordinate value at which to interpolate.
1913 scalex : np.ndarray
1914 The coordinate scale array for the dimension.
1915 values : np.ndarray
1916 The 1D data array to interpolate.
1917
1918 Returns
1919 -------
1920 out : np.ndarray
1921 The interpolated scalar value as a zero-dimensional array.
1922
1923 Examples
1924 --------
1925 >>> import numpy as np
1926 >>> from psi_io.psi_io import interpolate_point_from_1d_slice
1927 >>> x = np.linspace(0.0, 1.0, 11)
1928 >>> f = x ** 2
1929 >>> float(interpolate_point_from_1d_slice(0.5, x, f))
1930 0.25
1931 """
1932 if scalex[0] > scalex[-1]:
1933 scalex, values = scalex[::-1], values[::-1]
1934 xi_ = int(np.searchsorted(scalex, xi))
1935 sx_ = slice(*_check_index_ranges(len(scalex), xi_, xi_))
1936 return _np_linear_interpolation([xi], [scalex[sx_]], values[sx_])
1937
1938
[docs]
1939def interpolate_point_from_2d_slice(xi, yi, scalex, scaley, values):
1940 """
1941 Interpolate a point from a 2D slice using bilinear interpolation.
1942
1943 Parameters
1944 ----------
1945 xi : float
1946 The coordinate value for the first dimension.
1947 yi : float
1948 The coordinate value for the second dimension.
1949 scalex : np.ndarray
1950 The coordinate scale array for the first dimension.
1951 scaley : np.ndarray
1952 The coordinate scale array for the second dimension.
1953 values : np.ndarray
1954 The 2D data array to interpolate, with shape ``(nx, ny)``.
1955
1956 Returns
1957 -------
1958 out : np.ndarray
1959 The interpolated scalar value as a zero-dimensional array.
1960
1961 Examples
1962 --------
1963 >>> import numpy as np
1964 >>> from psi_io.psi_io import interpolate_point_from_2d_slice
1965 >>> x = np.linspace(0.0, 1.0, 11)
1966 >>> y = np.linspace(0.0, 1.0, 11)
1967 >>> f = np.outer(x, y)
1968 >>> float(interpolate_point_from_2d_slice(0.5, 0.5, x, y, f))
1969 0.25
1970 """
1971 values = np.transpose(values)
1972 if scalex[0] > scalex[-1]:
1973 scalex, values = scalex[::-1], values[::-1, :]
1974 if scaley[0] > scaley[-1]:
1975 scaley, values = scaley[::-1], values[:, ::-1]
1976 xi_, yi_ = int(np.searchsorted(scalex, xi)), int(np.searchsorted(scaley, yi))
1977 sx_, sy_ = slice(*_check_index_ranges(len(scalex), xi_, xi_)), slice(*_check_index_ranges(len(scaley), yi_, yi_))
1978 return _np_bilinear_interpolation([xi, yi], [scalex[sx_], scaley[sy_]], values[(sx_, sy_)])
1979
1980
1981def _rdhdf_nd(hdf_filename: str,
1982 dimensionality: int
1983 ) -> Tuple[np.ndarray, ...]:
1984 """Read an n-dimensional PSI-style HDF file; shared implementation for `rdhdf_1d/2d/3d`.
1985
1986 Parameters
1987 ----------
1988 hdf_filename : PathLike
1989 The path to the HDF5 (.h5) or HDF4 (.hdf) file to read.
1990 dimensionality : int
1991 The expected number of dimensions of the dataset.
1992
1993 Returns
1994 -------
1995 out : tuple[np.ndarray, ...]
1996 The coordinate scales for each dimension followed by the data array.
1997
1998 Raises
1999 ------
2000 ValueError
2001 If the dataset dimensionality does not match ``dimensionality``.
2002
2003 Examples
2004 --------
2005 >>> from psi_io.psi_io import _rdhdf_nd
2006 >>> from psi_io.data import get_3d_data
2007 >>> r, t, p, f = _rdhdf_nd(get_3d_data(), dimensionality=3)
2008 >>> f.shape
2009 (181, 100, 151)
2010 """
2011 f, *scales = read_hdf_data(hdf_filename)
2012 if f.ndim != dimensionality:
2013 err = f'Expected {dimensionality}D data, got {f.ndim}D data instead.'
2014 raise ValueError(err)
2015 scales = scales or (np.empty(0) for _ in f.shape)
2016 return *scales, f
2017
2018
2019def _wrhdf_nd(hdf_filename: str,
2020 data: np.ndarray,
2021 *scales: Sequence[Union[np.ndarray, None]],
2022 dimensionality: int,
2023 sync_dtype: bool = True,
2024 **kwargs
2025 ) -> None:
2026 """Write an n-dimensional PSI-style HDF file; shared implementation for `wrhdf_1d/2d/3d`.
2027
2028 Parameters
2029 ----------
2030 hdf_filename : PathLike
2031 The path to the HDF5 (.h5) or HDF4 (.hdf) file to write.
2032 data : np.ndarray
2033 The data array to write.
2034 *scales : Sequence[np.ndarray | None]
2035 The scales (coordinate arrays) for each dimension.
2036 dimensionality : int
2037 The expected number of dimensions of the data array.
2038 sync_dtype : bool
2039 If True, scale dtypes are cast to match the data array dtype.
2040 **kwargs
2041 Additional keyword arguments forwarded to :func:`write_hdf_data`.
2042
2043 Raises
2044 ------
2045 ValueError
2046 If the data dimensionality does not match ``dimensionality``.
2047
2048 Examples
2049 --------
2050 >>> import tempfile, numpy as np
2051 >>> from pathlib import Path
2052 >>> from psi_io.psi_io import _wrhdf_nd
2053 >>> from psi_io import read_hdf_data
2054 >>> f = np.ones((10, 20, 30), dtype=np.float32)
2055 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32)
2056 >>> t = np.linspace(0.0, 3.14, 20, dtype=np.float32)
2057 >>> p = np.linspace(0.0, 6.28, 30, dtype=np.float32)
2058 >>> with tempfile.TemporaryDirectory() as d:
2059 ... _wrhdf_nd(Path(d) / "out.h5", f, r, t, p, dimensionality=3)
2060 ... data, *_ = read_hdf_data(Path(d) / "out.h5")
2061 ... data.shape
2062 (10, 20, 30)
2063 """
2064 if data.ndim != dimensionality:
2065 err = f'Expected {dimensionality}D data, got {data.ndim}D data instead.'
2066 raise ValueError(err)
2067 write_hdf_data(hdf_filename, data, *scales, sync_dtype=sync_dtype, **kwargs)
2068
2069
2070def _get_scales_nd_h5(ifile: Union[ Path, str], /,
2071 dimensionality: int,
2072 dataset_id: Optional[str] = None,
2073 ):
2074 """HDF5 (.h5) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`.
2075
2076 Parameters
2077 ----------
2078 ifile : PathLike
2079 The path to the HDF5 (.h5) file.
2080 dimensionality : int
2081 The expected number of dimensions of the dataset.
2082 dataset_id : str, optional
2083 The identifier of the dataset whose scales are read.
2084 If ``None``, the default PSI dataset (``'Data'``) is used.
2085
2086 Returns
2087 -------
2088 out : tuple[np.ndarray, ...]
2089 The coordinate scale arrays for each dimension.
2090
2091 Raises
2092 ------
2093 ValueError
2094 If the dataset dimensionality does not match ``dimensionality``.
2095 ValueError
2096 If any dimension has no associated scale.
2097
2098 Examples
2099 --------
2100 >>> from psi_io.psi_io import _get_scales_nd_h5
2101 >>> from psi_io.data import get_3d_data
2102 >>> r, t, p = _get_scales_nd_h5(get_3d_data(), dimensionality=3)
2103 >>> r.shape, t.shape, p.shape
2104 ((151,), (100,), (181,))
2105 """
2106 with h5.File(ifile, 'r') as hdf:
2107 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2108 ndim = data.ndim
2109 if ndim != dimensionality:
2110 err = f'Expected {dimensionality}D data, got {ndim}D data instead.'
2111 raise ValueError(err)
2112 scales = []
2113 for dim in data.dims:
2114 if dim:
2115 scales.append(dim[0][:])
2116 else:
2117 raise ValueError(f'Dimension has no scale associated with it.')
2118 return tuple(scales)
2119
2120
2121def _get_scales_nd_h4(ifile: Union[ Path, str], /,
2122 dimensionality: int,
2123 dataset_id: Optional[str] = None,
2124 ):
2125 """HDF4 (.hdf) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`.
2126
2127 Parameters
2128 ----------
2129 ifile : PathLike
2130 The path to the HDF4 (.hdf) file.
2131 dimensionality : int
2132 The expected number of dimensions of the dataset.
2133 dataset_id : str, optional
2134 The identifier of the dataset whose scales are read.
2135 If ``None``, the default PSI dataset (``'Data-Set-2'``) is used.
2136
2137 Returns
2138 -------
2139 out : tuple[np.ndarray, ...]
2140 The coordinate scale arrays for each dimension.
2141
2142 Raises
2143 ------
2144 ValueError
2145 If the dataset dimensionality does not match ``dimensionality``.
2146 ValueError
2147 If any dimension has no associated scale.
2148
2149 Examples
2150 --------
2151 >>> from psi_io.psi_io import _get_scales_nd_h4
2152 >>> from psi_io.data import get_3d_data
2153 >>> r, t, p = _get_scales_nd_h4(get_3d_data(hdf=".hdf"), dimensionality=3) # doctest: +SKIP
2154 >>> r.shape # doctest: +SKIP
2155 (151,)
2156 """
2157 hdf = h4.SD(str(ifile))
2158 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2159 ndim = data.info()[1]
2160 if ndim != dimensionality:
2161 err = f'Expected {dimensionality}D data, got {ndim}D data instead.'
2162 raise ValueError(err)
2163 scales = []
2164 for k_, v_ in reversed(data.dimensions(full=1).items()):
2165 if v_[3]:
2166 scales.append(hdf.select(k_)[:])
2167 else:
2168 raise ValueError('Dimension has no scale associated with it.')
2169 return tuple(scales)
2170
2171
2172def _read_h5_meta(ifile: PathLike, /,
2173 dataset_id: Optional[str] = None
2174 ):
2175 """HDF5 (.h5) version of :func:`read_hdf_meta`.
2176
2177 Examples
2178 --------
2179 >>> from psi_io.psi_io import _read_h5_meta
2180 >>> from psi_io.data import get_3d_data
2181 >>> meta = _read_h5_meta(get_3d_data())
2182 >>> meta[0].name
2183 'Data'
2184 """
2185 with h5.File(ifile, 'r') as hdf:
2186 # Raises KeyError if ``dataset_id`` not found
2187 # If ``dataset_id`` is None, get all non-scale :class:`h5.Dataset`s
2188 if dataset_id:
2189 datasets = (dataset_id, hdf[dataset_id]),
2190 else:
2191 datasets = ((k, v) for k, v in hdf.items() if not v.is_scale)
2192
2193 # One should avoid multiple calls to ``dimproxy[0]`` – *e.g.* ``dimproxy[0].dtype`` and
2194 # ``dimproxy[0].shape`` – because the __getitem__ method creates and returns a new
2195 # :class:`~h5.DimensionProxy` object each time it is called. [Does this matter? Probably not.]
2196 return [HdfDataMeta(name=k,
2197 type=v.dtype,
2198 shape=v.shape,
2199 attr=dict(v.attrs),
2200 scales=[HdfScaleMeta(name=dimproxy.label,
2201 type=dim.dtype,
2202 shape=dim.shape,
2203 attr=dict(dim.attrs),
2204 imin=dim[0],
2205 imax=dim[-1])
2206 for dimproxy in v.dims if dimproxy and (dim := dimproxy[0])])
2207 for k, v in datasets]
2208
2209
2210def _read_h4_meta(ifile: PathLike, /,
2211 dataset_id: Optional[str] = None
2212 ):
2213 """HDF4 (.hdf) version of :func:`read_hdf_meta`.
2214
2215 Examples
2216 --------
2217 >>> from psi_io.psi_io import _read_h4_meta
2218 >>> from psi_io.data import get_3d_data
2219 >>> meta = _read_h4_meta(get_3d_data(hdf=".hdf")) # doctest: +SKIP
2220 >>> meta[0].name # doctest: +SKIP
2221 'Data-Set-2'
2222 """
2223 hdf = h4.SD(str(ifile))
2224 # Raises HDF4Error if ``dataset_id`` not found
2225 # If ``dataset_id`` is None, get all non-scale :class:`pyhdf.SD.SDS`s
2226 if dataset_id:
2227 datasets = (dataset_id, hdf.select(dataset_id)),
2228 else:
2229 datasets = ((k, hdf.select(k)) for k in hdf.datasets().keys() if not hdf.select(k).iscoordvar())
2230
2231 # The inner list comprehension differs in approach from the HDF5 version because calling
2232 # ``dimensions(full=1)`` on an :class:`~pyhdf.SD.SDS` returns a dictionary of dimension
2233 # dataset identifiers (keys) and tuples containing dimension metadata (values). Even if no
2234 # coordinate-variable datasets are defined, this dictionary is still returned; the only
2235 # indication that the datasets returned do not exist is that the "type" field (within the
2236 # tuple of dimension metadata) is set to 0.
2237
2238 # Also, one cannot avoid multiple calls to ``hdf.select(k_)`` within the inner list comprehension
2239 # because :class:`~pyhdf.SD.SDS` objects do not define a ``__bool__`` method, and the fallback
2240 # behavior of Python is to assess if the __len__ method returns a non-zero value (which, in
2241 # this case, always returns 0).
2242 return [HdfDataMeta(name=k,
2243 type=SDC_TYPE_CONVERSIONS[v.info()[3]],
2244 shape=_cast_shape_tuple(v.info()[2]),
2245 attr=v.attributes(),
2246 scales=[HdfScaleMeta(name=k_,
2247 type=SDC_TYPE_CONVERSIONS[v_[3]],
2248 shape=_cast_shape_tuple(v_[0]),
2249 attr=hdf.select(k_).attributes(),
2250 imin=hdf.select(k_)[0],
2251 imax=hdf.select(k_)[-1])
2252 for k_, v_ in v.dimensions(full=1).items() if v_[3]])
2253 for k, v in datasets]
2254
2255
2256def _read_h5_rtp(ifile: Union[ Path, str], /):
2257 """HDF5 (.h5) version of :func:`read_rtp_meta`.
2258
2259 Examples
2260 --------
2261 >>> from psi_io.psi_io import _read_h5_rtp
2262 >>> from psi_io.data import get_3d_data
2263 >>> meta = _read_h5_rtp(get_3d_data())
2264 >>> sorted(meta.keys())
2265 ['p', 'r', 't']
2266 """
2267 with h5.File(ifile, 'r') as hdf:
2268 return {k: (hdf[v].size, hdf[v][0], hdf[v][-1])
2269 for k, v in zip('rtp', PSI_SCALE_ID['h5'])}
2270
2271
2272def _read_h4_rtp(ifile: Union[ Path, str], /):
2273 """HDF4 (.hdf) version of :func:`read_rtp_meta`.
2274
2275 Examples
2276 --------
2277 >>> from psi_io.psi_io import _read_h4_rtp
2278 >>> from psi_io.data import get_3d_data
2279 >>> meta = _read_h4_rtp(get_3d_data(hdf=".hdf")) # doctest: +SKIP
2280 >>> sorted(meta.keys()) # doctest: +SKIP
2281 ['p', 'r', 't']
2282 """
2283 hdf = h4.SD(str(ifile))
2284 return {k: (hdf.select(v).info()[2], hdf.select(v)[0], hdf.select(v)[-1])
2285 for k, v in zip('ptr', PSI_SCALE_ID['h4'])}
2286
2287
2288def _read_h5_data(ifile: PathLike, /,
2289 dataset_id: Optional[str] = None,
2290 return_scales: bool = True,
2291 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2292 """HDF5 (.h5) version of :func:`read_hdf_data`.
2293
2294 Examples
2295 --------
2296 >>> from psi_io.psi_io import _read_h5_data
2297 >>> from psi_io.data import get_3d_data
2298 >>> data, r, t, p = _read_h5_data(get_3d_data())
2299 >>> data.shape
2300 (181, 100, 151)
2301 """
2302 with h5.File(ifile, 'r') as hdf:
2303 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2304 dataset = data[:]
2305 if return_scales:
2306 scales = [dim[0][:] for dim in data.dims if dim]
2307 return dataset, *scales
2308 return dataset
2309
2310
2311def _read_h4_data(ifile: PathLike, /,
2312 dataset_id: Optional[str] = None,
2313 return_scales: bool = True,
2314 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2315 """HDF4 (.hdf) version of :func:`read_hdf_data`.
2316
2317 Examples
2318 --------
2319 >>> from psi_io.psi_io import _read_h4_data
2320 >>> from psi_io.data import get_3d_data
2321 >>> data, *_ = _read_h4_data(get_3d_data(hdf=".hdf")) # doctest: +SKIP
2322 >>> data.shape # doctest: +SKIP
2323 (181, 100, 151)
2324 """
2325 hdf = h4.SD(str(ifile))
2326 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2327 if return_scales:
2328 out = (data[:],
2329 *[hdf.select(k_)[:] for k_, v_ in reversed(data.dimensions(full=1).items()) if v_[3]])
2330 else:
2331 out = data[:]
2332 return out
2333
2334
2335def _read_h5_by_index(ifile: PathLike, /,
2336 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
2337 dataset_id: Optional[str] = None,
2338 return_scales: bool = True,
2339 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2340 """HDF5 (.h5) version of :func:`read_hdf_by_index`.
2341
2342 Examples
2343 --------
2344 >>> from psi_io.psi_io import _read_h5_by_index
2345 >>> from psi_io.data import get_3d_data
2346 >>> f, r, t, p = _read_h5_by_index(get_3d_data(), 0, None, None)
2347 >>> f.shape
2348 (181, 100, 1)
2349 """
2350 with h5.File(ifile, 'r') as hdf:
2351 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2352 if len(xi) != data.ndim:
2353 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2354 slices = [_parse_index_inputs(slice_input) for slice_input in xi]
2355 dataset = data[tuple(reversed(slices))]
2356 if return_scales:
2357 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim]
2358 return dataset, *scales
2359 return dataset
2360
2361def _read_h4_by_index(ifile: PathLike, /,
2362 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None],
2363 dataset_id: Optional[str] = None,
2364 return_scales: bool = True,
2365 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2366 """HDF4 (.hdf) version of :func:`read_hdf_by_index`.
2367
2368 Examples
2369 --------
2370 >>> from psi_io.psi_io import _read_h4_by_index
2371 >>> from psi_io.data import get_3d_data
2372 >>> f, r, t, p = _read_h4_by_index(get_3d_data(hdf=".hdf"), 0, None, None) # doctest: +SKIP
2373 >>> f.shape # doctest: +SKIP
2374 (181, 100, 1)
2375 """
2376 hdf = h4.SD(str(ifile))
2377 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2378 ndim = data.info()[1]
2379 if len(xi) != ndim:
2380 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2381 slices = [_parse_index_inputs(slice_input) for slice_input in xi]
2382 dataset = data[tuple(reversed(slices))]
2383 if return_scales:
2384 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]]
2385 return dataset, *scales
2386 return dataset
2387
2388
2389def _read_h5_by_value(ifile: PathLike, /,
2390 *xi: Union[float, Tuple[float, float], None],
2391 dataset_id: Optional[str] = None,
2392 return_scales: bool = True,
2393 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2394 """HDF5 (.h5) version of :func:`read_hdf_by_value`.
2395
2396 Examples
2397 --------
2398 >>> from psi_io.psi_io import _read_h5_by_value
2399 >>> from psi_io.data import get_3d_data
2400 >>> f, r, t, p = _read_h5_by_value(get_3d_data(), 15, None, None)
2401 >>> f.shape
2402 (181, 100, 2)
2403 """
2404 with h5.File(ifile, 'r') as hdf:
2405 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2406 if len(xi) != data.ndim:
2407 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2408 slices = []
2409 for dimproxy, value in zip(data.dims, xi):
2410 if dimproxy:
2411 slices.append(_parse_value_inputs(dimproxy[0], value))
2412 elif value is None:
2413 slices.append(slice(None))
2414 else:
2415 raise ValueError("Cannot slice by value on dimension without scales")
2416 dataset = data[tuple(reversed(slices))]
2417 if return_scales:
2418 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim]
2419 return dataset, *scales
2420 return dataset
2421
2422
2423def _read_h4_by_value(ifile: PathLike, /,
2424 *xi: Union[float, Tuple[float, float], None],
2425 dataset_id: Optional[str] = None,
2426 return_scales: bool = True,
2427 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2428 """HDF4 (.hdf) version of :func:`read_hdf_by_value`.
2429
2430 Examples
2431 --------
2432 >>> from psi_io.psi_io import _read_h4_by_value
2433 >>> from psi_io.data import get_3d_data
2434 >>> f, r, t, p = _read_h4_by_value(get_3d_data(hdf=".hdf"), 15, None, None) # doctest: +SKIP
2435 >>> f.shape # doctest: +SKIP
2436 (181, 100, 2)
2437 """
2438 hdf = h4.SD(str(ifile))
2439 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2440 ndim = data.info()[1]
2441 if len(xi) != ndim:
2442 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2443 slices = []
2444 for (k_, v_), value in zip(reversed(data.dimensions(full=1).items()), xi):
2445 if v_[3] != 0:
2446 slices.append(_parse_value_inputs(hdf.select(k_), value))
2447 elif value is None:
2448 slices.append(slice(None))
2449 else:
2450 raise ValueError("Cannot slice by value on dimension without scales")
2451 dataset = data[tuple(reversed(slices))]
2452 if return_scales:
2453 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]]
2454 return dataset, *scales
2455 return dataset
2456
2457
2458def _read_h5_by_ivalue(ifile: PathLike, /,
2459 *xi: Union[float, Tuple[float, float], None],
2460 dataset_id: Optional[str] = None,
2461 return_scales: bool = True,
2462 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2463 """HDF5 (.h5) version of :func:`read_hdf_by_ivalue`.
2464
2465 Examples
2466 --------
2467 >>> from psi_io.psi_io import _read_h5_by_ivalue
2468 >>> from psi_io.data import get_3d_data
2469 >>> f, r_idx, t_idx, p_idx = _read_h5_by_ivalue(get_3d_data(), 2.7, None, None)
2470 >>> r_idx.shape
2471 (2,)
2472 """
2473 with h5.File(ifile, 'r') as hdf:
2474 data = hdf[dataset_id or PSI_DATA_ID['h5']]
2475 if len(xi) != data.ndim:
2476 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2477 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(data.shape), xi)]
2478 dataset = data[tuple(reversed(slices))]
2479 if return_scales:
2480 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(data.shape))]
2481 return dataset, *scales
2482 return dataset
2483
2484
2485def _read_h4_by_ivalue(ifile: PathLike, /,
2486 *xi: Union[float, Tuple[float, float], None],
2487 dataset_id: Optional[str] = None,
2488 return_scales: bool = True,
2489 ) -> Union[np.ndarray, Tuple[np.ndarray]]:
2490 """HDF4 (.hdf) version of :func:`read_hdf_by_ivalue`.
2491
2492 Examples
2493 --------
2494 >>> from psi_io.psi_io import _read_h4_by_ivalue
2495 >>> from psi_io.data import get_3d_data
2496 >>> f, r_idx, t_idx, p_idx = _read_h4_by_ivalue(get_3d_data(hdf=".hdf"), 2.7, None, None) # doctest: +SKIP
2497 >>> r_idx.shape # doctest: +SKIP
2498 (2,)
2499 """
2500 hdf = h4.SD(str(ifile))
2501 data = hdf.select(dataset_id or PSI_DATA_ID['h4'])
2502 ndim, shape = data.info()[1], _cast_shape_tuple(data.info()[2])
2503 if len(xi) != ndim:
2504 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}")
2505 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(shape), xi)]
2506 dataset = data[tuple(reversed(slices))]
2507 if return_scales:
2508 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(shape))]
2509 return dataset, *scales
2510 return dataset
2511
2512
2513def _write_h4_data(ifile: PathLike, /,
2514 data: np.ndarray,
2515 *scales: Sequence[np.ndarray],
2516 dataset_id: Optional[str] = None,
2517 sync_dtype: bool = False,
2518 strict: bool = True,
2519 **kwargs) -> Path:
2520 """HDF4 (.hdf) version of :func:`write_hdf_data`.
2521
2522 Examples
2523 --------
2524 >>> import tempfile, numpy as np
2525 >>> from pathlib import Path
2526 >>> from psi_io.psi_io import _write_h4_data, _read_h4_data
2527 >>> f = np.ones((10,), dtype=np.float32)
2528 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32)
2529 >>> with tempfile.TemporaryDirectory() as d: # doctest: +SKIP
2530 ... _write_h4_data(Path(d) / "out.hdf", f, x) # doctest: +SKIP
2531 ... data, *_ = _read_h4_data(Path(d) / "out.hdf") # doctest: +SKIP
2532 ... data.shape # doctest: +SKIP
2533 (10,)
2534 """
2535 dataid = dataset_id or PSI_DATA_ID['h4']
2536 h4file = h4.SD(str(ifile), h4.SDC.WRITE | h4.SDC.CREATE | h4.SDC.TRUNC)
2537 sds_id = h4file.create(dataid, _dtype_to_sdc(data.dtype), data.shape)
2538
2539 if scales:
2540 for i, scale in enumerate(reversed(scales)):
2541 if scale is not None:
2542 if sync_dtype:
2543 scale = scale.astype(data.dtype)
2544 sds_id.dim(i).setscale(_dtype_to_sdc(scale.dtype), scale.tolist())
2545
2546 if kwargs:
2547 for k, v in kwargs.items():
2548 npv = np.asarray(v)
2549 attr_ = sds_id.attr(k)
2550 try:
2551 val = npv.tolist()
2552 if isinstance(val, bytes):
2553 val = val.decode('latin-1')
2554 attr_.set(_dtype_to_sdc(npv.dtype), val)
2555 except KeyError as e:
2556 if strict:
2557 raise KeyError(f"Failed to set attribute '{k}' on dataset '{dataid}'") from e
2558 else:
2559 print(f"Warning: Failed to set attribute '{k}' on dataset '{dataid}'; skipping.")
2560
2561 sds_id.set(data)
2562 sds_id.endaccess()
2563 h4file.end()
2564
2565 return ifile
2566
2567
2568def _write_h5_data(ifile: PathLike, /,
2569 data: np.ndarray,
2570 *scales: Sequence[np.ndarray],
2571 dataset_id: Optional[str] = None,
2572 sync_dtype: bool = False,
2573 strict: bool = True,
2574 **kwargs) -> Path:
2575 """HDF5 (.h5) version of :func:`write_hdf_data`.
2576
2577 Examples
2578 --------
2579 >>> import tempfile, numpy as np
2580 >>> from pathlib import Path
2581 >>> from psi_io.psi_io import _write_h5_data, _read_h5_data
2582 >>> f = np.ones((10,), dtype=np.float32)
2583 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32)
2584 >>> with tempfile.TemporaryDirectory() as d:
2585 ... _write_h5_data(Path(d) / "out.h5", f, x)
2586 ... data, *_ = _read_h5_data(Path(d) / "out.h5")
2587 ... data.shape
2588 (10,)
2589 """
2590 dataid = dataset_id or PSI_DATA_ID['h5']
2591 with h5.File(ifile, "w") as h5file:
2592 dataset = h5file.create_dataset(dataid, data=data, dtype=data.dtype, shape=data.shape)
2593
2594 if scales:
2595 for i, scale in enumerate(scales):
2596 if scale is not None:
2597 if sync_dtype:
2598 scale = scale.astype(data.dtype)
2599 h5file.create_dataset(f"dim{i+1}", data=scale, dtype=scale.dtype, shape=scale.shape)
2600 h5file[dataid].dims[i].attach_scale(h5file[f"dim{i+1}"])
2601 h5file[dataid].dims[i].label = f"dim{i+1}"
2602
2603 if kwargs:
2604 for key, value in kwargs.items():
2605 if key.startswith('DIMENSION'):
2606 # Skip HDF5 dimension-scale bookkeeping attributes —
2607 # these are managed by attach_scale above and must not
2608 # be overwritten with stale object references.
2609 continue
2610 try:
2611 dataset.attrs[key] = value
2612 except TypeError as e:
2613 if strict:
2614 raise TypeError(f"Failed to set attribute '{key}' on dataset '{dataid}'") from e
2615 else:
2616 print(f"Warning: Failed to set attribute '{key}' on dataset '{dataid}'; skipping.")
2617
2618 return ifile
2619
2620
2621def _write_h4_meta(ifile: PathLike, /,
2622 meta: Optional[Mapping[str, Mapping[str, Any]]] = None,
2623 **kwargs) -> Path:
2624 """HDF4 (.hdf) version of :func:`write_hdf_meta`."""
2625 metadata = dict(meta or {})
2626 h4file = h4.SD(str(ifile), h4.SDC.READ | h4.SDC.WRITE)
2627
2628 for k, v in kwargs.items():
2629 npv = np.asarray(v)
2630 attr_ = h4file.attr(k)
2631 val = npv.tolist()
2632 if isinstance(val, bytes):
2633 val = val.decode('latin-1')
2634 attr_.set(_dtype_to_sdc(npv.dtype), val)
2635 for key, value in metadata.items():
2636 sds_id = h4file.select(key)
2637 for k, v in dict(value).items():
2638 npv = np.asarray(v)
2639 attr_ = sds_id.attr(k)
2640 val = npv.tolist()
2641 if isinstance(val, bytes):
2642 val = val.decode('latin-1')
2643 attr_.set(_dtype_to_sdc(npv.dtype), val)
2644 h4file.end()
2645
2646 return ifile
2647
2648
2649def _write_h5_meta(ifile: PathLike, /,
2650 meta: Optional[Mapping[str, Mapping[str, Any]]] = None,
2651 **kwargs) -> Path:
2652 """HDF5 (.h5) version of :func:`write_hdf_meta`."""
2653 metadata = dict(meta or {})
2654 with h5.File(ifile, "r+") as h5file:
2655 if kwargs:
2656 h5file.attrs.update(**kwargs)
2657 for key, value in metadata.items():
2658 h5file[key].attrs.update(**dict(value))
2659 return ifile
2660
2661
2662def _np_linear_interpolation(xi: Sequence, scales: Sequence, values: np.ndarray):
2663 """
2664 Perform linear interpolation over one dimension.
2665
2666 Parameters
2667 ----------
2668 xi : list[float | None]
2669 Target coordinate values for each dimension; ``None`` marks a
2670 "free" (not interpolated) dimension.
2671 scales : list[np.ndarray]
2672 Coordinate scale arrays, one per dimension.
2673 values : np.ndarray
2674 The data array to interpolate.
2675
2676 Returns
2677 -------
2678 out : np.ndarray
2679 The interpolated data array.
2680
2681 Examples
2682 --------
2683 >>> import numpy as np
2684 >>> from psi_io.psi_io import _np_linear_interpolation
2685 >>> scales = [np.array([0.0, 1.0])]
2686 >>> values = np.array([0.0, 2.0])
2687 >>> float(_np_linear_interpolation([0.5], scales, values))
2688 1.0
2689 """
2690 index0 = next((i for i, v in enumerate(xi) if v is not None), None)
2691 t = (xi[index0] - scales[index0][0])/(scales[index0][1] - scales[index0][0])
2692 f0 = [slice(None, None)]*values.ndim
2693 f1 = [slice(None, None)]*values.ndim
2694 f0[index0] = 0
2695 f1[index0] = 1
2696
2697 return (1 - t)*values[tuple(f0)] + t*values[tuple(f1)]
2698
2699
2700def _np_bilinear_interpolation(xi, scales, values):
2701 """
2702 Perform bilinear interpolation over two dimensions.
2703
2704 Parameters
2705 ----------
2706 xi : Sequence[float | None]
2707 Target coordinate values for each dimension; ``None`` marks a
2708 "free" (not interpolated) dimension.
2709 scales : Sequence[np.ndarray]
2710 Coordinate scale arrays, one per dimension.
2711 values : np.ndarray
2712 The data array to interpolate.
2713
2714 Returns
2715 -------
2716 out : np.ndarray
2717 The interpolated data array.
2718
2719 Examples
2720 --------
2721 >>> import numpy as np
2722 >>> from psi_io.psi_io import _np_bilinear_interpolation
2723 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0])]
2724 >>> values = np.array([[0.0, 0.0], [1.0, 2.0]])
2725 >>> float(_np_bilinear_interpolation([0.5, 0.5], scales, values))
2726 0.75
2727 """
2728 index0, index1 = [i for i, v in enumerate(xi) if v is not None]
2729 t, u = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1)]
2730
2731 f00 = [slice(None, None)]*values.ndim
2732 f10 = [slice(None, None)]*values.ndim
2733 f01 = [slice(None, None)]*values.ndim
2734 f11 = [slice(None, None)]*values.ndim
2735 f00[index0], f00[index1] = 0, 0
2736 f10[index0], f10[index1] = 1, 0
2737 f01[index0], f01[index1] = 0, 1
2738 f11[index0], f11[index1] = 1, 1
2739
2740 return (
2741 (1 - t)*(1 - u)*values[tuple(f00)] +
2742 t*(1 - u)*values[tuple(f10)] +
2743 (1 - t)*u*values[tuple(f01)] +
2744 t*u*values[tuple(f11)]
2745 )
2746
2747
2748def _np_trilinear_interpolation(xi, scales, values):
2749 """
2750 Perform trilinear interpolation over three dimensions.
2751
2752 Parameters
2753 ----------
2754 xi : list[float | None]
2755 Target coordinate values for each dimension; ``None`` marks a
2756 "free" (not interpolated) dimension.
2757 scales : list[np.ndarray]
2758 Coordinate scale arrays, one per dimension.
2759 values : np.ndarray
2760 The data array to interpolate.
2761
2762 Returns
2763 -------
2764 out : np.ndarray
2765 The interpolated data array.
2766
2767 Examples
2768 --------
2769 >>> import numpy as np
2770 >>> from psi_io.psi_io import _np_trilinear_interpolation
2771 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0]), np.array([0.0, 1.0])]
2772 >>> values = np.zeros((2, 2, 2))
2773 >>> values[1, 1, 1] = 8.0
2774 >>> float(_np_trilinear_interpolation([0.5, 0.5, 0.5], scales, values))
2775 1.0
2776 """
2777 index0, index1, index2 = [i for i, v in enumerate(xi) if v is not None]
2778 t, u, v = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1, index2)]
2779
2780 f000 = [slice(None, None)]*values.ndim
2781 f100 = [slice(None, None)]*values.ndim
2782 f010 = [slice(None, None)]*values.ndim
2783 f110 = [slice(None, None)]*values.ndim
2784 f001 = [slice(None, None)]*values.ndim
2785 f101 = [slice(None, None)]*values.ndim
2786 f011 = [slice(None, None)]*values.ndim
2787 f111 = [slice(None, None)]*values.ndim
2788
2789 f000[index0], f000[index1], f000[index2] = 0, 0, 0
2790 f100[index0], f100[index1], f100[index2] = 1, 0, 0
2791 f010[index0], f010[index1], f010[index2] = 0, 1, 0
2792 f110[index0], f110[index1], f110[index2] = 1, 1, 0
2793 f001[index0], f001[index1], f001[index2] = 0, 0, 1
2794 f101[index0], f101[index1], f101[index2] = 1, 0, 1
2795 f011[index0], f011[index1], f011[index2] = 0, 1, 1
2796 f111[index0], f111[index1], f111[index2] = 1, 1, 1
2797
2798 c00 = values[tuple(f000)]*(1 - t) + values[tuple(f100)]*t
2799 c10 = values[tuple(f010)]*(1 - t) + values[tuple(f110)]*t
2800 c01 = values[tuple(f001)]*(1 - t) + values[tuple(f101)]*t
2801 c11 = values[tuple(f011)]*(1 - t) + values[tuple(f111)]*t
2802
2803 c0 = c00*(1 - u) + c10*u
2804 c1 = c01*(1 - u) + c11*u
2805
2806 return c0*(1 - v) + c1*v
2807
2808
2809def _check_index_ranges(arr_size: int,
2810 i0: Union[int, np.integer],
2811 i1: Union[int, np.integer]
2812 ) -> Tuple[int, int]:
2813 """
2814 Adjust index ranges to ensure they cover at least two indices.
2815
2816 Parameters
2817 ----------
2818 arr_size : int
2819 The size of the array along the dimension.
2820 i0 : int
2821 The starting index.
2822 i1 : int
2823 The ending index.
2824
2825 Returns
2826 -------
2827 out : tuple[int, int]
2828 Adjusted starting and ending indices.
2829
2830 Notes
2831 -----
2832 This function ensures that the range between `i0` and `i1` includes at least
2833 two indices for interpolation purposes.
2834
2835 Examples
2836 --------
2837 >>> from psi_io.psi_io import _check_index_ranges
2838 >>> _check_index_ranges(10, 0, 0)
2839 (0, 2)
2840 >>> _check_index_ranges(10, 5, 5)
2841 (4, 6)
2842 >>> _check_index_ranges(10, 10, 10)
2843 (8, 10)
2844 """
2845 i0, i1 = int(i0), int(i1)
2846 if i0 == 0:
2847 return (i0, i1 + 2) if i1 == 0 else (i0, i1 + 1)
2848 elif i0 == arr_size:
2849 return i0 - 2, i1
2850 else:
2851 return i0 - 1, i1 + 1
2852
2853
2854def _cast_shape_tuple(input: Union[int, Sequence[int]]
2855 ) -> tuple[int, ...]:
2856 """
2857 Cast an input to a tuple of integers.
2858
2859 Parameters
2860 ----------
2861 input : int | Sequence[int]
2862 The input to cast.
2863
2864 Returns
2865 -------
2866 out : tuple[int, ...]
2867 The input cast as a tuple of integers.
2868
2869 Raises
2870 ------
2871 TypeError
2872 If the input is neither an integer nor an iterable of integers.
2873
2874 Examples
2875 --------
2876 >>> from psi_io.psi_io import _cast_shape_tuple
2877 >>> _cast_shape_tuple(5)
2878 (5,)
2879 >>> _cast_shape_tuple([3, 4, 5])
2880 (3, 4, 5)
2881 """
2882 if isinstance(input, int):
2883 return (input,)
2884 elif isinstance(input, Sequence):
2885 return tuple(int(i) for i in input)
2886 else:
2887 raise TypeError("Input must be an integer or an iterable of integers.")
2888
2889
2890def _parse_index_inputs(input: Union[int, slice, Sequence[Union[int, None]], None]
2891 ) -> slice:
2892 """
2893 Parse various slice input formats into a standard slice object.
2894
2895 Parameters
2896 ----------
2897 input : int | slice | Sequence[int | None] | None
2898 The index specification to parse:
2899
2900 - *int* — selects a single-element slice ``[n, n+1)``.
2901 - *Sequence[int | None]* — unpacked directly into :class:`slice`.
2902 - ``None`` — selects the entire dimension.
2903
2904 Returns
2905 -------
2906 slice
2907 The parsed slice object.
2908
2909 Raises
2910 ------
2911 TypeError
2912 If the input type is unsupported.
2913
2914 Examples
2915 --------
2916 >>> from psi_io.psi_io import _parse_index_inputs
2917 >>> _parse_index_inputs(3)
2918 slice(3, 4, None)
2919 >>> _parse_index_inputs((2, 7))
2920 slice(2, 7, None)
2921 >>> _parse_index_inputs(None)
2922 slice(None, None, None)
2923 """
2924 if isinstance(input, int):
2925 return slice(input, input + 1)
2926 elif isinstance(input, Sequence):
2927 return slice(*input)
2928 elif input is None:
2929 return slice(None)
2930 else:
2931 raise TypeError("Unsupported input type for slicing.")
2932
2933
2934def _parse_value_inputs(dimproxy,
2935 value,
2936 scale_exists: bool = True
2937 ) -> slice:
2938 """Parse a scale value or value range into a slice over a dimension's coordinate array.
2939
2940 Parameters
2941 ----------
2942 dimproxy : array-like
2943 The coordinate array (scale) for the dimension, supporting ``[:]`` indexing.
2944 value : float | tuple[float | None, float | None] | None
2945 The target value or range:
2946
2947 - ``None`` — select the entire dimension.
2948 - *float* — find the 2-element bracket ``[a, b]`` such that ``a <= value < b``.
2949 - *(float, float)* — find the bracket spanning the given range.
2950 scale_exists : bool
2951 Guard flag; raises :exc:`ValueError` if ``False`` and ``value`` is not ``None``.
2952
2953 Returns
2954 -------
2955 slice
2956 A slice object that selects the appropriate indices from the coordinate array.
2957
2958 Raises
2959 ------
2960 ValueError
2961 If ``value`` is not ``None`` and ``scale_exists`` is ``False``.
2962
2963 Examples
2964 --------
2965 >>> import numpy as np
2966 >>> from psi_io.psi_io import _parse_value_inputs
2967 >>> scale = np.linspace(0.0, 10.0, 11)
2968 >>> _parse_value_inputs(scale, None)
2969 slice(None, None, None)
2970 >>> _parse_value_inputs(scale, 4.5)
2971 slice(4, 6, None)
2972 >>> _parse_value_inputs(scale, (2.0, 7.0))
2973 slice(1, 8, None)
2974 """
2975 if value is None:
2976 return slice(None)
2977 if not scale_exists:
2978 raise ValueError("Cannot parse value inputs when scale does not exist.")
2979 dim = dimproxy[:]
2980 if not isinstance(value, Sequence):
2981 insert_index = np.searchsorted(dim, value)
2982 return slice(*_check_index_ranges(dim.size, insert_index, insert_index))
2983 else:
2984 temp_range = list(value)
2985 if temp_range[0] is None:
2986 temp_range[0] = -np.inf
2987 if temp_range[-1] is None:
2988 temp_range[-1] = np.inf
2989 insert_indices = np.searchsorted(dim, temp_range)
2990 return slice(*_check_index_ranges(dim.size, *insert_indices))
2991
2992
2993def _parse_ivalue_inputs(dimsize,
2994 input: Union[Union[int, float], slice, Sequence[Union[Union[int, float], None]], None]
2995 ) -> slice:
2996 """
2997 Parse a sub-index value or range into a slice over a dimension of a given size.
2998
2999 Parameters
3000 ----------
3001 dimsize : int
3002 The size of the array along the dimension; used to clamp the resulting slice.
3003 input : int | float | Sequence[int | float | None] | None
3004 The sub-index specification to parse:
3005
3006 - ``None`` — select the entire dimension.
3007 - *int* or *float* (:math:`a`) — returns ``slice(floor(a), ceil(a))``
3008 (clamped to ``[0, dimsize - 1]``).
3009 - *Sequence* ``(a, b)`` — returns ``slice(floor(a), ceil(b))``
3010 (clamped to ``[0, dimsize - 1]``).
3011
3012 Returns
3013 -------
3014 slice
3015 The parsed slice object, guaranteed to span at least 2 elements.
3016
3017 Raises
3018 ------
3019 TypeError
3020 If the input type is unsupported.
3021
3022 Examples
3023 --------
3024 >>> from psi_io.psi_io import _parse_ivalue_inputs
3025 >>> _parse_ivalue_inputs(10, None)
3026 slice(None, None, None)
3027 >>> _parse_ivalue_inputs(10, 2.7)
3028 slice(2, 4, None)
3029 >>> _parse_ivalue_inputs(10, (1.3, 4.8))
3030 slice(1, 5, None)
3031 """
3032 if input is None:
3033 return slice(None)
3034 elif isinstance(input, (int, float)):
3035 i0, i1 = math.floor(input), math.ceil(input)
3036 elif isinstance(input, Sequence):
3037 i0, i1 = math.floor(input[0]), math.ceil(input[1])
3038 else:
3039 raise TypeError("Unsupported input type for slicing.")
3040
3041 if i0 > i1:
3042 i0, i1 = i1, i0
3043 i0, i1 = max(0, i0), min(dimsize - 1, i1)
3044 if i0 > i1:
3045 i0, i1 = i1, i0
3046 i0, i1 = max(0, i0), min(dimsize - 1, i1)
3047 if (i1 - i0) < 2:
3048 return slice(i0, i1 + 2 - (i1-i0))
3049 else:
3050 return slice(i0, i1)