Source code for psi_io.psi_io

   1"""
   2Routines for reading and writing PSI-style HDF5 and HDF4 data files.
   3
   4This module provides a unified interface for interacting with PSI's HDF data
   5ecosystem.  It handles both HDF4 (``.hdf``) and HDF5 (``.h5``) file formats,
   6automatically dispatching to the appropriate backend based on the file
   7extension.
   8
   9Key interfaces
  10--------------
  11Reading full datasets:
  12    :func:`read_hdf_data`, :func:`rdhdf_1d`, :func:`rdhdf_2d`, :func:`rdhdf_3d`
  13
  14Writing full datasets:
  15    :func:`write_hdf_data`, :func:`wrhdf_1d`, :func:`wrhdf_2d`, :func:`wrhdf_3d`
  16
  17Reading file metadata:
  18    :func:`read_hdf_meta`, :func:`read_rtp_meta`
  19
  20Reading dataset subsets:
  21    :func:`get_scales_1d`, :func:`get_scales_2d`, :func:`get_scales_3d`,
  22    :func:`read_hdf_by_index`, :func:`read_hdf_by_value`, :func:`read_hdf_by_ivalue`
  23
  24Interpolating data:
  25    :func:`np_interpolate_slice_from_hdf`, :func:`sp_interpolate_slice_from_hdf`,
  26    :func:`interpolate_positions_from_hdf`
  27
  28Converting between formats:
  29    :func:`convert`, :func:`convert_psih4_to_psih5`
  30
  31See Also
  32--------
  33:mod:`psi_io.data` :
  34    Helpers for fetching example HDF data files.
  35
  36Examples
  37--------
  38Read a 3D PSI-style HDF5 file:
  39
  40>>> from psi_io import read_hdf_data
  41>>> from psi_io.data import get_3d_data
  42>>> filepath = get_3d_data()
  43>>> data, r, t, p = read_hdf_data(filepath)
  44>>> data.shape
  45(181, 100, 151)
  46"""
  47
  48from __future__ import annotations
  49
  50__all__ = [
  51    "read_hdf_meta",
  52    "read_rtp_meta",
  53
  54    "get_scales_1d",
  55    "get_scales_2d",
  56    "get_scales_3d",
  57
  58    "read_hdf_by_index",
  59    "read_hdf_by_value",
  60    "read_hdf_by_ivalue",
  61
  62    "np_interpolate_slice_from_hdf",
  63    "sp_interpolate_slice_from_hdf",
  64    "interpolate_positions_from_hdf",
  65
  66    "instantiate_linear_interpolator",
  67    "interpolate_point_from_1d_slice",
  68    "interpolate_point_from_2d_slice",
  69
  70    "read_hdf_data",
  71    "rdhdf_1d",
  72    "rdhdf_2d",
  73    "rdhdf_3d",
  74
  75    "write_hdf_data",
  76    "write_hdf_meta",
  77    "wrhdf_1d",
  78    "wrhdf_2d",
  79    "wrhdf_3d",
  80
  81    "convert",
  82    "convert_psih4_to_psih5"
  83]
  84
  85import math
  86from collections import namedtuple
  87from pathlib import Path
  88from types import MappingProxyType
  89from typing import Optional, Literal, Tuple, Sequence, List, Dict, Union, Callable, Any, Mapping
  90
  91import numpy as np
  92import h5py as h5
  93
  94# -----------------------------------------------------------------------------
  95# Optional Imports and Import Checking
  96# -----------------------------------------------------------------------------
  97# These packages are needed by several functions and must be imported in the
  98# module namespace.
  99try:
 100    import pyhdf.SD as h4
 101    H4_AVAILABLE = True
 102    DTYPE_TO_SDC = MappingProxyType({
 103        "i": {
 104            1: h4.SDC.INT8,
 105            2: h4.SDC.INT16,
 106            4: h4.SDC.INT32,
 107        },
 108        "u": {
 109            1: h4.SDC.UINT8,
 110            2: h4.SDC.UINT16,
 111            4: h4.SDC.UINT32,
 112        },
 113        "f": {
 114            4: h4.SDC.FLOAT32,
 115            8: h4.SDC.FLOAT64,
 116        },
 117        "b": {
 118            1: h4.SDC.UINT8
 119        },
 120        "U": h4.SDC.CHAR,
 121        "S": h4.SDC.CHAR
 122    })
 123    """
 124        Helper dictionary mapping :class:`~numpy.dtype` kinds to HDF4 SDC types.
 125
 126        The keys are :attr:`~numpy.dtype.kind`, and the values are either a direct
 127        mapping (for byte-string or unicode-string types) or a nested mapping of
 128        :attr:`~numpy.dtype.itemsize` to SDC type (for numeric types).
 129    """
 130except ImportError:
 131    H4_AVAILABLE = False
 132    DTYPE_TO_SDC = {}
 133
 134try:
 135    from scipy.interpolate import RegularGridInterpolator
 136    SCIPY_AVAILABLE = True
 137except ImportError:
 138    SCIPY_AVAILABLE = False
 139
 140
 141# Functions to stop execution if a package doesn't exist.
 142def _except_no_pyhdf():
 143    """Raise :exc:`ImportError` if the ``pyhdf`` package is not available.
 144
 145    Raises
 146    ------
 147    ImportError
 148        If ``pyhdf`` was not importable at module load time.
 149
 150    Examples
 151    --------
 152    >>> from psi_io.psi_io import _except_no_pyhdf, H4_AVAILABLE
 153    >>> H4_AVAILABLE  # doctest: +SKIP
 154    True
 155    >>> _except_no_pyhdf() is None  # no-op when pyhdf is installed
 156    True
 157    """
 158    if not H4_AVAILABLE:
 159        raise ImportError('The pyhdf package is required to read/write HDF4 .hdf files!')
 160    return
 161
 162
 163def _except_no_scipy():
 164    """Raise :exc:`ImportError` if the ``scipy`` package is not available.
 165
 166    Raises
 167    ------
 168    ImportError
 169        If ``scipy`` was not importable at module load time.
 170
 171    Examples
 172    --------
 173    >>> from psi_io.psi_io import _except_no_scipy, SCIPY_AVAILABLE
 174    >>> SCIPY_AVAILABLE  # doctest: +SKIP
 175    True
 176    >>> _except_no_scipy() is None  # no-op when scipy is installed
 177    True
 178    """
 179    if not SCIPY_AVAILABLE:
 180        raise ImportError('The scipy package is required for the interpolation routines!')
 181    return
 182
 183
 184SDC_TYPE_CONVERSIONS = MappingProxyType({
 185    3: np.dtype("ubyte"),
 186    4: np.dtype("byte"),
 187    5: np.dtype("float32"),
 188    6: np.dtype("float64"),
 189    20: np.dtype("int8"),
 190    21: np.dtype("uint8"),
 191    22: np.dtype("int16"),
 192    23: np.dtype("uint16"),
 193    24: np.dtype("int32"),
 194    25: np.dtype("uint32")
 195})
 196"""Helper dictionary for mapping HDF4 types to numpy dtypes"""
 197
 198
 199PSI_DATA_ID = MappingProxyType({
 200    'h4': 'Data-Set-2',
 201    'h5': 'Data'
 202})
 203"""Mapping of PSI standard dataset names for HDF4 and HDF5 files"""
 204
 205
 206PSI_SCALE_ID = MappingProxyType({
 207    'h4': ('fakeDim0', 'fakeDim1', 'fakeDim2'),
 208    'h5': ('dim1', 'dim2', 'dim3')
 209})
 210"""Mapping of PSI standard scale names for HDF4 and HDF5 files"""
 211
 212
 213HDFEXT = {'.hdf', '.h5'}
 214"""Set of possible HDF file extensions"""
 215
 216
 217HdfExtType = Literal['.hdf', '.h5']
 218"""Type alias for possible HDF file extensions"""
 219
 220
 221HdfScaleMeta = namedtuple('HdfScaleMeta', ['name', 'type', 'shape', 'attr', 'imin', 'imax'])
 222"""
 223    Named tuple storing metadata for a single HDF scale (coordinate) dimension.
 224
 225    Parameters
 226    ----------
 227    name : str
 228        The name of the scale dataset.
 229    type : str
 230        The data type of the scale.
 231    shape : tuple[int, ...]
 232        The shape of the scale array.
 233    attr : dict[str, Any]
 234        A dictionary of attributes associated with the dataset.
 235    imin : float
 236        The minimum value of the scale.
 237        This assumes the scale is monotonically increasing.
 238    imax : float
 239        The maximum value of the scale.
 240        This assumes the scale is monotonically increasing.
 241"""
 242
 243
 244HdfDataMeta = namedtuple('HdfDataMeta', ['name', 'type', 'shape', 'attr', 'scales'])
 245"""
 246    Named tuple for HDF dataset metadata
 247
 248    Parameters
 249    ----------
 250    name : str
 251        The name of the dataset.
 252    type : str
 253        The data type of the dataset.
 254    shape : tuple[int, ...]
 255        The shape of the dataset.
 256    attr : dict[str, Any]
 257        A dictionary of attributes associated with the dataset.
 258    scales : list[HdfScaleMeta]
 259        A list of scale metadata objects corresponding to each dimension of the dataset.
 260        If the dataset has no scales, this list will be empty.
 261"""
 262
 263PathLike = Union[Path, str]
 264"""Type alias for file paths, accepting either :class:`pathlib.Path` or str"""
 265
 266
 267def _dtype_to_sdc(dtype: np.dtype):
 268    """Convert a numpy dtype to the corresponding HDF4 SDC type.
 269
 270    Parameters
 271    ----------
 272    dtype : np.dtype
 273        The numpy dtype to convert.
 274
 275    Returns
 276    -------
 277    out : int
 278        The corresponding HDF4 SDC type constant for the given numpy dtype.
 279
 280    Raises
 281    ------
 282    ImportError
 283        If the ``pyhdf`` package is not available.
 284    KeyError
 285        If the dtype kind or itemsize is not supported by HDF4.  See
 286        :func:`write_hdf_data` for the full dtype support table.
 287
 288    Examples
 289    --------
 290    >>> import numpy as np
 291    >>> from psi_io.psi_io import _dtype_to_sdc
 292    >>> import pyhdf.SD as h4
 293    >>> _dtype_to_sdc(np.dtype("float32")) == h4.SDC.FLOAT32
 294    True
 295    """
 296    _except_no_pyhdf()
 297    if dtype.kind in {"U", "S"}:
 298        return DTYPE_TO_SDC[dtype.kind]
 299
 300    try:
 301        return DTYPE_TO_SDC[dtype.kind][dtype.itemsize]
 302    except KeyError as e:
 303        if dtype.kind not in DTYPE_TO_SDC:
 304            msg = (f"Unsupported dtype kind '{dtype.kind}' for HDF4. "
 305                   f"Supported kinds are: {set(DTYPE_TO_SDC.keys())}")
 306            raise KeyError(msg) from e
 307        elif dtype.itemsize not in DTYPE_TO_SDC[dtype.kind]:
 308            msg = (f"Unsupported itemsize '{dtype.itemsize}' for dtype kind '{dtype.kind}' in HDF4. "
 309                   f"Supported itemsizes are: {set(DTYPE_TO_SDC[dtype.kind].keys())}")
 310            raise KeyError(msg) from e
 311        raise e
 312
 313
 314def _dispatch_by_ext(ifile: PathLike,
 315                     hdf4_func: Callable,
 316                     hdf5_func: Callable,
 317                     *args: Any, **kwargs: Any
 318                     ):
 319    """
 320    Dispatch function to call HDF4 or HDF5 specific functions based on file extension.
 321
 322    Parameters
 323    ----------
 324    ifile : PathLike
 325        The path to the HDF file.
 326    hdf4_func : Callable
 327        The function to call for HDF4 files.
 328    hdf5_func : Callable
 329        The function to call for HDF5 files.
 330    *args : Any
 331        Positional arguments to pass to the selected function.
 332    **kwargs : Any
 333        Keyword arguments to pass to the selected function.
 334
 335    Returns
 336    -------
 337    out : Any
 338        The return value of the dispatched function.
 339
 340    Raises
 341    ------
 342    ValueError
 343        If the file does not have a `.hdf` or `.h5` extension.
 344    ImportError
 345        If the file is HDF4 and the `pyhdf` package is not available.
 346
 347    Examples
 348    --------
 349    >>> from psi_io.psi_io import _dispatch_by_ext, _read_h5_data, _read_h4_data
 350    >>> from psi_io.data import get_3d_data
 351    >>> filepath = get_3d_data()
 352    >>> data, *_ = _dispatch_by_ext(filepath, _read_h4_data, _read_h5_data)
 353    >>> data.shape
 354    (181, 100, 151)
 355    """
 356    ipath = Path(ifile)
 357    if ipath.suffix == '.h5':
 358        return hdf5_func(ifile, *args, **kwargs)
 359    if ipath.suffix == '.hdf':
 360        _except_no_pyhdf()
 361        return hdf4_func(ifile, *args, **kwargs)
 362    raise ValueError("File must be HDF4 (.hdf) or HDF5 (.h5)")
 363
 364
 365# -----------------------------------------------------------------------------
 366# "Classic" HDF reading and writing routines adapted from psihdf.py or psi_io.py.
 367# -----------------------------------------------------------------------------
 368
 369
[docs] 370def rdhdf_1d(hdf_filename: PathLike 371 ) -> Tuple[np.ndarray, np.ndarray]: 372 """Read a 1D PSI-style HDF5 or HDF4 file. 373 374 Parameters 375 ---------- 376 hdf_filename : PathLike 377 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read. 378 379 Returns 380 ------- 381 x : np.ndarray 382 1D coordinate scale array. 383 f : np.ndarray 384 1D data array. 385 386 See Also 387 -------- 388 read_hdf_data : Generic HDF data reading routine. 389 390 Examples 391 -------- 392 >>> from psi_io import rdhdf_1d 393 >>> from psi_io.data import get_1d_data 394 >>> filepath = get_1d_data() 395 >>> x, f = rdhdf_1d(filepath) 396 >>> x.shape, f.shape 397 ((151,), (151,)) 398 """ 399 return _rdhdf_nd(hdf_filename, dimensionality=1)
400 401
[docs] 402def rdhdf_2d(hdf_filename: PathLike 403 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 404 """Read a 2D PSI-style HDF5 or HDF4 file. 405 406 The data in the HDF file is assumed to be ordered X, Y in Fortran order. 407 Each dimension is assumed to have a 1D scale associated with it that 408 describes the rectilinear grid coordinates. 409 410 Parameters 411 ---------- 412 hdf_filename : PathLike 413 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read. 414 415 Returns 416 ------- 417 x : np.ndarray 418 1D coordinate scale array for the X dimension. 419 y : np.ndarray 420 1D coordinate scale array for the Y dimension. 421 f : np.ndarray 422 2D data array, C-ordered with shape ``(ny, nx)`` in Python. 423 424 Notes 425 ----- 426 Because Fortran uses column-major ordering and Python uses row-major 427 ordering, the data array is returned with reversed axis order relative to 428 how it is stored on disk. The first returned scale always corresponds to 429 the *slowest* varying dimension in the data array (i.e. the last axis). 430 431 See Also 432 -------- 433 read_hdf_data : Generic HDF data reading routine. 434 435 Examples 436 -------- 437 >>> from psi_io import rdhdf_2d 438 >>> from psi_io.data import get_2d_data 439 >>> filepath = get_2d_data() 440 >>> x, y, f = rdhdf_2d(filepath) 441 >>> f.shape == (y.shape[0], x.shape[0]) 442 True 443 """ 444 return _rdhdf_nd(hdf_filename, dimensionality=2)
445 446
[docs] 447def rdhdf_3d(hdf_filename: PathLike 448 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 449 """Read a 3D PSI-style HDF5 or HDF4 file. 450 451 The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order. 452 Each dimension is assumed to have a 1D scale associated with it that 453 describes the rectilinear grid coordinates. 454 455 Parameters 456 ---------- 457 hdf_filename : PathLike 458 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read. 459 460 Returns 461 ------- 462 x : np.ndarray 463 1D coordinate scale array for the X dimension. 464 y : np.ndarray 465 1D coordinate scale array for the Y dimension. 466 z : np.ndarray 467 1D coordinate scale array for the Z dimension. 468 f : np.ndarray 469 3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python. 470 471 Notes 472 ----- 473 Because Fortran uses column-major ordering and Python uses row-major 474 ordering, the data array is returned with reversed axis order relative to 475 how it is stored on disk. For PSI spherical datasets the returned shapes 476 follow the convention ``f.shape == (n_phi, n_theta, n_r)`` with scales 477 returned in physical order ``(r, theta, phi)``. 478 479 See Also 480 -------- 481 read_hdf_data : Generic HDF data reading routine. 482 483 Examples 484 -------- 485 >>> from psi_io import rdhdf_3d 486 >>> from psi_io.data import get_3d_data 487 >>> filepath = get_3d_data() 488 >>> r, t, p, f = rdhdf_3d(filepath) 489 >>> f.shape == (p.shape[0], t.shape[0], r.shape[0]) 490 True 491 """ 492 return _rdhdf_nd(hdf_filename, dimensionality=3)
493 494
[docs] 495def wrhdf_1d(hdf_filename: PathLike, 496 x: np.ndarray, 497 f: np.ndarray, 498 **kwargs) -> None: 499 """Write a 1D PSI-style HDF5 or HDF4 file. 500 501 Parameters 502 ---------- 503 hdf_filename : PathLike 504 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to write. 505 x : np.ndarray 506 1D array of scales. 507 f : np.ndarray 508 1D array of data. 509 **kwargs 510 Additional keyword arguments passed through to the underlying 511 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically: 512 513 - ``dataset_id`` : str | None 514 The identifier of the dataset to write. 515 If None, a default dataset is used ('Data-Set-2' for HDF4 and 'Data' for HDF5). 516 - ``sync_dtype``: bool 517 If True, the data type of the scales will be matched to that of the data array. 518 519 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to 520 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between 521 the dataset and scales. 522 523 Raises 524 ------ 525 ValueError 526 If the file does not have a ``.hdf`` or ``.h5`` extension. 527 KeyError 528 If, for HDF4 files, the data or scale dtype is not supported by 529 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype 530 support table. 531 532 Notes 533 ----- 534 This routine is provided for backward compatibility with existing PSI codes that 535 expect this API signature. The underlying implementation dispatches to 536 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default 537 behavior (sync dtype, default dataset IDs) are preserved. 538 539 .. warning:: 540 When called with its default arguments this routine **writes to the default PSI 541 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and 542 scales.** This behavior is required for interoperability with certain Fortran-based 543 PSI tools. 544 545 See Also 546 -------- 547 write_hdf_data : Generic HDF data writing routine. 548 549 Examples 550 -------- 551 >>> import tempfile, numpy as np 552 >>> from pathlib import Path 553 >>> from psi_io import wrhdf_1d, rdhdf_1d 554 >>> x = np.linspace(0.0, 1.0, 50, dtype=np.float32) 555 >>> f = np.sin(x) 556 >>> with tempfile.TemporaryDirectory() as d: 557 ... wrhdf_1d(Path(d) / "out.h5", x, f) 558 ... x2, f2 = rdhdf_1d(Path(d) / "out.h5") 559 ... np.array_equal(f, f2) 560 True 561 """ 562 return _wrhdf_nd(hdf_filename, f, x, dimensionality=1, **kwargs)
563 564
[docs] 565def wrhdf_2d(hdf_filename: PathLike, 566 x: np.ndarray, 567 y: np.ndarray, 568 f: np.ndarray, 569 **kwargs) -> None: 570 """Write a 2D PSI-style HDF5 or HDF4 file. 571 572 The data in the HDF file will appear as X,Y in Fortran order. 573 574 Each dimension requires a 1D "scale" associated with it that 575 describes the rectilinear grid coordinates in each dimension. 576 577 Parameters 578 ---------- 579 hdf_filename : PathLike 580 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to write. 581 x : np.ndarray 582 1D array of scales in the X dimension. 583 y : np.ndarray 584 1D array of scales in the Y dimension. 585 f : np.ndarray 586 2D data array, C-ordered with shape ``(ny, nx)`` in Python. 587 **kwargs 588 Additional keyword arguments passed through to the underlying 589 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically: 590 591 - ``dataset_id`` : str | None 592 The identifier of the dataset to write. 593 If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5). 594 - ``sync_dtype``: bool 595 If True, the data type of the scales will be matched to that of the data array. 596 597 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to 598 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between 599 the dataset and scales. 600 601 Raises 602 ------ 603 ValueError 604 If the file does not have a ``.hdf`` or ``.h5`` extension. 605 KeyError 606 If, for HDF4 files, the data or scale dtype is not supported by 607 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype 608 support table. 609 610 Notes 611 ----- 612 This routine is provided for backward compatibility with existing PSI codes that 613 expect this API signature. The underlying implementation dispatches to 614 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default 615 behavior (sync dtype, default dataset IDs) are preserved. 616 617 .. warning:: 618 When called with its default arguments this routine **writes to the default PSI 619 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and 620 scales.** This behavior is required for interoperability with certain Fortran-based 621 PSI tools. 622 623 See Also 624 -------- 625 write_hdf_data : Generic HDF data writing routine. 626 627 Examples 628 -------- 629 >>> import tempfile, numpy as np 630 >>> from pathlib import Path 631 >>> from psi_io import wrhdf_2d, rdhdf_2d 632 >>> x = np.linspace(0.0, 2*np.pi, 64, dtype=np.float32) 633 >>> y = np.linspace(0.0, np.pi, 32, dtype=np.float32) 634 >>> f = np.outer(np.sin(x), np.cos(y)).astype(np.float32) 635 >>> with tempfile.TemporaryDirectory() as d: 636 ... wrhdf_2d(Path(d) / "out.h5", x, y, f) 637 ... x2, y2, f2 = rdhdf_2d(Path(d) / "out.h5") 638 ... np.array_equal(f, f2) 639 True 640 """ 641 return _wrhdf_nd(hdf_filename, f, x, y, dimensionality=2, **kwargs)
642 643
[docs] 644def wrhdf_3d(hdf_filename: PathLike, 645 x: np.ndarray, 646 y: np.ndarray, 647 z: np.ndarray, 648 f: np.ndarray, 649 **kwargs) -> None: 650 """Write a 3D PSI-style HDF5 or HDF4 file. 651 652 The data in the HDF file will appear as X,Y,Z in Fortran order. 653 654 Each dimension requires a 1D "scale" associated with it that 655 describes the rectilinear grid coordinates in each dimension. 656 657 Parameters 658 ---------- 659 hdf_filename : PathLike 660 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to write. 661 x : np.ndarray 662 1D array of scales in the X dimension. 663 y : np.ndarray 664 1D array of scales in the Y dimension. 665 z : np.ndarray 666 1D array of scales in the Z dimension. 667 f : np.ndarray 668 3D data array, C-ordered with shape ``(nz, ny, nx)`` in Python. 669 **kwargs 670 Additional keyword arguments passed through to the underlying 671 :func:`~psi_io.psi_io.write_hdf_data` routine, specifically: 672 673 - ``dataset_id`` : str | None 674 The identifier of the dataset to write. 675 If None, a default dataset is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5). 676 - ``sync_dtype``: bool 677 If True, the data type of the scales will be matched to that of the data array. 678 679 Omitting these will yield the same behavior as the legacy routines, *i.e.* writing to 680 the default PSI dataset IDs for HDF4/HDF5 files and synchronizing datatypes between 681 the dataset and scales. 682 683 Raises 684 ------ 685 ValueError 686 If the file does not have a ``.hdf`` or ``.h5`` extension. 687 KeyError 688 If, for HDF4 files, the data or scale dtype is not supported by 689 :py:mod:`pyhdf`. See :func:`write_hdf_data` for the full dtype 690 support table. 691 692 Notes 693 ----- 694 This routine is provided for backward compatibility with existing PSI codes that 695 expect this API signature. The underlying implementation dispatches to 696 :func:`~psi_io.psi_io.write_hdf_data`, but the argument order and default 697 behavior (sync dtype, default dataset IDs) are preserved. 698 699 .. warning:: 700 When called with its default arguments this routine **writes to the default PSI 701 dataset IDs for HDF4/HDF5 files and synchronizes dtypes between the dataset and 702 scales.** This behavior is required for interoperability with certain Fortran-based 703 PSI tools. 704 705 See Also 706 -------- 707 write_hdf_data : Generic HDF data writing routine. 708 709 Examples 710 -------- 711 >>> import tempfile, numpy as np 712 >>> from pathlib import Path 713 >>> from psi_io import wrhdf_3d, rdhdf_3d 714 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32) 715 >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32) 716 >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32) 717 >>> f = np.ones((30, 20, 10), dtype=np.float32) 718 >>> with tempfile.TemporaryDirectory() as d: 719 ... wrhdf_3d(Path(d) / "out.h5", r, t, p, f) 720 ... r2, t2, p2, f2 = rdhdf_3d(Path(d) / "out.h5") 721 ... np.array_equal(f, f2) 722 True 723 """ 724 return _wrhdf_nd(hdf_filename, f, x, y, z, dimensionality=3, **kwargs)
725 726
[docs] 727def get_scales_1d(filename: PathLike 728 ) -> np.ndarray: 729 """Return the coordinate scale of a 1D PSI-style HDF5 or HDF4 dataset. 730 731 Does not load the data array, so this is efficient for large files. 732 733 Parameters 734 ---------- 735 filename : PathLike 736 The path to the 1D HDF5 (.h5) or HDF4 (.hdf) file to read. 737 738 Returns 739 ------- 740 x : np.ndarray 741 1D coordinate scale array. 742 743 Examples 744 -------- 745 >>> from psi_io import get_scales_1d 746 >>> from psi_io.data import get_1d_data 747 >>> filepath = get_1d_data() 748 >>> x = get_scales_1d(filepath) 749 >>> x.ndim 750 1 751 """ 752 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5, 753 dimensionality=1)
754 755
[docs] 756def get_scales_2d(filename: PathLike 757 ) -> Tuple[np.ndarray, np.ndarray]: 758 """Return the coordinate scales of a 2D PSI-style HDF5 or HDF4 dataset. 759 760 Does not load the data array, so this is efficient for large files. 761 The data in the HDF file is assumed to be ordered X, Y in Fortran order. 762 763 Parameters 764 ---------- 765 filename : PathLike 766 The path to the 2D HDF5 (.h5) or HDF4 (.hdf) file to read. 767 768 Returns 769 ------- 770 x : np.ndarray 771 1D coordinate scale array for the X dimension. 772 y : np.ndarray 773 1D coordinate scale array for the Y dimension. 774 775 Examples 776 -------- 777 >>> from psi_io import get_scales_2d 778 >>> from psi_io.data import get_2d_data 779 >>> filepath = get_2d_data() 780 >>> x, y = get_scales_2d(filepath) 781 >>> x.ndim, y.ndim 782 (1, 1) 783 """ 784 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5, 785 dimensionality=2)
786 787
[docs] 788def get_scales_3d(filename: PathLike 789 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 790 """Return the coordinate scales of a 3D PSI-style HDF5 or HDF4 dataset. 791 792 Does not load the data array, so this is efficient for large files. 793 The data in the HDF file is assumed to be ordered X, Y, Z in Fortran order. 794 795 Parameters 796 ---------- 797 filename : PathLike 798 The path to the 3D HDF5 (.h5) or HDF4 (.hdf) file to read. 799 800 Returns 801 ------- 802 x : np.ndarray 803 1D coordinate scale array for the X dimension. 804 y : np.ndarray 805 1D coordinate scale array for the Y dimension. 806 z : np.ndarray 807 1D coordinate scale array for the Z dimension. 808 809 Examples 810 -------- 811 >>> from psi_io import get_scales_3d 812 >>> from psi_io.data import get_3d_data 813 >>> filepath = get_3d_data() 814 >>> r, t, p = get_scales_3d(filepath) 815 >>> r.ndim, t.ndim, p.ndim 816 (1, 1, 1) 817 """ 818 return _dispatch_by_ext(filename, _get_scales_nd_h4, _get_scales_nd_h5, 819 dimensionality=3)
820 821 822# ----------------------------------------------------------------------------- 823# "Updated" HDF reading and slicing routines for Hdf4 and Hdf5 datasets. 824# ----------------------------------------------------------------------------- 825 826
[docs] 827def read_hdf_meta(ifile: PathLike, /, 828 dataset_id: Optional[str] = None 829 ) -> List[HdfDataMeta]: 830 """ 831 Read metadata from an HDF4 (.hdf) or HDF5 (.h5) file. 832 833 This function provides a unified interface to read metadata from both HDF4 and HDF5 files. 834 835 .. warning:: 836 Unlike elsewhere in this module, the scales and datasets are read **as is**, *i.e.* without 837 reordering scales to match PSI's Fortran data ecosystem. 838 839 .. warning:: 840 Unlike elsewhere in this module, when ``None`` is passed to ``dataset_id``, all (non-scale) 841 datasets are returned (instead of the default psi datasets *e.g.* 'Data-Set-2' or 'Data'). 842 This will, effectively, return the standard PSI datasets when reading PSI-style HDF files. 843 844 Parameters 845 ---------- 846 ifile : PathLike 847 The path to the HDF file to read. 848 dataset_id : str | None, optional 849 The identifier of the dataset for which to read metadata. 850 If ``None``, metadata for **all** datasets is returned. Default is ``None``. 851 852 Returns 853 ------- 854 out : list[HdfDataMeta] 855 A list of :class:`HdfDataMeta` named tuples, one per dataset (or one 856 for the requested *dataset_id*). Each object exposes: 857 858 ``name`` — dataset name (e.g. ``'Data'`` or ``'Data-Set-2'``). 859 860 ``type`` — NumPy dtype string of the stored array. 861 862 ``shape`` — array shape tuple. 863 864 ``attr`` — :class:`dict` of attribute key-value pairs attached to the 865 dataset. Empty when no attributes are present. 866 867 ``scales`` — list of :class:`HdfScaleMeta` objects, one per 868 coordinate dimension. Each :class:`HdfScaleMeta` similarly carries 869 an ``attr`` dict for scale-level attributes. 870 871 Raises 872 ------ 873 ValueError 874 If the file does not have a ``.hdf`` or ``.h5`` extension. 875 876 Notes 877 ----- 878 This function delegates to :func:`_read_h5_meta` for HDF5 files and 879 :func:`_read_h4_meta` for HDF4 files based on the file extension. 880 881 Although this function is designed to read metadata for dataset objects, it is possible 882 to read metadata for coordinate variables (scales) by passing their names to 883 ``dataset_id``, *e.g.* ``'dim1'``, ``'dim2'``, etc. However, this is not the 884 intended use case. 885 886 Examples 887 -------- 888 >>> from psi_io import read_hdf_meta 889 >>> from psi_io.data import get_3d_data 890 >>> filepath = get_3d_data() 891 >>> meta = read_hdf_meta(filepath) 892 >>> meta[0].name 893 'Data' 894 >>> meta[0].shape 895 (181, 100, 151) 896 >>> meta[0].attr # attributes attached to the dataset 897 {} 898 >>> len(meta[0].scales) # one HdfScaleMeta per dimension 899 3 900 """ 901 902 return _dispatch_by_ext(ifile, _read_h4_meta, _read_h5_meta, 903 dataset_id=dataset_id)
904 905
[docs] 906def read_rtp_meta(ifile: PathLike, /) -> Dict: 907 """ 908 Read the scale metadata for PSI's 3D cubes. 909 910 Parameters 911 ---------- 912 ifile : PathLike 913 The path to the HDF file to read. 914 915 Returns 916 ------- 917 out : dict 918 A dictionary containing the RTP metadata. 919 The value for each key ('r', 't', and 'p') is a tuple containing: 920 921 1. The scale length 922 2. The scale's value at the first index 923 3. The scale's value at the last index 924 925 Raises 926 ------ 927 ValueError 928 If the file does not have a `.hdf` or `.h5` extension. 929 930 Notes 931 ----- 932 This function delegates to :func:`_read_h5_rtp` for HDF5 files and 933 :func:`_read_h4_rtp` for HDF4 files based on the file extension. 934 935 Examples 936 -------- 937 >>> from psi_io import read_rtp_meta 938 >>> from psi_io.data import get_3d_data 939 >>> filepath = get_3d_data() 940 >>> meta = read_rtp_meta(filepath) 941 >>> sorted(meta.keys()) 942 ['p', 'r', 't'] 943 >>> len(meta['r']) 944 3 945 """ 946 return _dispatch_by_ext(ifile, _read_h4_rtp, _read_h5_rtp)
947 948
[docs] 949def read_hdf_data(ifile: PathLike, /, 950 dataset_id: Optional[str] = None, 951 return_scales: bool = True, 952 ) -> Tuple[np.ndarray]: 953 """ 954 Read data from an HDF4 (.hdf) or HDF5 (.h5) file. 955 956 Parameters 957 ---------- 958 ifile : PathLike 959 The path to the HDF file to read. 960 dataset_id : str | None, optional 961 The identifier of the dataset to read. 962 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and 963 ``'Data'`` for HDF5). Default is ``None``. 964 return_scales : bool, optional 965 If ``True``, the coordinate scale arrays for each dimension are also 966 returned. Default is ``True``. 967 968 Returns 969 ------- 970 out : np.ndarray | tuple[np.ndarray, ...] 971 The data array. If ``return_scales`` is ``True``, returns a tuple 972 ``(data, scale_0, scale_1, ...)`` with one scale per dimension. 973 974 Raises 975 ------ 976 ValueError 977 If the file does not have a ``.hdf`` or ``.h5`` extension. 978 979 See Also 980 -------- 981 read_hdf_by_index : Read HDF datasets by index. 982 read_hdf_by_value : Read HDF datasets by value ranges. 983 read_hdf_by_ivalue : Read HDF datasets by subindex values. 984 985 Notes 986 ----- 987 This function delegates to :func:`_read_h5_data` for HDF5 files and 988 :func:`_read_h4_data` for HDF4 files based on the file extension. 989 990 Examples 991 -------- 992 >>> from psi_io import read_hdf_data 993 >>> from psi_io.data import get_3d_data 994 >>> filepath = get_3d_data() 995 >>> data, r, t, p = read_hdf_data(filepath) 996 >>> data.shape 997 (181, 100, 151) 998 >>> r.shape, t.shape, p.shape 999 ((151,), (100,), (181,)) 1000 """ 1001 return _dispatch_by_ext(ifile, _read_h4_data, _read_h5_data, 1002 dataset_id=dataset_id, return_scales=return_scales)
1003 1004
[docs] 1005def read_hdf_by_index(ifile: PathLike, /, 1006 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None], 1007 dataset_id: Optional[str] = None, 1008 return_scales: bool = True, 1009 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 1010 r""" 1011 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by index. 1012 1013 .. attention:: 1014 For each dimension, the *minimum* number of elements returned is 1 *e.g.* 1015 if 3 ints are passed (as positional `*xi` arguments) for a 3D dataset, 1016 the resulting subset will have a shape of (1, 1, 1,) with scales of length 1. 1017 1018 Parameters 1019 ---------- 1020 ifile : PathLike 1021 The path to the HDF file to read. 1022 *xi : int | tuple[int | None, int | None] | None 1023 Indices or ranges for each dimension of the `n`-dimensional dataset. 1024 Use None for a dimension to select all indices. If no arguments are passed, 1025 the entire dataset (and its scales) will be returned – see 1026 :func:`~psi_io.psi_io.read_hdf_data`. 1027 dataset_id : str | None, optional 1028 The identifier of the dataset to read. 1029 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and 1030 ``'Data'`` for HDF5). Default is ``None``. 1031 return_scales : bool, optional 1032 If ``True``, the coordinate scale arrays for each dimension are also 1033 returned. Default is ``True``. 1034 1035 Returns 1036 ------- 1037 out : np.ndarray | tuple[np.ndarray, ...] 1038 The selected data array. If ``return_scales`` is ``True``, returns a 1039 tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension. 1040 1041 Raises 1042 ------ 1043 ValueError 1044 If the file does not have a ``.hdf`` or ``.h5`` extension. 1045 1046 See Also 1047 -------- 1048 read_hdf_by_value : Read HDF datasets by value ranges. 1049 read_hdf_by_ivalue : Read HDF datasets by subindex values. 1050 read_hdf_data : Read entire HDF datasets. 1051 1052 Notes 1053 ----- 1054 This function delegates to :func:`_read_h5_by_index` for HDF5 files and 1055 :func:`_read_h4_by_index` for HDF4 files based on the file extension. 1056 1057 This function assumes Fortran (column-major) ordering for compatibility with 1058 PSI's data ecosystem. For an :math:`n`-dimensional array of shape 1059 :math:`(D_0, D_1, \ldots, D_{n-1})` the scales satisfy 1060 :math:`|x_i| = |D_{(n-1)-i}|`. For example, a 3D dataset of shape 1061 :math:`(D_\phi, D_\theta, D_r)` has scales :math:`r, \theta, \phi`. 1062 1063 Each ``*xi`` argument is forwarded to Python's built-in :class:`slice` to 1064 extract the desired subset without reading the entire dataset into memory. 1065 1066 Examples 1067 -------- 1068 Import a 3D HDF5 cube. 1069 1070 >>> from psi_io.data import get_3d_data 1071 >>> from psi_io import read_hdf_by_index 1072 >>> filepath = get_3d_data() 1073 1074 Extract a radial slice at the first radial index from a 3D cube: 1075 1076 >>> f, r, t, p = read_hdf_by_index(filepath, 0, None, None) 1077 >>> f.shape, r.shape, t.shape, p.shape 1078 ((181, 100, 1), (1,), (100,), (181,)) 1079 1080 Extract a phi slice at the 90th index from a 3D cube: 1081 1082 >>> f, r, t, p = read_hdf_by_index(filepath, None, None, 90) 1083 >>> f.shape, r.shape, t.shape, p.shape 1084 ((1, 100, 151), (151,), (100,), (1,)) 1085 1086 Extract up to the 20th radial index with phi indices 10 to 25: 1087 1088 >>> f, r, t, p = read_hdf_by_index(filepath, (None, 20), None, (10, 25)) 1089 >>> f.shape, r.shape, t.shape, p.shape 1090 ((15, 100, 20), (20,), (100,), (15,)) 1091 """ 1092 if not xi: 1093 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales) 1094 return _dispatch_by_ext(ifile, _read_h4_by_index, _read_h5_by_index, 1095 *xi, dataset_id=dataset_id, return_scales=return_scales)
1096 1097
[docs] 1098def read_hdf_by_value(ifile: PathLike, /, 1099 *xi: Union[float, Tuple[float, float], None], 1100 dataset_id: Optional[str] = None, 1101 return_scales: bool = True, 1102 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 1103 """ 1104 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by value. 1105 1106 .. note:: 1107 For each dimension, the minimum number of elements returned is 2 *e.g.* 1108 if 3 floats are passed (as positional `*xi` arguments) for a 3D dataset, 1109 the resulting subset will have a shape of (2, 2, 2,) with scales of length 2. 1110 1111 Parameters 1112 ---------- 1113 ifile : PathLike 1114 The path to the HDF file to read. 1115 *xi : float | tuple[float, float] | None 1116 Values or value ranges corresponding to each dimension of the `n`-dimensional 1117 dataset specified by ``dataset_id``. Pass ``None`` for a dimension to 1118 select all indices. If no arguments are passed, the entire dataset (and 1119 its scales) will be returned. 1120 dataset_id : str | None, optional 1121 The identifier of the dataset to read. 1122 If ``None``, a default dataset is used (``'Data-Set-2'`` for HDF4 and 1123 ``'Data'`` for HDF5). Default is ``None``. 1124 return_scales : bool, optional 1125 If ``True``, the coordinate scale arrays for each dimension are also 1126 returned. Default is ``True``. 1127 1128 Returns 1129 ------- 1130 out : np.ndarray | tuple[np.ndarray, ...] 1131 The selected data array. If ``return_scales`` is ``True``, returns a 1132 tuple ``(data, scale_0, scale_1, ...)`` with one scale per dimension. 1133 1134 Raises 1135 ------ 1136 ValueError 1137 If the file does not have a ``.hdf`` or ``.h5`` extension. 1138 1139 See Also 1140 -------- 1141 read_hdf_by_index : Read HDF datasets by index. 1142 read_hdf_by_ivalue : Read HDF datasets by subindex values. 1143 read_hdf_data : Read entire HDF datasets. 1144 sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's 1145 :class:`~scipy.interpolate.RegularGridInterpolator`. 1146 np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation 1147 using vectorized NumPy routines. 1148 1149 Notes 1150 ----- 1151 This function delegates to :func:`_read_h5_by_value` for HDF5 files and 1152 :func:`_read_h4_by_value` for HDF4 files based on the file extension. 1153 1154 This function assumes that the dataset is Fortran (or column-major) ordered *viz.* for 1155 compatibility with PSI's data ecosystem; as such, a given :math:`n`-dimensional array, 1156 of shape :math:`(D_0, D_1, ..., D_{n-1})`, has scales :math:`(x_0, x_1, ..., x_{n-1})`, 1157 such that :math:`| x_i | = | D_{(n-1)-i} |`. For example, a 3D dataset with shape 1158 :math:`(D_p, D_t, D_r)` has scales :math:`r, t, p` corresponding to the radial, theta, 1159 and phi dimensions respectively. 1160 1161 This function extracts a subset of the given dataset/scales without reading the 1162 entire data into memory. For a given scale :math:`x_j`, if: 1163 1164 - *i)* a single float is provided (:math:`a`), the function will return a 2-element 1165 subset of the scale (:math:`xʹ_j`) such that :math:`xʹ_j[0] <= a < xʹ_j[1]`. 1166 - *ii)* a (float, float) tuple is provided (:math:`a_0, a_1`), the function will return an 1167 *m*-element subset of the scale (:math:`xʹ_j`) where 1168 :math:`xʹ_j[0] <= a_0` and :math:`xʹ_j[m-1] > a_1`. 1169 - *iii)* a **None** value is provided, the function will return the entire scale :math:`x_j` 1170 1171 The returned subset can then be passed to a linear interpolation routine to extract the 1172 "slice" at the desired fixed dimensions. 1173 1174 Examples 1175 -------- 1176 Import a 3D HDF5 cube. 1177 1178 >>> from psi_io.data import get_3d_data 1179 >>> from psi_io import read_hdf_by_value 1180 >>> filepath = get_3d_data() 1181 1182 Extract a radial slice at r=15 from a 3D cube: 1183 1184 >>> f, r, t, p = read_hdf_by_value(filepath, 15, None, None) 1185 >>> f.shape, r.shape, t.shape, p.shape 1186 ((181, 100, 2), (2,), (100,), (181,)) 1187 1188 Extract a phi slice at p=1.57 from a 3D cube: 1189 1190 >>> f, r, t, p = read_hdf_by_value(filepath, None, None, 1.57) 1191 >>> f.shape, r.shape, t.shape, p.shape 1192 ((2, 100, 151), (151,), (100,), (2,)) 1193 1194 Extract the values between 3.2 and 6.4 (in the radial dimension) and with 1195 phi equal to 4.5 1196 1197 >>> f, r, t, p = read_hdf_by_value(filepath, (3.2, 6.4), None, 4.5) 1198 >>> f.shape, r.shape, t.shape, p.shape 1199 ((2, 100, 15), (15,), (100,), (2,)) 1200 """ 1201 if not xi: 1202 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales) 1203 return _dispatch_by_ext(ifile, _read_h4_by_value, _read_h5_by_value, 1204 *xi, dataset_id=dataset_id, return_scales=return_scales)
1205 1206
[docs] 1207def read_hdf_by_ivalue(ifile: PathLike, /, 1208 *xi: Union[float, Tuple[float, float], None], 1209 dataset_id: Optional[str] = None, 1210 return_scales: bool = True, 1211 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 1212 r""" 1213 Read data from an HDF4 (.hdf) or HDF5 (.h5) file by subindex value. 1214 1215 Unlike :func:`read_hdf_by_value` (which works in physical coordinate space), 1216 this function accepts fractional *index* positions and returns the minimal 1217 bracketing subset required for interpolation. 1218 1219 .. note:: 1220 For each dimension the minimum number of elements returned is 2. For 1221 example, passing 3 scalar floats for a 3D dataset yields a subset of 1222 shape ``(2, 2, 2)`` with index arrays of length 2. 1223 1224 Parameters 1225 ---------- 1226 ifile : PathLike 1227 The path to the HDF file to read. 1228 *xi : float | tuple[float, float] | None 1229 Fractional index values or ranges for each dimension of the 1230 ``n``-dimensional dataset. Pass ``None`` to select an entire dimension. 1231 If no arguments are passed, the full dataset is returned. 1232 dataset_id : str | None, optional 1233 The identifier of the dataset to read. If ``None``, a default dataset 1234 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5). 1235 Default is ``None``. 1236 return_scales : bool, optional 1237 If ``True``, 0-based index arrays (generated with :func:`~numpy.arange`) 1238 are returned for each dimension alongside the data. These are always 1239 index-space arrays regardless of whether the dataset has physical 1240 coordinate scales. Default is ``True``. 1241 1242 Returns 1243 ------- 1244 out : np.ndarray | tuple[np.ndarray, ...] 1245 The selected data array. If ``return_scales`` is ``True``, returns a 1246 tuple ``(data, idx_0, idx_1, ...)`` with one index array per dimension. 1247 1248 Raises 1249 ------ 1250 ValueError 1251 If the file does not have a ``.hdf`` or ``.h5`` extension. 1252 1253 See Also 1254 -------- 1255 read_hdf_by_index : Read HDF datasets by integer index. 1256 read_hdf_by_value : Read HDF datasets by physical coordinate value. 1257 read_hdf_data : Read entire HDF datasets. 1258 sp_interpolate_slice_from_hdf : Interpolate slices using SciPy's 1259 :class:`~scipy.interpolate.RegularGridInterpolator`. 1260 np_interpolate_slice_from_hdf : Linear/bilinear/trilinear interpolation 1261 using vectorized NumPy routines. 1262 1263 Notes 1264 ----- 1265 This function delegates to :func:`_read_h5_by_ivalue` for HDF5 files and 1266 :func:`_read_h4_by_ivalue` for HDF4 files based on the file extension. 1267 1268 For a given dimension with scale :math:`x_j`, the bracketing rule is: 1269 1270 - *single float* :math:`a` → returns 1271 :math:`x_j[\lfloor a \rfloor], x_j[\lceil a \rceil]`. 1272 - *(float, float)* :math:`(a_0, a_1)` → returns 1273 :math:`x_j[\lfloor a_0 \rfloor], \ldots, x_j[\lceil a_1 \rceil]`. 1274 - ``None`` → returns the entire scale :math:`x_j`. 1275 1276 Examples 1277 -------- 1278 >>> from psi_io.data import get_3d_data 1279 >>> from psi_io import read_hdf_by_ivalue 1280 >>> filepath = get_3d_data() 1281 1282 Extract a 2-element bracket around fractional radial index 2.7: 1283 1284 >>> f, r_idx, t_idx, p_idx = read_hdf_by_ivalue(filepath, 2.7, None, None) 1285 >>> r_idx.shape 1286 (2,) 1287 """ 1288 if not xi: 1289 return read_hdf_data(ifile, dataset_id=dataset_id, return_scales=return_scales) 1290 return _dispatch_by_ext(ifile, _read_h4_by_ivalue, _read_h5_by_ivalue, 1291 *xi, dataset_id=dataset_id, return_scales=return_scales)
1292 1293
[docs] 1294def write_hdf_data(ifile: PathLike, /, 1295 data: np.ndarray, 1296 *scales: Sequence[Union[np.ndarray, None]], 1297 dataset_id: Optional[str] = None, 1298 sync_dtype: bool = False, 1299 strict: bool = True, 1300 **kwargs 1301 ) -> Path: 1302 """ 1303 Write data to an HDF4 (.hdf) or HDF5 (.h5) file. 1304 1305 Following PSI conventions, the data array is assumed to be Fortran-ordered, 1306 with the scales provided in the order corresponding to each dimension *e.g.* a 1307 3D dataset with shape (Dp, Dt, Dr) has scales r, t, p corresponding to the 1308 radial, theta, and phi dimensions respectively. 1309 1310 Parameters 1311 ---------- 1312 ifile : PathLike 1313 The path to the HDF file to write. 1314 data : np.ndarray 1315 The data array to write. 1316 *scales : Sequence[np.ndarray | None] 1317 The scales (coordinate arrays) for each dimension. 1318 dataset_id : str | None, optional 1319 The identifier of the dataset to write. If ``None``, a default dataset 1320 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5). 1321 Default is ``None``. 1322 sync_dtype : bool, optional 1323 If ``True``, the scale dtypes are cast to match the data array dtype. 1324 This mimics the behavior of PSI's legacy HDF writing routines and ensures 1325 compatibility with Fortran tools that require uniform precision between 1326 datasets and their scales. Default is ``False``. 1327 strict : bool, optional 1328 If ``True``, raise an error if any dataset attribute cannot be written to 1329 the target format. If ``False``, a warning is printed and the attribute 1330 is skipped. Default is ``True``. 1331 **kwargs 1332 Key-value pairs of dataset attributes to attach to the dataset. 1333 1334 Returns 1335 ------- 1336 out : Path 1337 The path to the written HDF file. 1338 1339 Raises 1340 ------ 1341 ValueError 1342 If the file does not have a ``.hdf`` or ``.h5`` extension. 1343 KeyError 1344 If, for HDF4 files, the data or scale dtype is not supported by 1345 :py:mod:`pyhdf`. See the dtype support table in the Notes section. 1346 1347 Notes 1348 ----- 1349 This function delegates to :func:`_write_h5_data` for HDF5 files and 1350 :func:`_write_h4_data` for HDF4 files based on the file extension. 1351 1352 If no scales are provided the dataset is written without coordinate variables. 1353 The number of scales may be less than or equal to the number of dimensions; 1354 pass ``None`` for dimensions that should not have an attached scale. 1355 1356 The table below summarizes dtype support across formats. HDF4 support is 1357 determined by the :data:`DTYPE_TO_SDC` mapping; HDF5 support is provided 1358 by :mod:`h5py`. 1359 1360 .. list-table:: NumPy dtype support by format 1361 :header-rows: 1 1362 :widths: 20 20 20 1363 1364 * - dtype 1365 - HDF4 (``.hdf``) 1366 - HDF5 (``.h5``) 1367 * - ``float16`` 1368 - No 1369 - Yes 1370 * - ``float32`` 1371 - Yes 1372 - Yes 1373 * - ``float64`` 1374 - Yes 1375 - Yes 1376 * - ``int8`` 1377 - Yes 1378 - Yes 1379 * - ``int16`` 1380 - Yes 1381 - Yes 1382 * - ``int32`` 1383 - Yes 1384 - Yes 1385 * - ``int64`` 1386 - No 1387 - Yes 1388 * - ``uint8`` 1389 - Yes 1390 - Yes 1391 * - ``uint16`` 1392 - Yes 1393 - Yes 1394 * - ``uint32`` 1395 - Yes 1396 - Yes 1397 * - ``uint64`` 1398 - No 1399 - Yes 1400 * - ``complex64`` 1401 - No 1402 - Yes 1403 * - ``complex128`` 1404 - No 1405 - Yes 1406 * - ``bool`` 1407 - Yes (stored as ``uint8``) 1408 - Yes 1409 1410 See Also 1411 -------- 1412 wrhdf_1d : Write 1D HDF files. 1413 wrhdf_2d : Write 2D HDF files. 1414 wrhdf_3d : Write 3D HDF files. 1415 1416 Examples 1417 -------- 1418 >>> import tempfile, numpy as np 1419 >>> from pathlib import Path 1420 >>> from psi_io import write_hdf_data, read_hdf_data 1421 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32) 1422 >>> t = np.linspace(0.0, np.pi, 20, dtype=np.float32) 1423 >>> p = np.linspace(0.0, 2*np.pi, 30, dtype=np.float32) 1424 >>> f = np.ones((30, 20, 10), dtype=np.float32) 1425 >>> with tempfile.TemporaryDirectory() as d: 1426 ... write_hdf_data(Path(d) / "out.h5", f, r, t, p) 1427 ... data, r2, t2, p2 = read_hdf_data(Path(d) / "out.h5") 1428 ... data.shape 1429 (30, 20, 10) 1430 """ 1431 return _dispatch_by_ext(ifile, _write_h4_data, _write_h5_data, data, 1432 *scales, dataset_id=dataset_id, sync_dtype=sync_dtype, strict=strict, **kwargs)
1433 1434
[docs] 1435def write_hdf_meta(ifile: PathLike, /, 1436 meta: Optional[Mapping[str, Mapping[str, Any]]] = None, 1437 **kwargs) -> Path: 1438 """ 1439 Add or update attributes on an existing HDF4 (.hdf) or HDF5 (.h5) file. 1440 1441 Unlike :func:`write_hdf_data`, this function does **not** create or overwrite 1442 any dataset — it only attaches attribute key-value pairs to the file root 1443 (via ``**kwargs``) or to named datasets within the file (via ``meta``). 1444 1445 Parameters 1446 ---------- 1447 ifile : PathLike 1448 Path to an existing HDF file. The file must already contain any 1449 datasets referenced by *meta*. 1450 meta : Mapping[str, Mapping[str, Any]] | None, optional 1451 Per-dataset attributes. Each key is a dataset name (e.g. ``'Data'`` 1452 for HDF5, ``'Data-Set-2'`` for HDF4); the corresponding value is a 1453 mapping of attribute name → attribute value to attach to that dataset. 1454 ``None`` (the default) applies no per-dataset attributes. 1455 **kwargs 1456 File-level (root) attributes. For HDF5 files these are written to 1457 the root group; for HDF4 files they are written as SD-level global 1458 attributes. They do **not** appear on any dataset and will not be 1459 returned by :func:`read_hdf_meta`. 1460 1461 Returns 1462 ------- 1463 out : Path 1464 The path to the modified HDF file. 1465 1466 Raises 1467 ------ 1468 ValueError 1469 If the file does not have a ``.hdf`` or ``.h5`` extension. 1470 1471 Notes 1472 ----- 1473 This function delegates to :func:`_write_h5_meta` for HDF5 files and 1474 :func:`_write_h4_meta` for HDF4 files based on the file extension. 1475 1476 Per-dataset attributes written via *meta* are subsequently accessible 1477 through the :attr:`~HdfDataMeta.attr` field of the :class:`HdfDataMeta` 1478 named tuple returned by :func:`read_hdf_meta`. 1479 1480 See Also 1481 -------- 1482 write_hdf_data : Write dataset data (and optionally attributes) to a new file. 1483 read_hdf_meta : Read dataset metadata and attributes from an HDF file. 1484 1485 Examples 1486 -------- 1487 >>> import tempfile, numpy as np 1488 >>> from pathlib import Path 1489 >>> from psi_io import write_hdf_data, write_hdf_meta, read_hdf_meta 1490 >>> f = np.ones((30, 20, 10), dtype=np.float32) 1491 >>> with tempfile.TemporaryDirectory() as d: 1492 ... _ = write_hdf_data(Path(d) / "out.h5", f) 1493 ... _ = write_hdf_meta(Path(d) / "out.h5", meta={'Data': {'long_name': 'test field', 'units': 'G'}}) 1494 ... meta = read_hdf_meta(Path(d) / "out.h5") 1495 ... meta[0].attr['long_name'] 1496 'test field' 1497 """ 1498 return _dispatch_by_ext(ifile, _write_h4_meta, _write_h5_meta, meta, **kwargs)
1499 1500
[docs] 1501def convert(ifile: PathLike, 1502 ofile: Optional[PathLike] = None, 1503 strict: bool = True) -> Path: 1504 """ 1505 Convert an HDF file between HDF4 (.hdf) and HDF5 (.h5) formats. 1506 1507 All datasets and their attributes are preserved in the output file. 1508 The output format is inferred from the input: `.hdf` → `.h5` and vice versa, 1509 unless an explicit output path is provided. 1510 1511 Parameters 1512 ---------- 1513 ifile : PathLike 1514 The path to the source HDF file. 1515 ofile : PathLike, optional 1516 The path to the output HDF file. 1517 If ``None``, the output file is written alongside the input file 1518 with its extension swapped (*e.g.* ``foo.hdf`` → ``foo.h5``). 1519 strict : bool, optional 1520 If ``True``, raise an error if any dataset attribute cannot be written 1521 to the output format. If ``False``, a warning is printed and the 1522 attribute is skipped. Default is ``True``. 1523 1524 Returns 1525 ------- 1526 out : Path 1527 The path to the written output file. 1528 1529 Raises 1530 ------ 1531 ValueError 1532 If the input file does not have a ``.hdf`` or ``.h5`` extension. 1533 KeyError 1534 If, for HDF4 output files, the data or an attribute value has a dtype 1535 not supported by :py:mod:`pyhdf` and ``strict`` is ``True``. See 1536 :func:`write_hdf_data` for the full dtype support table. 1537 TypeError 1538 If, for HDF5 output files, a dataset attribute has a type that cannot be 1539 stored as an HDF5 attribute and ``strict`` is ``True``. 1540 1541 See Also 1542 -------- 1543 convert_psih4_to_psih5 : Specialized PSI-convention HDF4 → HDF5 converter. 1544 write_hdf_data : Generic HDF data writing routine. 1545 read_hdf_data : Generic HDF data reading routine. 1546 1547 Examples 1548 -------- 1549 >>> import tempfile 1550 >>> from pathlib import Path 1551 >>> from psi_io import convert, read_hdf_meta 1552 >>> from psi_io.data import get_3d_data 1553 >>> h5_path = get_3d_data(hdf=".h5") 1554 >>> with tempfile.TemporaryDirectory() as d: 1555 ... out = convert(h5_path, Path(d) / "br.hdf") 1556 ... meta = read_hdf_meta(out) 1557 ... meta[0].name 1558 'Data' 1559 """ 1560 ifile = Path(ifile) 1561 if not ofile: 1562 ofile = ifile.with_suffix(".hdf") if ifile.suffix == ".h5" else ifile.with_suffix(".h5") 1563 else: 1564 ofile = Path(ofile) 1565 ofile.parent.mkdir(parents=True, exist_ok=True) 1566 1567 meta_data = read_hdf_meta(ifile) 1568 for dataset in meta_data: 1569 data, *scales = read_hdf_data(ifile, dataset_id=dataset.name, return_scales=True) 1570 write_hdf_data(ofile, data, *scales, dataset_id=dataset.name, strict=strict, **dataset.attr) 1571 1572 return ofile
1573 1574
[docs] 1575def convert_psih4_to_psih5(ifile: PathLike, 1576 ofile: Optional[PathLike] = None) -> Path: 1577 """ 1578 Convert a PSI-convention HDF4 file to an HDF5 file. 1579 1580 Unlike :func:`convert`, this function is specialized for PSI-style HDF4 files: 1581 it reads the primary dataset (``'Data-Set-2'``) and writes it under the PSI HDF5 1582 dataset name (``'Data'``), preserving the dataset's attributes and scales. 1583 1584 Parameters 1585 ---------- 1586 ifile : PathLike 1587 The path to the source HDF4 (.hdf) file. 1588 ofile : PathLike, optional 1589 The path to the output HDF5 (.h5) file. 1590 If ``None``, the output file is written alongside the input file with a 1591 ``.h5`` extension (*e.g.* ``foo.hdf`` → ``foo.h5``). 1592 1593 Returns 1594 ------- 1595 out : Path 1596 The path to the written HDF5 file. 1597 1598 Raises 1599 ------ 1600 ValueError 1601 If ``ifile`` does not have a ``.hdf`` extension. 1602 ValueError 1603 If ``ofile`` does not have a ``.h5`` extension. 1604 ImportError 1605 If the :py:mod:`pyhdf` package is not available. 1606 1607 See Also 1608 -------- 1609 convert : General-purpose HDF4 ↔ HDF5 converter. 1610 write_hdf_data : Generic HDF data writing routine. 1611 1612 Examples 1613 -------- 1614 >>> import tempfile 1615 >>> from pathlib import Path 1616 >>> from psi_io import convert_psih4_to_psih5, read_hdf_meta 1617 >>> from psi_io.data import get_3d_data 1618 >>> hdf4_path = get_3d_data(hdf=".hdf") 1619 >>> with tempfile.TemporaryDirectory() as d: 1620 ... out = convert_psih4_to_psih5(hdf4_path, Path(d) / "br.h5") 1621 ... meta = read_hdf_meta(out) 1622 ... meta[0].name 1623 'Data' 1624 """ 1625 ifile = Path(ifile) 1626 ofile = ifile.with_suffix(".h5") if not ofile else Path(ofile) 1627 if ifile.suffix != ".hdf": 1628 raise ValueError(f"Input file must have a .hdf extension; got {ifile.suffix}") 1629 if ofile.suffix != ".h5": 1630 raise ValueError(f"Output file must have a .h5 extension; got {ofile.suffix}") 1631 ofile.parent.mkdir(parents=True, exist_ok=True) 1632 1633 1634 data, *scales = read_hdf_data(ifile, dataset_id=PSI_DATA_ID["h4"], return_scales=True) 1635 meta_data, *_ = read_hdf_meta(ifile, dataset_id=PSI_DATA_ID["h4"]) 1636 1637 write_hdf_data(ofile, data, *scales, 1638 dataset_id=PSI_DATA_ID["h5"], **meta_data.attr) 1639 return ofile
1640
[docs] 1641def instantiate_linear_interpolator(*args, **kwargs): 1642 r""" 1643 Instantiate a linear interpolator using the provided data and scales. 1644 1645 Parameters 1646 ---------- 1647 *args : sequence[array_like] 1648 The first argument is the data array. 1649 Subsequent arguments are the scales (coordinate arrays) for each dimension. 1650 **kwargs : dict 1651 Additional keyword arguments to pass to 1652 :class:`~scipy.interpolate.RegularGridInterpolator`. 1653 1654 Returns 1655 ------- 1656 out : RegularGridInterpolator 1657 An instance of RegularGridInterpolator initialized 1658 with the provided data and scales. 1659 1660 Raises 1661 ------ 1662 ImportError 1663 If the ``scipy`` package is not available. 1664 1665 Notes 1666 ----- 1667 This function transposes the data array and passes it along with the scales 1668 to :class:`~scipy.interpolate.RegularGridInterpolator`. Given a PSI-style 1669 Fortran-ordered 3D dataset, the resulting interpolator can be queried using 1670 :math:`(r, \theta, \phi)` coordinates. 1671 1672 Examples 1673 -------- 1674 Import a 3D HDF5 cube. 1675 1676 >>> from psi_io.data import get_3d_data 1677 >>> from psi_io import read_hdf_by_value 1678 >>> from numpy import pi 1679 >>> filepath = get_3d_data() 1680 1681 Read the dataset by value (at 15 R_sun in the radial dimension). 1682 1683 >>> data_and_scales = read_hdf_by_value(filepath, 15, None, None) 1684 >>> interpolator = instantiate_linear_interpolator(*data_and_scales) 1685 1686 Interpolate at a specific position. 1687 1688 >>> interpolator((15, pi/2, pi)) 1689 0.0012864485109423877 1690 """ 1691 _except_no_scipy() 1692 return RegularGridInterpolator( 1693 values=args[0].T, 1694 points=args[1:], 1695 **kwargs)
1696 1697
[docs] 1698def sp_interpolate_slice_from_hdf(*xi, **kwargs): 1699 r""" 1700 Interpolate a slice from HDF data using SciPy's `RegularGridInterpolator`. 1701 1702 .. note:: 1703 Slicing routines result in a dimensional reduction. The dimensions 1704 that are fixed (i.e. provided as float values in `*xi`) are removed 1705 from the output slice, while the dimensions that are not fixed 1706 (*i.e.* provided as None in `*xi`) are retained. 1707 1708 Parameters 1709 ---------- 1710 *xi : sequence 1711 Positional arguments passed-through to :func:`read_hdf_by_value`. 1712 **kwargs : dict 1713 Keyword arguments passed-through to :func:`read_hdf_by_value`. 1714 **NOTE: Instantiating a linear interpolator requires the** ``return_scales`` 1715 **keyword argument to be set to True; this function overrides 1716 any provided value for** ``return_scales`` **to ensure this behavior.** 1717 1718 Returns 1719 ------- 1720 data_slice : np.ndarray 1721 The interpolated data slice with fixed dimensions removed. 1722 scales : list[np.ndarray] 1723 Coordinate scale arrays for the retained (non-fixed) dimensions. 1724 1725 Notes 1726 ----- 1727 This function reads data from an HDF file, builds a 1728 :class:`~scipy.interpolate.RegularGridInterpolator`, and evaluates it at 1729 the requested fixed coordinates to produce the slice. 1730 1731 .. note:: 1732 The returned slice is Fortran-ordered, *e.g.* radial slices have shape 1733 :math:`(n_\phi, n_\theta)`, and :math:`\phi` slices have shape 1734 :math:`(n_r, n_\theta)`. 1735 1736 .. note:: 1737 :class:`~scipy.interpolate.RegularGridInterpolator` casts all input data 1738 to ``float64`` internally. PSI HDF datasets stored as ``float32`` will 1739 therefore be upcast during interpolation. 1740 1741 Examples 1742 -------- 1743 >>> from psi_io.data import get_3d_data 1744 >>> from psi_io import sp_interpolate_slice_from_hdf 1745 >>> from numpy import pi 1746 >>> filepath = get_3d_data() 1747 1748 Fetch a 2D slice at r=15 from 3D map 1749 1750 >>> slice_, theta_scale, phi_scale = sp_interpolate_slice_from_hdf(filepath, 15, None, None) 1751 >>> slice_.shape, theta_scale.shape, phi_scale.shape 1752 ((181, 100), (100,), (181,)) 1753 1754 Fetch a single point from 3D map 1755 1756 >>> point_value, *_ = sp_interpolate_slice_from_hdf(filepath, 1, pi/2, pi) 1757 >>> point_value 1758 6.084495480971823 1759 """ 1760 filepath, *args = xi 1761 kwargs.pop('return_scales', None) 1762 result = read_hdf_by_value(filepath, *args, **kwargs) 1763 interpolator = instantiate_linear_interpolator(*result) 1764 grid = [yi[0] if yi[0] is not None else yi[1] for yi in zip(args, result[1:])] 1765 slice_ = interpolator(tuple(np.meshgrid(*grid, indexing='ij'))) 1766 indices = [0 if yi is not None else slice(None, None) for yi in args] 1767 return slice_[tuple(indices)].T, *[yi[1] for yi in zip(args, result[1:]) if yi[0] is None]
1768 1769
[docs] 1770def np_interpolate_slice_from_hdf(ifile: PathLike, /, 1771 *xi: Union[float, Tuple[float, float], None], 1772 dataset_id: Optional[str] = None, 1773 by_index: bool = False, 1774 ): 1775 """ 1776 Interpolate a slice from HDF data using linear interpolation. 1777 1778 .. note:: 1779 Slicing routines result in a dimensional reduction. The dimensions 1780 that are fixed (i.e. provided as float values in `*xi`) are removed 1781 from the output slice, while the dimensions that are not fixed 1782 (*i.e.* provided as `None` in `*xi`) are retained. 1783 1784 Parameters 1785 ---------- 1786 ifile : PathLike 1787 The path to the HDF file to read. 1788 *xi : sequence 1789 Positional arguments passed-through to reader function. 1790 dataset_id : str | None, optional 1791 The identifier of the dataset to read. If ``None``, a default dataset 1792 is used (``'Data-Set-2'`` for HDF4 and ``'Data'`` for HDF5). 1793 Default is ``None``. 1794 by_index : bool, optional 1795 If ``True``, use :func:`read_hdf_by_ivalue` to read data by fractional 1796 index values. If ``False``, use :func:`read_hdf_by_value` to read by 1797 physical coordinate values. Default is ``False``. 1798 1799 Returns 1800 ------- 1801 data_slice : np.ndarray 1802 The interpolated data slice with fixed dimensions removed. 1803 scales : list[np.ndarray] 1804 Coordinate scale arrays for the retained (non-fixed) dimensions. 1805 1806 Raises 1807 ------ 1808 ValueError 1809 If the number of dimensions to interpolate over is not supported. 1810 1811 Notes 1812 ----- 1813 This function supports linear, bilinear, and trilinear interpolation 1814 depending on the number of dimensions fixed in `xi`. 1815 1816 Examples 1817 -------- 1818 >>> from psi_io.data import get_3d_data 1819 >>> from psi_io import np_interpolate_slice_from_hdf 1820 >>> from numpy import pi 1821 >>> filepath = get_3d_data() 1822 1823 Fetch a 2D slice at r=15 from 3D map 1824 1825 >>> slice_, theta_scale, phi_scale = np_interpolate_slice_from_hdf(filepath, 15, None, None) 1826 >>> slice_.shape, theta_scale.shape, phi_scale.shape 1827 ((181, 100), (100,), (181,)) 1828 1829 Fetch a single point from 3D map 1830 1831 >>> point_value, *_ = np_interpolate_slice_from_hdf(filepath, 1, pi/2, pi) 1832 >>> point_value 1833 6.084496 1834 1835 """ 1836 reader = read_hdf_by_value if not by_index else read_hdf_by_ivalue 1837 data, *scales = reader(ifile, *xi, dataset_id=dataset_id, return_scales=True) 1838 f_ = np.transpose(data) 1839 slice_type = sum([yi is not None for yi in xi]) 1840 if slice_type == 1: 1841 return _np_linear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None] 1842 elif slice_type == 2: 1843 return _np_bilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None] 1844 elif slice_type == 3: 1845 return _np_trilinear_interpolation(xi, scales, f_).T, *[yi[1] for yi in zip(xi, scales) if yi[0] is None] 1846 else: 1847 raise ValueError("Not a valid number of dimensions for supported linear interpolation methods")
1848 1849
[docs] 1850def interpolate_positions_from_hdf(ifile, *xi, **kwargs): 1851 r""" 1852 Interpolate at a list of scale positions using SciPy's 1853 :class:`~scipy.interpolate.RegularGridInterpolator`. 1854 1855 Parameters 1856 ---------- 1857 ifile : PathLike 1858 The path to the HDF file to read. 1859 *xi : sequence[np.ndarray] 1860 Coordinate values for each dimension of the ``n``-dimensional dataset. 1861 Each array must have the same length :math:`m`; the function assembles 1862 them into an :math:`m \times n` column stack for interpolation. 1863 **kwargs 1864 Keyword arguments forwarded to :func:`read_hdf_by_value`. 1865 1866 Returns 1867 ------- 1868 out : np.ndarray 1869 The interpolated values at the provided positions. 1870 1871 Notes 1872 ----- 1873 This function reads data from an HDF file, creates a linear interpolator, 1874 and interpolates at the provided scale values. For each dimension, the 1875 minimum and maximum values from the provided arrays are used to read 1876 the necessary subset of data from the HDF file *viz.* to avoid loading 1877 the entire dataset into memory. 1878 1879 Examples 1880 -------- 1881 Import a 3D HDF5 cube. 1882 1883 >>> from psi_io.data import get_3d_data 1884 >>> from psi_io import interpolate_positions_from_hdf 1885 >>> import numpy as np 1886 >>> filepath = get_3d_data() 1887 1888 Set up positions to interpolate. 1889 1890 >>> r_vals = np.array([15, 20, 25]) 1891 >>> theta_vals = np.array([np.pi/4, np.pi/2, 3*np.pi/4]) 1892 >>> phi_vals = np.array([0, np.pi, 2*np.pi]) 1893 1894 Interpolate at the specified positions. 1895 1896 >>> interpolate_positions_from_hdf(filepath, r_vals, theta_vals, phi_vals) 1897 [0.0008402743657585175, 0.000723875405654482, -0.00041033233811179216] 1898 """ 1899 xi_ = [(np.nanmin(i), np.nanmax(i)) for i in xi] 1900 f, *scales = read_hdf_by_value(ifile, *xi_, **kwargs) 1901 interpolator = instantiate_linear_interpolator(f, *scales, bounds_error=False) 1902 return interpolator(np.stack(xi, axis=len(xi[0].shape)))
1903 1904
[docs] 1905def interpolate_point_from_1d_slice(xi, scalex, values): 1906 """ 1907 Interpolate a point from a 1D slice using linear interpolation. 1908 1909 Parameters 1910 ---------- 1911 xi : float 1912 The coordinate value at which to interpolate. 1913 scalex : np.ndarray 1914 The coordinate scale array for the dimension. 1915 values : np.ndarray 1916 The 1D data array to interpolate. 1917 1918 Returns 1919 ------- 1920 out : np.ndarray 1921 The interpolated scalar value as a zero-dimensional array. 1922 1923 Examples 1924 -------- 1925 >>> import numpy as np 1926 >>> from psi_io.psi_io import interpolate_point_from_1d_slice 1927 >>> x = np.linspace(0.0, 1.0, 11) 1928 >>> f = x ** 2 1929 >>> float(interpolate_point_from_1d_slice(0.5, x, f)) 1930 0.25 1931 """ 1932 if scalex[0] > scalex[-1]: 1933 scalex, values = scalex[::-1], values[::-1] 1934 xi_ = int(np.searchsorted(scalex, xi)) 1935 sx_ = slice(*_check_index_ranges(len(scalex), xi_, xi_)) 1936 return _np_linear_interpolation([xi], [scalex[sx_]], values[sx_])
1937 1938
[docs] 1939def interpolate_point_from_2d_slice(xi, yi, scalex, scaley, values): 1940 """ 1941 Interpolate a point from a 2D slice using bilinear interpolation. 1942 1943 Parameters 1944 ---------- 1945 xi : float 1946 The coordinate value for the first dimension. 1947 yi : float 1948 The coordinate value for the second dimension. 1949 scalex : np.ndarray 1950 The coordinate scale array for the first dimension. 1951 scaley : np.ndarray 1952 The coordinate scale array for the second dimension. 1953 values : np.ndarray 1954 The 2D data array to interpolate, with shape ``(nx, ny)``. 1955 1956 Returns 1957 ------- 1958 out : np.ndarray 1959 The interpolated scalar value as a zero-dimensional array. 1960 1961 Examples 1962 -------- 1963 >>> import numpy as np 1964 >>> from psi_io.psi_io import interpolate_point_from_2d_slice 1965 >>> x = np.linspace(0.0, 1.0, 11) 1966 >>> y = np.linspace(0.0, 1.0, 11) 1967 >>> f = np.outer(x, y) 1968 >>> float(interpolate_point_from_2d_slice(0.5, 0.5, x, y, f)) 1969 0.25 1970 """ 1971 values = np.transpose(values) 1972 if scalex[0] > scalex[-1]: 1973 scalex, values = scalex[::-1], values[::-1, :] 1974 if scaley[0] > scaley[-1]: 1975 scaley, values = scaley[::-1], values[:, ::-1] 1976 xi_, yi_ = int(np.searchsorted(scalex, xi)), int(np.searchsorted(scaley, yi)) 1977 sx_, sy_ = slice(*_check_index_ranges(len(scalex), xi_, xi_)), slice(*_check_index_ranges(len(scaley), yi_, yi_)) 1978 return _np_bilinear_interpolation([xi, yi], [scalex[sx_], scaley[sy_]], values[(sx_, sy_)])
1979 1980 1981def _rdhdf_nd(hdf_filename: str, 1982 dimensionality: int 1983 ) -> Tuple[np.ndarray, ...]: 1984 """Read an n-dimensional PSI-style HDF file; shared implementation for `rdhdf_1d/2d/3d`. 1985 1986 Parameters 1987 ---------- 1988 hdf_filename : PathLike 1989 The path to the HDF5 (.h5) or HDF4 (.hdf) file to read. 1990 dimensionality : int 1991 The expected number of dimensions of the dataset. 1992 1993 Returns 1994 ------- 1995 out : tuple[np.ndarray, ...] 1996 The coordinate scales for each dimension followed by the data array. 1997 1998 Raises 1999 ------ 2000 ValueError 2001 If the dataset dimensionality does not match ``dimensionality``. 2002 2003 Examples 2004 -------- 2005 >>> from psi_io.psi_io import _rdhdf_nd 2006 >>> from psi_io.data import get_3d_data 2007 >>> r, t, p, f = _rdhdf_nd(get_3d_data(), dimensionality=3) 2008 >>> f.shape 2009 (181, 100, 151) 2010 """ 2011 f, *scales = read_hdf_data(hdf_filename) 2012 if f.ndim != dimensionality: 2013 err = f'Expected {dimensionality}D data, got {f.ndim}D data instead.' 2014 raise ValueError(err) 2015 scales = scales or (np.empty(0) for _ in f.shape) 2016 return *scales, f 2017 2018 2019def _wrhdf_nd(hdf_filename: str, 2020 data: np.ndarray, 2021 *scales: Sequence[Union[np.ndarray, None]], 2022 dimensionality: int, 2023 sync_dtype: bool = True, 2024 **kwargs 2025 ) -> None: 2026 """Write an n-dimensional PSI-style HDF file; shared implementation for `wrhdf_1d/2d/3d`. 2027 2028 Parameters 2029 ---------- 2030 hdf_filename : PathLike 2031 The path to the HDF5 (.h5) or HDF4 (.hdf) file to write. 2032 data : np.ndarray 2033 The data array to write. 2034 *scales : Sequence[np.ndarray | None] 2035 The scales (coordinate arrays) for each dimension. 2036 dimensionality : int 2037 The expected number of dimensions of the data array. 2038 sync_dtype : bool 2039 If True, scale dtypes are cast to match the data array dtype. 2040 **kwargs 2041 Additional keyword arguments forwarded to :func:`write_hdf_data`. 2042 2043 Raises 2044 ------ 2045 ValueError 2046 If the data dimensionality does not match ``dimensionality``. 2047 2048 Examples 2049 -------- 2050 >>> import tempfile, numpy as np 2051 >>> from pathlib import Path 2052 >>> from psi_io.psi_io import _wrhdf_nd 2053 >>> from psi_io import read_hdf_data 2054 >>> f = np.ones((10, 20, 30), dtype=np.float32) 2055 >>> r = np.linspace(1.0, 5.0, 10, dtype=np.float32) 2056 >>> t = np.linspace(0.0, 3.14, 20, dtype=np.float32) 2057 >>> p = np.linspace(0.0, 6.28, 30, dtype=np.float32) 2058 >>> with tempfile.TemporaryDirectory() as d: 2059 ... _wrhdf_nd(Path(d) / "out.h5", f, r, t, p, dimensionality=3) 2060 ... data, *_ = read_hdf_data(Path(d) / "out.h5") 2061 ... data.shape 2062 (10, 20, 30) 2063 """ 2064 if data.ndim != dimensionality: 2065 err = f'Expected {dimensionality}D data, got {data.ndim}D data instead.' 2066 raise ValueError(err) 2067 write_hdf_data(hdf_filename, data, *scales, sync_dtype=sync_dtype, **kwargs) 2068 2069 2070def _get_scales_nd_h5(ifile: Union[ Path, str], /, 2071 dimensionality: int, 2072 dataset_id: Optional[str] = None, 2073 ): 2074 """HDF5 (.h5) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`. 2075 2076 Parameters 2077 ---------- 2078 ifile : PathLike 2079 The path to the HDF5 (.h5) file. 2080 dimensionality : int 2081 The expected number of dimensions of the dataset. 2082 dataset_id : str, optional 2083 The identifier of the dataset whose scales are read. 2084 If ``None``, the default PSI dataset (``'Data'``) is used. 2085 2086 Returns 2087 ------- 2088 out : tuple[np.ndarray, ...] 2089 The coordinate scale arrays for each dimension. 2090 2091 Raises 2092 ------ 2093 ValueError 2094 If the dataset dimensionality does not match ``dimensionality``. 2095 ValueError 2096 If any dimension has no associated scale. 2097 2098 Examples 2099 -------- 2100 >>> from psi_io.psi_io import _get_scales_nd_h5 2101 >>> from psi_io.data import get_3d_data 2102 >>> r, t, p = _get_scales_nd_h5(get_3d_data(), dimensionality=3) 2103 >>> r.shape, t.shape, p.shape 2104 ((151,), (100,), (181,)) 2105 """ 2106 with h5.File(ifile, 'r') as hdf: 2107 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2108 ndim = data.ndim 2109 if ndim != dimensionality: 2110 err = f'Expected {dimensionality}D data, got {ndim}D data instead.' 2111 raise ValueError(err) 2112 scales = [] 2113 for dim in data.dims: 2114 if dim: 2115 scales.append(dim[0][:]) 2116 else: 2117 raise ValueError(f'Dimension has no scale associated with it.') 2118 return tuple(scales) 2119 2120 2121def _get_scales_nd_h4(ifile: Union[ Path, str], /, 2122 dimensionality: int, 2123 dataset_id: Optional[str] = None, 2124 ): 2125 """HDF4 (.hdf) version of :func:`get_scales_1d` / :func:`get_scales_2d` / :func:`get_scales_3d`. 2126 2127 Parameters 2128 ---------- 2129 ifile : PathLike 2130 The path to the HDF4 (.hdf) file. 2131 dimensionality : int 2132 The expected number of dimensions of the dataset. 2133 dataset_id : str, optional 2134 The identifier of the dataset whose scales are read. 2135 If ``None``, the default PSI dataset (``'Data-Set-2'``) is used. 2136 2137 Returns 2138 ------- 2139 out : tuple[np.ndarray, ...] 2140 The coordinate scale arrays for each dimension. 2141 2142 Raises 2143 ------ 2144 ValueError 2145 If the dataset dimensionality does not match ``dimensionality``. 2146 ValueError 2147 If any dimension has no associated scale. 2148 2149 Examples 2150 -------- 2151 >>> from psi_io.psi_io import _get_scales_nd_h4 2152 >>> from psi_io.data import get_3d_data 2153 >>> r, t, p = _get_scales_nd_h4(get_3d_data(hdf=".hdf"), dimensionality=3) # doctest: +SKIP 2154 >>> r.shape # doctest: +SKIP 2155 (151,) 2156 """ 2157 hdf = h4.SD(str(ifile)) 2158 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2159 ndim = data.info()[1] 2160 if ndim != dimensionality: 2161 err = f'Expected {dimensionality}D data, got {ndim}D data instead.' 2162 raise ValueError(err) 2163 scales = [] 2164 for k_, v_ in reversed(data.dimensions(full=1).items()): 2165 if v_[3]: 2166 scales.append(hdf.select(k_)[:]) 2167 else: 2168 raise ValueError('Dimension has no scale associated with it.') 2169 return tuple(scales) 2170 2171 2172def _read_h5_meta(ifile: PathLike, /, 2173 dataset_id: Optional[str] = None 2174 ): 2175 """HDF5 (.h5) version of :func:`read_hdf_meta`. 2176 2177 Examples 2178 -------- 2179 >>> from psi_io.psi_io import _read_h5_meta 2180 >>> from psi_io.data import get_3d_data 2181 >>> meta = _read_h5_meta(get_3d_data()) 2182 >>> meta[0].name 2183 'Data' 2184 """ 2185 with h5.File(ifile, 'r') as hdf: 2186 # Raises KeyError if ``dataset_id`` not found 2187 # If ``dataset_id`` is None, get all non-scale :class:`h5.Dataset`s 2188 if dataset_id: 2189 datasets = (dataset_id, hdf[dataset_id]), 2190 else: 2191 datasets = ((k, v) for k, v in hdf.items() if not v.is_scale) 2192 2193 # One should avoid multiple calls to ``dimproxy[0]`` – *e.g.* ``dimproxy[0].dtype`` and 2194 # ``dimproxy[0].shape`` – because the __getitem__ method creates and returns a new 2195 # :class:`~h5.DimensionProxy` object each time it is called. [Does this matter? Probably not.] 2196 return [HdfDataMeta(name=k, 2197 type=v.dtype, 2198 shape=v.shape, 2199 attr=dict(v.attrs), 2200 scales=[HdfScaleMeta(name=dimproxy.label, 2201 type=dim.dtype, 2202 shape=dim.shape, 2203 attr=dict(dim.attrs), 2204 imin=dim[0], 2205 imax=dim[-1]) 2206 for dimproxy in v.dims if dimproxy and (dim := dimproxy[0])]) 2207 for k, v in datasets] 2208 2209 2210def _read_h4_meta(ifile: PathLike, /, 2211 dataset_id: Optional[str] = None 2212 ): 2213 """HDF4 (.hdf) version of :func:`read_hdf_meta`. 2214 2215 Examples 2216 -------- 2217 >>> from psi_io.psi_io import _read_h4_meta 2218 >>> from psi_io.data import get_3d_data 2219 >>> meta = _read_h4_meta(get_3d_data(hdf=".hdf")) # doctest: +SKIP 2220 >>> meta[0].name # doctest: +SKIP 2221 'Data-Set-2' 2222 """ 2223 hdf = h4.SD(str(ifile)) 2224 # Raises HDF4Error if ``dataset_id`` not found 2225 # If ``dataset_id`` is None, get all non-scale :class:`pyhdf.SD.SDS`s 2226 if dataset_id: 2227 datasets = (dataset_id, hdf.select(dataset_id)), 2228 else: 2229 datasets = ((k, hdf.select(k)) for k in hdf.datasets().keys() if not hdf.select(k).iscoordvar()) 2230 2231 # The inner list comprehension differs in approach from the HDF5 version because calling 2232 # ``dimensions(full=1)`` on an :class:`~pyhdf.SD.SDS` returns a dictionary of dimension 2233 # dataset identifiers (keys) and tuples containing dimension metadata (values). Even if no 2234 # coordinate-variable datasets are defined, this dictionary is still returned; the only 2235 # indication that the datasets returned do not exist is that the "type" field (within the 2236 # tuple of dimension metadata) is set to 0. 2237 2238 # Also, one cannot avoid multiple calls to ``hdf.select(k_)`` within the inner list comprehension 2239 # because :class:`~pyhdf.SD.SDS` objects do not define a ``__bool__`` method, and the fallback 2240 # behavior of Python is to assess if the __len__ method returns a non-zero value (which, in 2241 # this case, always returns 0). 2242 return [HdfDataMeta(name=k, 2243 type=SDC_TYPE_CONVERSIONS[v.info()[3]], 2244 shape=_cast_shape_tuple(v.info()[2]), 2245 attr=v.attributes(), 2246 scales=[HdfScaleMeta(name=k_, 2247 type=SDC_TYPE_CONVERSIONS[v_[3]], 2248 shape=_cast_shape_tuple(v_[0]), 2249 attr=hdf.select(k_).attributes(), 2250 imin=hdf.select(k_)[0], 2251 imax=hdf.select(k_)[-1]) 2252 for k_, v_ in v.dimensions(full=1).items() if v_[3]]) 2253 for k, v in datasets] 2254 2255 2256def _read_h5_rtp(ifile: Union[ Path, str], /): 2257 """HDF5 (.h5) version of :func:`read_rtp_meta`. 2258 2259 Examples 2260 -------- 2261 >>> from psi_io.psi_io import _read_h5_rtp 2262 >>> from psi_io.data import get_3d_data 2263 >>> meta = _read_h5_rtp(get_3d_data()) 2264 >>> sorted(meta.keys()) 2265 ['p', 'r', 't'] 2266 """ 2267 with h5.File(ifile, 'r') as hdf: 2268 return {k: (hdf[v].size, hdf[v][0], hdf[v][-1]) 2269 for k, v in zip('rtp', PSI_SCALE_ID['h5'])} 2270 2271 2272def _read_h4_rtp(ifile: Union[ Path, str], /): 2273 """HDF4 (.hdf) version of :func:`read_rtp_meta`. 2274 2275 Examples 2276 -------- 2277 >>> from psi_io.psi_io import _read_h4_rtp 2278 >>> from psi_io.data import get_3d_data 2279 >>> meta = _read_h4_rtp(get_3d_data(hdf=".hdf")) # doctest: +SKIP 2280 >>> sorted(meta.keys()) # doctest: +SKIP 2281 ['p', 'r', 't'] 2282 """ 2283 hdf = h4.SD(str(ifile)) 2284 return {k: (hdf.select(v).info()[2], hdf.select(v)[0], hdf.select(v)[-1]) 2285 for k, v in zip('ptr', PSI_SCALE_ID['h4'])} 2286 2287 2288def _read_h5_data(ifile: PathLike, /, 2289 dataset_id: Optional[str] = None, 2290 return_scales: bool = True, 2291 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2292 """HDF5 (.h5) version of :func:`read_hdf_data`. 2293 2294 Examples 2295 -------- 2296 >>> from psi_io.psi_io import _read_h5_data 2297 >>> from psi_io.data import get_3d_data 2298 >>> data, r, t, p = _read_h5_data(get_3d_data()) 2299 >>> data.shape 2300 (181, 100, 151) 2301 """ 2302 with h5.File(ifile, 'r') as hdf: 2303 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2304 dataset = data[:] 2305 if return_scales: 2306 scales = [dim[0][:] for dim in data.dims if dim] 2307 return dataset, *scales 2308 return dataset 2309 2310 2311def _read_h4_data(ifile: PathLike, /, 2312 dataset_id: Optional[str] = None, 2313 return_scales: bool = True, 2314 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2315 """HDF4 (.hdf) version of :func:`read_hdf_data`. 2316 2317 Examples 2318 -------- 2319 >>> from psi_io.psi_io import _read_h4_data 2320 >>> from psi_io.data import get_3d_data 2321 >>> data, *_ = _read_h4_data(get_3d_data(hdf=".hdf")) # doctest: +SKIP 2322 >>> data.shape # doctest: +SKIP 2323 (181, 100, 151) 2324 """ 2325 hdf = h4.SD(str(ifile)) 2326 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2327 if return_scales: 2328 out = (data[:], 2329 *[hdf.select(k_)[:] for k_, v_ in reversed(data.dimensions(full=1).items()) if v_[3]]) 2330 else: 2331 out = data[:] 2332 return out 2333 2334 2335def _read_h5_by_index(ifile: PathLike, /, 2336 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None], 2337 dataset_id: Optional[str] = None, 2338 return_scales: bool = True, 2339 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2340 """HDF5 (.h5) version of :func:`read_hdf_by_index`. 2341 2342 Examples 2343 -------- 2344 >>> from psi_io.psi_io import _read_h5_by_index 2345 >>> from psi_io.data import get_3d_data 2346 >>> f, r, t, p = _read_h5_by_index(get_3d_data(), 0, None, None) 2347 >>> f.shape 2348 (181, 100, 1) 2349 """ 2350 with h5.File(ifile, 'r') as hdf: 2351 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2352 if len(xi) != data.ndim: 2353 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2354 slices = [_parse_index_inputs(slice_input) for slice_input in xi] 2355 dataset = data[tuple(reversed(slices))] 2356 if return_scales: 2357 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim] 2358 return dataset, *scales 2359 return dataset 2360 2361def _read_h4_by_index(ifile: PathLike, /, 2362 *xi: Union[int, Tuple[Union[int, None], Union[int, None]], None], 2363 dataset_id: Optional[str] = None, 2364 return_scales: bool = True, 2365 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2366 """HDF4 (.hdf) version of :func:`read_hdf_by_index`. 2367 2368 Examples 2369 -------- 2370 >>> from psi_io.psi_io import _read_h4_by_index 2371 >>> from psi_io.data import get_3d_data 2372 >>> f, r, t, p = _read_h4_by_index(get_3d_data(hdf=".hdf"), 0, None, None) # doctest: +SKIP 2373 >>> f.shape # doctest: +SKIP 2374 (181, 100, 1) 2375 """ 2376 hdf = h4.SD(str(ifile)) 2377 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2378 ndim = data.info()[1] 2379 if len(xi) != ndim: 2380 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2381 slices = [_parse_index_inputs(slice_input) for slice_input in xi] 2382 dataset = data[tuple(reversed(slices))] 2383 if return_scales: 2384 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]] 2385 return dataset, *scales 2386 return dataset 2387 2388 2389def _read_h5_by_value(ifile: PathLike, /, 2390 *xi: Union[float, Tuple[float, float], None], 2391 dataset_id: Optional[str] = None, 2392 return_scales: bool = True, 2393 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2394 """HDF5 (.h5) version of :func:`read_hdf_by_value`. 2395 2396 Examples 2397 -------- 2398 >>> from psi_io.psi_io import _read_h5_by_value 2399 >>> from psi_io.data import get_3d_data 2400 >>> f, r, t, p = _read_h5_by_value(get_3d_data(), 15, None, None) 2401 >>> f.shape 2402 (181, 100, 2) 2403 """ 2404 with h5.File(ifile, 'r') as hdf: 2405 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2406 if len(xi) != data.ndim: 2407 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2408 slices = [] 2409 for dimproxy, value in zip(data.dims, xi): 2410 if dimproxy: 2411 slices.append(_parse_value_inputs(dimproxy[0], value)) 2412 elif value is None: 2413 slices.append(slice(None)) 2414 else: 2415 raise ValueError("Cannot slice by value on dimension without scales") 2416 dataset = data[tuple(reversed(slices))] 2417 if return_scales: 2418 scales = [dim[0][si] for si, dim in zip(slices, data.dims) if dim] 2419 return dataset, *scales 2420 return dataset 2421 2422 2423def _read_h4_by_value(ifile: PathLike, /, 2424 *xi: Union[float, Tuple[float, float], None], 2425 dataset_id: Optional[str] = None, 2426 return_scales: bool = True, 2427 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2428 """HDF4 (.hdf) version of :func:`read_hdf_by_value`. 2429 2430 Examples 2431 -------- 2432 >>> from psi_io.psi_io import _read_h4_by_value 2433 >>> from psi_io.data import get_3d_data 2434 >>> f, r, t, p = _read_h4_by_value(get_3d_data(hdf=".hdf"), 15, None, None) # doctest: +SKIP 2435 >>> f.shape # doctest: +SKIP 2436 (181, 100, 2) 2437 """ 2438 hdf = h4.SD(str(ifile)) 2439 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2440 ndim = data.info()[1] 2441 if len(xi) != ndim: 2442 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2443 slices = [] 2444 for (k_, v_), value in zip(reversed(data.dimensions(full=1).items()), xi): 2445 if v_[3] != 0: 2446 slices.append(_parse_value_inputs(hdf.select(k_), value)) 2447 elif value is None: 2448 slices.append(slice(None)) 2449 else: 2450 raise ValueError("Cannot slice by value on dimension without scales") 2451 dataset = data[tuple(reversed(slices))] 2452 if return_scales: 2453 scales = [hdf.select(k_)[si] for si, (k_, v_) in zip(slices, reversed(data.dimensions(full=1).items())) if v_[3]] 2454 return dataset, *scales 2455 return dataset 2456 2457 2458def _read_h5_by_ivalue(ifile: PathLike, /, 2459 *xi: Union[float, Tuple[float, float], None], 2460 dataset_id: Optional[str] = None, 2461 return_scales: bool = True, 2462 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2463 """HDF5 (.h5) version of :func:`read_hdf_by_ivalue`. 2464 2465 Examples 2466 -------- 2467 >>> from psi_io.psi_io import _read_h5_by_ivalue 2468 >>> from psi_io.data import get_3d_data 2469 >>> f, r_idx, t_idx, p_idx = _read_h5_by_ivalue(get_3d_data(), 2.7, None, None) 2470 >>> r_idx.shape 2471 (2,) 2472 """ 2473 with h5.File(ifile, 'r') as hdf: 2474 data = hdf[dataset_id or PSI_DATA_ID['h5']] 2475 if len(xi) != data.ndim: 2476 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2477 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(data.shape), xi)] 2478 dataset = data[tuple(reversed(slices))] 2479 if return_scales: 2480 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(data.shape))] 2481 return dataset, *scales 2482 return dataset 2483 2484 2485def _read_h4_by_ivalue(ifile: PathLike, /, 2486 *xi: Union[float, Tuple[float, float], None], 2487 dataset_id: Optional[str] = None, 2488 return_scales: bool = True, 2489 ) -> Union[np.ndarray, Tuple[np.ndarray]]: 2490 """HDF4 (.hdf) version of :func:`read_hdf_by_ivalue`. 2491 2492 Examples 2493 -------- 2494 >>> from psi_io.psi_io import _read_h4_by_ivalue 2495 >>> from psi_io.data import get_3d_data 2496 >>> f, r_idx, t_idx, p_idx = _read_h4_by_ivalue(get_3d_data(hdf=".hdf"), 2.7, None, None) # doctest: +SKIP 2497 >>> r_idx.shape # doctest: +SKIP 2498 (2,) 2499 """ 2500 hdf = h4.SD(str(ifile)) 2501 data = hdf.select(dataset_id or PSI_DATA_ID['h4']) 2502 ndim, shape = data.info()[1], _cast_shape_tuple(data.info()[2]) 2503 if len(xi) != ndim: 2504 raise ValueError(f"len(xi) must equal the number of scales for {dataset_id}") 2505 slices = [_parse_ivalue_inputs(*args) for args in zip(reversed(shape), xi)] 2506 dataset = data[tuple(reversed(slices))] 2507 if return_scales: 2508 scales = [np.arange(si.start or 0, si.stop or size) for si, size in zip(slices, reversed(shape))] 2509 return dataset, *scales 2510 return dataset 2511 2512 2513def _write_h4_data(ifile: PathLike, /, 2514 data: np.ndarray, 2515 *scales: Sequence[np.ndarray], 2516 dataset_id: Optional[str] = None, 2517 sync_dtype: bool = False, 2518 strict: bool = True, 2519 **kwargs) -> Path: 2520 """HDF4 (.hdf) version of :func:`write_hdf_data`. 2521 2522 Examples 2523 -------- 2524 >>> import tempfile, numpy as np 2525 >>> from pathlib import Path 2526 >>> from psi_io.psi_io import _write_h4_data, _read_h4_data 2527 >>> f = np.ones((10,), dtype=np.float32) 2528 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32) 2529 >>> with tempfile.TemporaryDirectory() as d: # doctest: +SKIP 2530 ... _write_h4_data(Path(d) / "out.hdf", f, x) # doctest: +SKIP 2531 ... data, *_ = _read_h4_data(Path(d) / "out.hdf") # doctest: +SKIP 2532 ... data.shape # doctest: +SKIP 2533 (10,) 2534 """ 2535 dataid = dataset_id or PSI_DATA_ID['h4'] 2536 h4file = h4.SD(str(ifile), h4.SDC.WRITE | h4.SDC.CREATE | h4.SDC.TRUNC) 2537 sds_id = h4file.create(dataid, _dtype_to_sdc(data.dtype), data.shape) 2538 2539 if scales: 2540 for i, scale in enumerate(reversed(scales)): 2541 if scale is not None: 2542 if sync_dtype: 2543 scale = scale.astype(data.dtype) 2544 sds_id.dim(i).setscale(_dtype_to_sdc(scale.dtype), scale.tolist()) 2545 2546 if kwargs: 2547 for k, v in kwargs.items(): 2548 npv = np.asarray(v) 2549 attr_ = sds_id.attr(k) 2550 try: 2551 val = npv.tolist() 2552 if isinstance(val, bytes): 2553 val = val.decode('latin-1') 2554 attr_.set(_dtype_to_sdc(npv.dtype), val) 2555 except KeyError as e: 2556 if strict: 2557 raise KeyError(f"Failed to set attribute '{k}' on dataset '{dataid}'") from e 2558 else: 2559 print(f"Warning: Failed to set attribute '{k}' on dataset '{dataid}'; skipping.") 2560 2561 sds_id.set(data) 2562 sds_id.endaccess() 2563 h4file.end() 2564 2565 return ifile 2566 2567 2568def _write_h5_data(ifile: PathLike, /, 2569 data: np.ndarray, 2570 *scales: Sequence[np.ndarray], 2571 dataset_id: Optional[str] = None, 2572 sync_dtype: bool = False, 2573 strict: bool = True, 2574 **kwargs) -> Path: 2575 """HDF5 (.h5) version of :func:`write_hdf_data`. 2576 2577 Examples 2578 -------- 2579 >>> import tempfile, numpy as np 2580 >>> from pathlib import Path 2581 >>> from psi_io.psi_io import _write_h5_data, _read_h5_data 2582 >>> f = np.ones((10,), dtype=np.float32) 2583 >>> x = np.linspace(0.0, 1.0, 10, dtype=np.float32) 2584 >>> with tempfile.TemporaryDirectory() as d: 2585 ... _write_h5_data(Path(d) / "out.h5", f, x) 2586 ... data, *_ = _read_h5_data(Path(d) / "out.h5") 2587 ... data.shape 2588 (10,) 2589 """ 2590 dataid = dataset_id or PSI_DATA_ID['h5'] 2591 with h5.File(ifile, "w") as h5file: 2592 dataset = h5file.create_dataset(dataid, data=data, dtype=data.dtype, shape=data.shape) 2593 2594 if scales: 2595 for i, scale in enumerate(scales): 2596 if scale is not None: 2597 if sync_dtype: 2598 scale = scale.astype(data.dtype) 2599 h5file.create_dataset(f"dim{i+1}", data=scale, dtype=scale.dtype, shape=scale.shape) 2600 h5file[dataid].dims[i].attach_scale(h5file[f"dim{i+1}"]) 2601 h5file[dataid].dims[i].label = f"dim{i+1}" 2602 2603 if kwargs: 2604 for key, value in kwargs.items(): 2605 if key.startswith('DIMENSION'): 2606 # Skip HDF5 dimension-scale bookkeeping attributes — 2607 # these are managed by attach_scale above and must not 2608 # be overwritten with stale object references. 2609 continue 2610 try: 2611 dataset.attrs[key] = value 2612 except TypeError as e: 2613 if strict: 2614 raise TypeError(f"Failed to set attribute '{key}' on dataset '{dataid}'") from e 2615 else: 2616 print(f"Warning: Failed to set attribute '{key}' on dataset '{dataid}'; skipping.") 2617 2618 return ifile 2619 2620 2621def _write_h4_meta(ifile: PathLike, /, 2622 meta: Optional[Mapping[str, Mapping[str, Any]]] = None, 2623 **kwargs) -> Path: 2624 """HDF4 (.hdf) version of :func:`write_hdf_meta`.""" 2625 metadata = dict(meta or {}) 2626 h4file = h4.SD(str(ifile), h4.SDC.READ | h4.SDC.WRITE) 2627 2628 for k, v in kwargs.items(): 2629 npv = np.asarray(v) 2630 attr_ = h4file.attr(k) 2631 val = npv.tolist() 2632 if isinstance(val, bytes): 2633 val = val.decode('latin-1') 2634 attr_.set(_dtype_to_sdc(npv.dtype), val) 2635 for key, value in metadata.items(): 2636 sds_id = h4file.select(key) 2637 for k, v in dict(value).items(): 2638 npv = np.asarray(v) 2639 attr_ = sds_id.attr(k) 2640 val = npv.tolist() 2641 if isinstance(val, bytes): 2642 val = val.decode('latin-1') 2643 attr_.set(_dtype_to_sdc(npv.dtype), val) 2644 h4file.end() 2645 2646 return ifile 2647 2648 2649def _write_h5_meta(ifile: PathLike, /, 2650 meta: Optional[Mapping[str, Mapping[str, Any]]] = None, 2651 **kwargs) -> Path: 2652 """HDF5 (.h5) version of :func:`write_hdf_meta`.""" 2653 metadata = dict(meta or {}) 2654 with h5.File(ifile, "r+") as h5file: 2655 if kwargs: 2656 h5file.attrs.update(**kwargs) 2657 for key, value in metadata.items(): 2658 h5file[key].attrs.update(**dict(value)) 2659 return ifile 2660 2661 2662def _np_linear_interpolation(xi: Sequence, scales: Sequence, values: np.ndarray): 2663 """ 2664 Perform linear interpolation over one dimension. 2665 2666 Parameters 2667 ---------- 2668 xi : list[float | None] 2669 Target coordinate values for each dimension; ``None`` marks a 2670 "free" (not interpolated) dimension. 2671 scales : list[np.ndarray] 2672 Coordinate scale arrays, one per dimension. 2673 values : np.ndarray 2674 The data array to interpolate. 2675 2676 Returns 2677 ------- 2678 out : np.ndarray 2679 The interpolated data array. 2680 2681 Examples 2682 -------- 2683 >>> import numpy as np 2684 >>> from psi_io.psi_io import _np_linear_interpolation 2685 >>> scales = [np.array([0.0, 1.0])] 2686 >>> values = np.array([0.0, 2.0]) 2687 >>> float(_np_linear_interpolation([0.5], scales, values)) 2688 1.0 2689 """ 2690 index0 = next((i for i, v in enumerate(xi) if v is not None), None) 2691 t = (xi[index0] - scales[index0][0])/(scales[index0][1] - scales[index0][0]) 2692 f0 = [slice(None, None)]*values.ndim 2693 f1 = [slice(None, None)]*values.ndim 2694 f0[index0] = 0 2695 f1[index0] = 1 2696 2697 return (1 - t)*values[tuple(f0)] + t*values[tuple(f1)] 2698 2699 2700def _np_bilinear_interpolation(xi, scales, values): 2701 """ 2702 Perform bilinear interpolation over two dimensions. 2703 2704 Parameters 2705 ---------- 2706 xi : Sequence[float | None] 2707 Target coordinate values for each dimension; ``None`` marks a 2708 "free" (not interpolated) dimension. 2709 scales : Sequence[np.ndarray] 2710 Coordinate scale arrays, one per dimension. 2711 values : np.ndarray 2712 The data array to interpolate. 2713 2714 Returns 2715 ------- 2716 out : np.ndarray 2717 The interpolated data array. 2718 2719 Examples 2720 -------- 2721 >>> import numpy as np 2722 >>> from psi_io.psi_io import _np_bilinear_interpolation 2723 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0])] 2724 >>> values = np.array([[0.0, 0.0], [1.0, 2.0]]) 2725 >>> float(_np_bilinear_interpolation([0.5, 0.5], scales, values)) 2726 0.75 2727 """ 2728 index0, index1 = [i for i, v in enumerate(xi) if v is not None] 2729 t, u = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1)] 2730 2731 f00 = [slice(None, None)]*values.ndim 2732 f10 = [slice(None, None)]*values.ndim 2733 f01 = [slice(None, None)]*values.ndim 2734 f11 = [slice(None, None)]*values.ndim 2735 f00[index0], f00[index1] = 0, 0 2736 f10[index0], f10[index1] = 1, 0 2737 f01[index0], f01[index1] = 0, 1 2738 f11[index0], f11[index1] = 1, 1 2739 2740 return ( 2741 (1 - t)*(1 - u)*values[tuple(f00)] + 2742 t*(1 - u)*values[tuple(f10)] + 2743 (1 - t)*u*values[tuple(f01)] + 2744 t*u*values[tuple(f11)] 2745 ) 2746 2747 2748def _np_trilinear_interpolation(xi, scales, values): 2749 """ 2750 Perform trilinear interpolation over three dimensions. 2751 2752 Parameters 2753 ---------- 2754 xi : list[float | None] 2755 Target coordinate values for each dimension; ``None`` marks a 2756 "free" (not interpolated) dimension. 2757 scales : list[np.ndarray] 2758 Coordinate scale arrays, one per dimension. 2759 values : np.ndarray 2760 The data array to interpolate. 2761 2762 Returns 2763 ------- 2764 out : np.ndarray 2765 The interpolated data array. 2766 2767 Examples 2768 -------- 2769 >>> import numpy as np 2770 >>> from psi_io.psi_io import _np_trilinear_interpolation 2771 >>> scales = [np.array([0.0, 1.0]), np.array([0.0, 1.0]), np.array([0.0, 1.0])] 2772 >>> values = np.zeros((2, 2, 2)) 2773 >>> values[1, 1, 1] = 8.0 2774 >>> float(_np_trilinear_interpolation([0.5, 0.5, 0.5], scales, values)) 2775 1.0 2776 """ 2777 index0, index1, index2 = [i for i, v in enumerate(xi) if v is not None] 2778 t, u, v = [(xi[i] - scales[i][0])/(scales[i][1] - scales[i][0]) for i in (index0, index1, index2)] 2779 2780 f000 = [slice(None, None)]*values.ndim 2781 f100 = [slice(None, None)]*values.ndim 2782 f010 = [slice(None, None)]*values.ndim 2783 f110 = [slice(None, None)]*values.ndim 2784 f001 = [slice(None, None)]*values.ndim 2785 f101 = [slice(None, None)]*values.ndim 2786 f011 = [slice(None, None)]*values.ndim 2787 f111 = [slice(None, None)]*values.ndim 2788 2789 f000[index0], f000[index1], f000[index2] = 0, 0, 0 2790 f100[index0], f100[index1], f100[index2] = 1, 0, 0 2791 f010[index0], f010[index1], f010[index2] = 0, 1, 0 2792 f110[index0], f110[index1], f110[index2] = 1, 1, 0 2793 f001[index0], f001[index1], f001[index2] = 0, 0, 1 2794 f101[index0], f101[index1], f101[index2] = 1, 0, 1 2795 f011[index0], f011[index1], f011[index2] = 0, 1, 1 2796 f111[index0], f111[index1], f111[index2] = 1, 1, 1 2797 2798 c00 = values[tuple(f000)]*(1 - t) + values[tuple(f100)]*t 2799 c10 = values[tuple(f010)]*(1 - t) + values[tuple(f110)]*t 2800 c01 = values[tuple(f001)]*(1 - t) + values[tuple(f101)]*t 2801 c11 = values[tuple(f011)]*(1 - t) + values[tuple(f111)]*t 2802 2803 c0 = c00*(1 - u) + c10*u 2804 c1 = c01*(1 - u) + c11*u 2805 2806 return c0*(1 - v) + c1*v 2807 2808 2809def _check_index_ranges(arr_size: int, 2810 i0: Union[int, np.integer], 2811 i1: Union[int, np.integer] 2812 ) -> Tuple[int, int]: 2813 """ 2814 Adjust index ranges to ensure they cover at least two indices. 2815 2816 Parameters 2817 ---------- 2818 arr_size : int 2819 The size of the array along the dimension. 2820 i0 : int 2821 The starting index. 2822 i1 : int 2823 The ending index. 2824 2825 Returns 2826 ------- 2827 out : tuple[int, int] 2828 Adjusted starting and ending indices. 2829 2830 Notes 2831 ----- 2832 This function ensures that the range between `i0` and `i1` includes at least 2833 two indices for interpolation purposes. 2834 2835 Examples 2836 -------- 2837 >>> from psi_io.psi_io import _check_index_ranges 2838 >>> _check_index_ranges(10, 0, 0) 2839 (0, 2) 2840 >>> _check_index_ranges(10, 5, 5) 2841 (4, 6) 2842 >>> _check_index_ranges(10, 10, 10) 2843 (8, 10) 2844 """ 2845 i0, i1 = int(i0), int(i1) 2846 if i0 == 0: 2847 return (i0, i1 + 2) if i1 == 0 else (i0, i1 + 1) 2848 elif i0 == arr_size: 2849 return i0 - 2, i1 2850 else: 2851 return i0 - 1, i1 + 1 2852 2853 2854def _cast_shape_tuple(input: Union[int, Sequence[int]] 2855 ) -> tuple[int, ...]: 2856 """ 2857 Cast an input to a tuple of integers. 2858 2859 Parameters 2860 ---------- 2861 input : int | Sequence[int] 2862 The input to cast. 2863 2864 Returns 2865 ------- 2866 out : tuple[int, ...] 2867 The input cast as a tuple of integers. 2868 2869 Raises 2870 ------ 2871 TypeError 2872 If the input is neither an integer nor an iterable of integers. 2873 2874 Examples 2875 -------- 2876 >>> from psi_io.psi_io import _cast_shape_tuple 2877 >>> _cast_shape_tuple(5) 2878 (5,) 2879 >>> _cast_shape_tuple([3, 4, 5]) 2880 (3, 4, 5) 2881 """ 2882 if isinstance(input, int): 2883 return (input,) 2884 elif isinstance(input, Sequence): 2885 return tuple(int(i) for i in input) 2886 else: 2887 raise TypeError("Input must be an integer or an iterable of integers.") 2888 2889 2890def _parse_index_inputs(input: Union[int, slice, Sequence[Union[int, None]], None] 2891 ) -> slice: 2892 """ 2893 Parse various slice input formats into a standard slice object. 2894 2895 Parameters 2896 ---------- 2897 input : int | slice | Sequence[int | None] | None 2898 The index specification to parse: 2899 2900 - *int* — selects a single-element slice ``[n, n+1)``. 2901 - *Sequence[int | None]* — unpacked directly into :class:`slice`. 2902 - ``None`` — selects the entire dimension. 2903 2904 Returns 2905 ------- 2906 slice 2907 The parsed slice object. 2908 2909 Raises 2910 ------ 2911 TypeError 2912 If the input type is unsupported. 2913 2914 Examples 2915 -------- 2916 >>> from psi_io.psi_io import _parse_index_inputs 2917 >>> _parse_index_inputs(3) 2918 slice(3, 4, None) 2919 >>> _parse_index_inputs((2, 7)) 2920 slice(2, 7, None) 2921 >>> _parse_index_inputs(None) 2922 slice(None, None, None) 2923 """ 2924 if isinstance(input, int): 2925 return slice(input, input + 1) 2926 elif isinstance(input, Sequence): 2927 return slice(*input) 2928 elif input is None: 2929 return slice(None) 2930 else: 2931 raise TypeError("Unsupported input type for slicing.") 2932 2933 2934def _parse_value_inputs(dimproxy, 2935 value, 2936 scale_exists: bool = True 2937 ) -> slice: 2938 """Parse a scale value or value range into a slice over a dimension's coordinate array. 2939 2940 Parameters 2941 ---------- 2942 dimproxy : array-like 2943 The coordinate array (scale) for the dimension, supporting ``[:]`` indexing. 2944 value : float | tuple[float | None, float | None] | None 2945 The target value or range: 2946 2947 - ``None`` — select the entire dimension. 2948 - *float* — find the 2-element bracket ``[a, b]`` such that ``a <= value < b``. 2949 - *(float, float)* — find the bracket spanning the given range. 2950 scale_exists : bool 2951 Guard flag; raises :exc:`ValueError` if ``False`` and ``value`` is not ``None``. 2952 2953 Returns 2954 ------- 2955 slice 2956 A slice object that selects the appropriate indices from the coordinate array. 2957 2958 Raises 2959 ------ 2960 ValueError 2961 If ``value`` is not ``None`` and ``scale_exists`` is ``False``. 2962 2963 Examples 2964 -------- 2965 >>> import numpy as np 2966 >>> from psi_io.psi_io import _parse_value_inputs 2967 >>> scale = np.linspace(0.0, 10.0, 11) 2968 >>> _parse_value_inputs(scale, None) 2969 slice(None, None, None) 2970 >>> _parse_value_inputs(scale, 4.5) 2971 slice(4, 6, None) 2972 >>> _parse_value_inputs(scale, (2.0, 7.0)) 2973 slice(1, 8, None) 2974 """ 2975 if value is None: 2976 return slice(None) 2977 if not scale_exists: 2978 raise ValueError("Cannot parse value inputs when scale does not exist.") 2979 dim = dimproxy[:] 2980 if not isinstance(value, Sequence): 2981 insert_index = np.searchsorted(dim, value) 2982 return slice(*_check_index_ranges(dim.size, insert_index, insert_index)) 2983 else: 2984 temp_range = list(value) 2985 if temp_range[0] is None: 2986 temp_range[0] = -np.inf 2987 if temp_range[-1] is None: 2988 temp_range[-1] = np.inf 2989 insert_indices = np.searchsorted(dim, temp_range) 2990 return slice(*_check_index_ranges(dim.size, *insert_indices)) 2991 2992 2993def _parse_ivalue_inputs(dimsize, 2994 input: Union[Union[int, float], slice, Sequence[Union[Union[int, float], None]], None] 2995 ) -> slice: 2996 """ 2997 Parse a sub-index value or range into a slice over a dimension of a given size. 2998 2999 Parameters 3000 ---------- 3001 dimsize : int 3002 The size of the array along the dimension; used to clamp the resulting slice. 3003 input : int | float | Sequence[int | float | None] | None 3004 The sub-index specification to parse: 3005 3006 - ``None`` — select the entire dimension. 3007 - *int* or *float* (:math:`a`) — returns ``slice(floor(a), ceil(a))`` 3008 (clamped to ``[0, dimsize - 1]``). 3009 - *Sequence* ``(a, b)`` — returns ``slice(floor(a), ceil(b))`` 3010 (clamped to ``[0, dimsize - 1]``). 3011 3012 Returns 3013 ------- 3014 slice 3015 The parsed slice object, guaranteed to span at least 2 elements. 3016 3017 Raises 3018 ------ 3019 TypeError 3020 If the input type is unsupported. 3021 3022 Examples 3023 -------- 3024 >>> from psi_io.psi_io import _parse_ivalue_inputs 3025 >>> _parse_ivalue_inputs(10, None) 3026 slice(None, None, None) 3027 >>> _parse_ivalue_inputs(10, 2.7) 3028 slice(2, 4, None) 3029 >>> _parse_ivalue_inputs(10, (1.3, 4.8)) 3030 slice(1, 5, None) 3031 """ 3032 if input is None: 3033 return slice(None) 3034 elif isinstance(input, (int, float)): 3035 i0, i1 = math.floor(input), math.ceil(input) 3036 elif isinstance(input, Sequence): 3037 i0, i1 = math.floor(input[0]), math.ceil(input[1]) 3038 else: 3039 raise TypeError("Unsupported input type for slicing.") 3040 3041 if i0 > i1: 3042 i0, i1 = i1, i0 3043 i0, i1 = max(0, i0), min(dimsize - 1, i1) 3044 if i0 > i1: 3045 i0, i1 = i1, i0 3046 i0, i1 = max(0, i0), min(dimsize - 1, i1) 3047 if (i1 - i0) < 2: 3048 return slice(i0, i1 + 2 - (i1-i0)) 3049 else: 3050 return slice(i0, i1)